Skip to content

Commit c1a375e

Browse files
committed
ENH: Add FrozenList.union and .difference
Re-attempt of gh-15506. Closes gh-15475.
1 parent 9019582 commit c1a375e

File tree

4 files changed

+69
-11
lines changed

4 files changed

+69
-11
lines changed

doc/source/groupby.rst

+10
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,16 @@ We could naturally group by either the ``A`` or ``B`` columns, or both:
125125
grouped = df.groupby('A')
126126
grouped = df.groupby(['A', 'B'])
127127
128+
.. versionadded:: 0.24
129+
130+
If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all
131+
but the specified columns:
132+
133+
.. ipython:: python
134+
135+
df2 = df.set_index(['A', 'B'])
136+
grouped = df2.groupby(level=df2.index.names.difference(['B']))
137+
128138
These will split the DataFrame on its index (rows). We could also split by the
129139
columns:
130140

doc/source/whatsnew/v0.24.0.txt

+2-3
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,9 @@ v0.24.0 (Month XX, 2018)
1313
New features
1414
~~~~~~~~~~~~
1515
- :func:`merge` now directly allows merge between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`)
16-
17-
1816
- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
19-
17+
- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods. This functionality greatly simplifies groupby operations that rely on explicitly excluding certain columns. See :ref:`Splitting an object into groups
18+
<groupby.split>` for more information (:issue:`15475`, :issue:`15506`)
2019
- :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing
2120
the user to override the engine's default behavior to include or omit the
2221
dataframe's indexes from the resulting Parquet file. (:issue:`20768`)

pandas/core/indexes/frozen.py

+37-5
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,47 @@ class FrozenList(PandasObject, list):
2323
because it's technically non-hashable, will be used
2424
for lookups, appropriately, etc.
2525
"""
26-
# Sidenote: This has to be of type list, otherwise it messes up PyTables
27-
# typechecks
26+
# Side note: This has to be of type list. Otherwise,
27+
# it messes up PyTables type checks.
2828

29-
def __add__(self, other):
29+
def union(self, other):
30+
"""
31+
Returns a FrozenList with other concatenated to the end of self.
32+
33+
Parameters
34+
----------
35+
other : array-like
36+
The array-like whose elements we are concatenating.
37+
38+
Returns
39+
-------
40+
union : FrozenList
41+
The concatenation of self and other (elements of other appended to self).
42+
"""
3043
if isinstance(other, tuple):
3144
other = list(other)
32-
return self.__class__(super(FrozenList, self).__add__(other))
45+
return type(self)(super(FrozenList, self).__add__(other))
46+
47+
def difference(self, other):
48+
"""
49+
Returns a FrozenList with elements from other removed from self.
50+
51+
Parameters
52+
----------
53+
other : array-like
54+
The array-like whose elements we are removing from self.
55+
56+
Returns
57+
-------
58+
diff : FrozenList
59+
The collection difference between self and other.
60+
"""
61+
other = set(other)
62+
temp = [x for x in self if x not in other]
63+
return type(self)(temp)
3364

34-
__iadd__ = __add__
65+
# TODO: Consider deprecating these in favor of `union` (xref gh-15506)
66+
__add__ = __iadd__ = union
3567

3668
# Python 2 compat
3769
def __getslice__(self, i, j):

pandas/tests/indexes/test_frozen.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class TestFrozenList(CheckImmutable, CheckStringMixin):
1111
mutable_methods = ('extend', 'pop', 'remove', 'insert')
1212
unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"])
1313

14-
def setup_method(self, method):
14+
def setup_method(self, _):
1515
self.lst = [1, 2, 3, 4, 5]
1616
self.container = FrozenList(self.lst)
1717
self.klass = FrozenList
@@ -25,13 +25,30 @@ def test_add(self):
2525
expected = FrozenList([1, 2, 3] + self.lst)
2626
self.check_result(result, expected)
2727

28-
def test_inplace(self):
28+
def test_iadd(self):
2929
q = r = self.container
30+
3031
q += [5]
3132
self.check_result(q, self.lst + [5])
32-
# other shouldn't be mutated
33+
34+
# Other shouldn't be mutated.
3335
self.check_result(r, self.lst)
3436

37+
def test_union(self):
38+
result = self.container.union((1, 2, 3))
39+
expected = FrozenList(self.lst + [1, 2, 3])
40+
self.check_result(result, expected)
41+
42+
def test_difference(self):
43+
result = self.container.difference([2])
44+
expected = FrozenList([1, 3, 4, 5])
45+
self.check_result(result, expected)
46+
47+
def test_difference_dupe(self):
48+
result = FrozenList([1, 2, 3, 2]).difference([2])
49+
expected = FrozenList([1, 3])
50+
self.check_result(result, expected)
51+
3552

3653
class TestFrozenNDArray(CheckImmutable, CheckStringMixin):
3754
mutable_methods = ('put', 'itemset', 'fill')

0 commit comments

Comments
 (0)