Skip to content

Commit 570e9e3

Browse files
committed
ENH: Add FrozenList.union and .difference
Re-attempt of gh-15506. Closes gh-15475.
1 parent 62a15fa commit 570e9e3

File tree

4 files changed

+67
-10
lines changed

4 files changed

+67
-10
lines changed

doc/source/groupby.rst

+10
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,16 @@ We could naturally group by either the ``A`` or ``B`` columns, or both:
125125
grouped = df.groupby('A')
126126
grouped = df.groupby(['A', 'B'])
127127
128+
.. versionadded:: 0.24
129+
130+
If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all
131+
but the specified columns:
132+
133+
.. ipython:: python
134+
135+
df2 = df.set_index(['A', 'B'])
136+
grouped = df2.groupby(level=df2.index.names.difference(['B']))
137+
128138
These will split the DataFrame on its index (rows). We could also split by the
129139
columns:
130140

doc/source/whatsnew/v0.24.0.txt

+1-3
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,8 @@ v0.24.0 (Month XX, 2018)
1313
New features
1414
~~~~~~~~~~~~
1515
- :func:`merge` now directly allows merge between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`)
16-
17-
1816
- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
19-
17+
- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods (:issue:`15475`, :issue:`15506`)
2018
- :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing
2119
the user to override the engine's default behavior to include or omit the
2220
dataframe's indexes from the resulting Parquet file. (:issue:`20768`)

pandas/core/indexes/frozen.py

+36-4
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,47 @@ class FrozenList(PandasObject, list):
2222
because it's technically non-hashable, will be used
2323
for lookups, appropriately, etc.
2424
"""
25-
# Sidenote: This has to be of type list, otherwise it messes up PyTables
26-
# typechecks
25+
# Side note: This has to be of type list. Otherwise,
26+
# it messes up PyTables type checks.
2727

28-
def __add__(self, other):
28+
def union(self, other):
29+
"""
30+
Returns a FrozenList with other concatenated to the end of self.
31+
32+
Parameters
33+
----------
34+
other : array-like
35+
The array-like whose elements we are concatenating.
36+
37+
Returns
38+
-------
39+
union : FrozenList
40+
The concatenation of self and other (other's elements appended to self).
41+
"""
2942
if isinstance(other, tuple):
3043
other = list(other)
3144
return self.__class__(super(FrozenList, self).__add__(other))
3245

33-
__iadd__ = __add__
46+
def difference(self, other):
47+
"""
48+
Returns a FrozenList with elements from other removed from self.
49+
50+
Parameters
51+
----------
52+
other : array-like
53+
The array-like whose elements we are removing from self.
54+
55+
Returns
56+
-------
57+
diff : FrozenList
58+
The collection difference between self and other.
59+
"""
60+
other = set(other)
61+
temp = [x for x in self if x not in other]
62+
return self.__class__(temp)
63+
64+
# TODO: Consider deprecating these in favor of `union` (xref gh-15506)
65+
__add__ = __iadd__ = union
3466

3567
# Python 2 compat
3668
def __getslice__(self, i, j):

pandas/tests/indexes/test_frozen.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ class TestFrozenList(CheckImmutable, CheckStringMixin):
1010
mutable_methods = ('extend', 'pop', 'remove', 'insert')
1111
unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"])
1212

13-
def setup_method(self, method):
13+
def setup_method(self, _):
1414
self.lst = [1, 2, 3, 4, 5]
1515
self.container = FrozenList(self.lst)
1616
self.klass = FrozenList
@@ -24,13 +24,30 @@ def test_add(self):
2424
expected = FrozenList([1, 2, 3] + self.lst)
2525
self.check_result(result, expected)
2626

27-
def test_inplace(self):
27+
def test_iadd(self):
2828
q = r = self.container
29+
2930
q += [5]
3031
self.check_result(q, self.lst + [5])
31-
# other shouldn't be mutated
32+
33+
# Other shouldn't be mutated.
3234
self.check_result(r, self.lst)
3335

36+
def test_union(self):
37+
result = self.container.union((1, 2, 3))
38+
expected = FrozenList(self.lst + [1, 2, 3])
39+
self.check_result(result, expected)
40+
41+
def test_difference(self):
42+
result = self.container.difference([2])
43+
expected = FrozenList([1, 3, 4, 5])
44+
self.check_result(result, expected)
45+
46+
def test_difference_dupe(self):
47+
result = FrozenList([1, 2, 3, 2]).difference([2])
48+
expected = FrozenList([1, 3])
49+
self.check_result(result, expected)
50+
3451

3552
class TestFrozenNDArray(CheckImmutable, CheckStringMixin):
3653
mutable_methods = ('put', 'itemset', 'fill')

0 commit comments

Comments
 (0)