Skip to content

Commit 2dbf013

Browse files
GuessWhoSamFoo authored and No-Stream committed
BUG: DataFrame.groupby() interprets tuple as list of keys
closes pandas-dev#17979

Author: sfoo <[email protected]>
Author: Jeff Reback <[email protected]>

Closes pandas-dev#17996 from GuessWhoSamFoo/groupby_tuples and squashes the following commits:

afb0031 [Jeff Reback] TST: separate out grouping-type tests
c52b2a8 [sfoo] Moved notes to 0.22; created is_axis_multiindex var - pending internal use
fb52c1c [sfoo] Added whatsnew; checked match_axis_length
99ebc4e [sfoo] Cast groupby tuple as list when multiindex
1 parent fa81a59 commit 2dbf013

File tree

3 files changed

+24
-2
lines changed

3 files changed

+24
-2
lines changed

doc/source/whatsnew/v0.22.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ Bug Fixes
8888
~~~~~~~~~
8989

9090
- Bug in ``pd.read_msgpack()`` when a non-existent file is passed in Python 2 (:issue:`15296`)
91+
- Bug in ``DataFrame.groupby`` where a tuple passed as a key for a ``MultiIndex`` was interpreted as a list of keys (:issue:`17979`)
9192

9293
Conversion
9394
^^^^^^^^^^

pandas/core/groupby.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -2754,6 +2754,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
27542754
27552755
"""
27562756
group_axis = obj._get_axis(axis)
2757+
is_axis_multiindex = isinstance(obj._info_axis, MultiIndex)
27572758

27582759
# validate that the passed single level is compatible with the passed
27592760
# axis of the object
@@ -2814,7 +2815,9 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
28142815
elif isinstance(key, BaseGrouper):
28152816
return key, [], obj
28162817

2817-
if not isinstance(key, (tuple, list)):
2818+
# when MultiIndex, allow tuple to be a key
2819+
if not isinstance(key, (tuple, list)) or \
2820+
(isinstance(key, tuple) and is_axis_multiindex):
28182821
keys = [key]
28192822
match_axis_length = False
28202823
else:
@@ -2918,7 +2921,6 @@ def is_in_obj(gpr):
29182921

29192922
# create the internals grouper
29202923
grouper = BaseGrouper(group_axis, groupings, sort=sort, mutated=mutated)
2921-
29222924
return grouper, exclusions, obj
29232925

29242926

pandas/tests/groupby/test_grouping.py

+19
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,25 @@ def test_multifunc_select_col_integer_cols(self):
356356
# it works!
357357
df.groupby(1, as_index=False)[2].agg({'Q': np.mean})
358358

359+
def test_groupby_multiindex_tuple(self):
360+
# GH 17979
361+
df = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
362+
columns=pd.MultiIndex.from_arrays(
363+
[['a', 'b', 'b', 'c'],
364+
[1, 1, 2, 2]]))
365+
expected = df.groupby([('b', 1)]).groups
366+
result = df.groupby(('b', 1)).groups
367+
tm.assert_dict_equal(expected, result)
368+
369+
df2 = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
370+
columns=pd.MultiIndex.from_arrays(
371+
[['a', 'b', 'b', 'c'],
372+
['d', 'd', 'e', 'e']]))
373+
df2.groupby([('b', 'd')]).groups
374+
expected = df.groupby([('b', 'd')]).groups
375+
result = df.groupby(('b', 'd')).groups
376+
tm.assert_dict_equal(expected, result)
377+
359378
@pytest.mark.parametrize('sort', [True, False])
360379
def test_groupby_level(self, sort):
361380
# GH 17537

0 commit comments

Comments
 (0)