From 6d0dfc12e192ba9891849acfcf463d3f519bb98e Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Fri, 15 Mar 2019 17:38:49 -0400 Subject: [PATCH] BUG: Fix groupby with MultiIndex Series corner case (#25704) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/groupby/grouper.py | 2 ++ pandas/tests/groupby/test_groupby.py | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index ddc5e543c6165..7a5d5f243876b 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -237,6 +237,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) +- Bug in :func:`Series.groupby` where grouping a :class:`MultiIndex` Series by a list of index level names whose length equals the length of the Series caused incorrect grouping (:issue:`25704`) Reshaping diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d1ebb9cbe8ac4..317a6025cb504 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -526,6 +526,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, if isinstance(obj, DataFrame): all_in_columns_index = all(g in obj.columns or g in obj.index.names for g in keys) + elif isinstance(obj, Series): + all_in_columns_index = all(g in obj.index.names for g in keys) else: all_in_columns_index = False except Exception: diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f80a7300334e4..74563ab9517e2 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ 
-1723,3 +1723,23 @@ def test_groupby_empty_list_raises(): msg = "Grouper and axis must be same length" with pytest.raises(ValueError, match=msg): df.groupby([[]]) + + +def test_groupby_multiindex_series_keys_len_equal_group_axis(): + # GH 25704 + index_array = [ + ['x', 'x'], + ['a', 'b'], + ['k', 'k'] + ] + index_names = ['first', 'second', 'third'] + ri = pd.MultiIndex.from_arrays(index_array, names=index_names) + s = pd.Series(data=[1, 2], index=ri) + result = s.groupby(['first', 'third']).sum() + + index_array = [['x'], ['k']] + index_names = ['first', 'third'] + ei = pd.MultiIndex.from_arrays(index_array, names=index_names) + expected = pd.Series([3], index=ei) + + assert_series_equal(result, expected)