diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 94f8fd16ea85a..37f609b847b02 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -371,6 +371,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) +- Bug in :func:`Series.groupby` where using ``groupby`` with a :class:`MultiIndex` Series with a list of labels whose length equals the length of the Series caused incorrect grouping (:issue:`25704`) - Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`) - Bug in :func:`idxmax` and :func:`idxmin` on :meth:`DataFrame.groupby` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 617dc6a3aba08..d02775cd4b328 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -524,6 +524,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, if isinstance(obj, DataFrame): all_in_columns_index = all(g in obj.columns or g in obj.index.names for g in keys) + elif isinstance(obj, Series): + all_in_columns_index = all(g in obj.index.names for g in keys) else: all_in_columns_index = False except Exception: diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d5f2ac3d3f7bb..4f1e9344c4336 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1719,3 +1719,23 @@ def test_groupby_empty_list_raises(): msg = "Grouper and axis must be same length" with 
pytest.raises(ValueError, match=msg): df.groupby([[]]) + + +def test_groupby_multiindex_series_keys_len_equal_group_axis(): + # GH 25704 + index_array = [ + ['x', 'x'], + ['a', 'b'], + ['k', 'k'] + ] + index_names = ['first', 'second', 'third'] + ri = pd.MultiIndex.from_arrays(index_array, names=index_names) + s = pd.Series(data=[1, 2], index=ri) + result = s.groupby(['first', 'third']).sum() + + index_array = [['x'], ['k']] + index_names = ['first', 'third'] + ei = pd.MultiIndex.from_arrays(index_array, names=index_names) + expected = pd.Series([3], index=ei) + + assert_series_equal(result, expected)