Skip to content

Commit 6d0dfc1

Browse files
BUG: Fix groupby with MultiIndex Series corner case (pandas-dev#25704)
1 parent a61d823 commit 6d0dfc1

File tree

3 files changed

+23
-0
lines changed

3 files changed

+23
-0
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ Groupby/Resample/Rolling
237237
- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`)
238238
- Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying an aggregation function to timezone aware data (:issue:`23683`)
239239
- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`)
240+
- Bug in :func:`Series.groupby` where grouping a :class:`MultiIndex` Series by a list of index level names whose length equals the length of the series caused incorrect grouping (:issue:`25704`)
240241

241242

242243
Reshaping

pandas/core/groupby/grouper.py

+2
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
526526
if isinstance(obj, DataFrame):
527527
all_in_columns_index = all(g in obj.columns or g in obj.index.names
528528
for g in keys)
529+
elif isinstance(obj, Series):
530+
all_in_columns_index = all(g in obj.index.names for g in keys)
529531
else:
530532
all_in_columns_index = False
531533
except Exception:

pandas/tests/groupby/test_groupby.py

+20
Original file line numberDiff line numberDiff line change
@@ -1723,3 +1723,23 @@ def test_groupby_empty_list_raises():
17231723
msg = "Grouper and axis must be same length"
17241724
with pytest.raises(ValueError, match=msg):
17251725
df.groupby([[]])
1726+
1727+
1728+
def test_groupby_multiindex_series_keys_len_equal_group_axis():
    """Group a MultiIndex Series by as many level names as it has rows.

    The series has two rows and is grouped by two of its three index
    level names, so the number of keys equals the length of the grouped
    axis. Both rows share the same ('first', 'third') labels and must
    collapse into a single group whose sum is 3.
    """
    # GH 25704
    source_index = pd.MultiIndex.from_arrays(
        [['x', 'x'], ['a', 'b'], ['k', 'k']],
        names=['first', 'second', 'third'])
    series = pd.Series(data=[1, 2], index=source_index)

    # Only the grouped levels survive in the result's index.
    expected_index = pd.MultiIndex.from_arrays(
        [['x'], ['k']], names=['first', 'third'])
    expected = pd.Series([3], index=expected_index)

    result = series.groupby(['first', 'third']).sum()
    assert_series_equal(result, expected)

0 commit comments

Comments
 (0)