Skip to content

Commit 279753c

Browse files
shantanu-gontia authored and jreback committed
BUG: ngroups and len(groups) do not equal when grouping with a list of Grouper and column label (GH26326) (#26374)
1 parent b99e84f commit 279753c

File tree

4 files changed

+36
-2
lines changed

4 files changed

+36
-2
lines changed

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -447,9 +447,9 @@ Groupby/Resample/Rolling
447447
- Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`)
448448
- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`)
449449
- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise error (:issue:`26208`)
450+
- Bug in :meth:`pandas.core.frame.DataFrame.groupby` where passing a :class:`pandas.core.groupby.grouper.Grouper` would return incorrect groups when using the ``.groups`` accessor (:issue:`26326`)
450451
- Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. (:issue:`26310`)
451452

452-
453453
Reshaping
454454
^^^^^^^^^
455455

pandas/core/groupby/grouper.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
280280
if self.name is None:
281281
self.name = grouper.result_index.name
282282
self.obj = self.grouper.obj
283-
self.grouper = grouper
283+
self.grouper = grouper._get_grouper()
284284

285285
else:
286286
if self.grouper is None and self.name is not None:

pandas/core/groupby/ops.py

+18
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,15 @@ def _get_splitter(self, data, axis=0):
150150
comp_ids, _, ngroups = self.group_info
151151
return get_splitter(data, comp_ids, ngroups, axis=axis)
152152

153+
def _get_grouper(self):
154+
"""
155+
We are a grouper as part of another's groupings.
156+
157+
We have a specific method of grouping, so cannot
158+
convert to a Index for our grouper.
159+
"""
160+
return self.groupings[0].grouper
161+
153162
def _get_group_keys(self):
154163
if len(self.groupings) == 1:
155164
return self.levels[0]
@@ -707,6 +716,15 @@ def groups(self):
707716
def nkeys(self):
708717
return 1
709718

719+
def _get_grouper(self):
720+
"""
721+
We are a grouper as part of another's groupings.
722+
723+
We have a specific method of grouping, so cannot
724+
convert to a Index for our grouper.
725+
"""
726+
return self
727+
710728
def get_iterator(self, data, axis=0):
711729
"""
712730
Groupby iterator

pandas/tests/groupby/test_groupby.py

+16
Original file line numberDiff line numberDiff line change
@@ -1736,3 +1736,19 @@ def test_groupby_multiindex_series_keys_len_equal_group_axis():
17361736
expected = pd.Series([3], index=ei)
17371737

17381738
assert_series_equal(result, expected)
1739+
1740+
1741+
def test_groupby_groups_in_BaseGrouper():
    """
    ``.groups`` is correct when a ``pd.Grouper`` is combined with a label.

    Regression test for GH 26326: grouping by a list that mixes a
    ``pd.Grouper(level=...)`` with a plain label must yield the same
    ``.groups`` mapping as grouping by the two labels directly, whichever
    of the two comes first.
    """
    # GH 26326
    mi = pd.MultiIndex.from_product([['A', 'B'],
                                     ['C', 'D']], names=['alpha', 'beta'])
    df = pd.DataFrame({'foo': [1, 2, 1, 2], 'bar': [1, 2, 3, 4]},
                      index=mi)

    # Every (alpha, beta) pair occurs exactly once in this frame, so each
    # group holds a single row and plain dict equality on ``.groups`` is
    # well defined.

    # Grouper first, label second.
    result = df.groupby([pd.Grouper(level='alpha'), 'beta'])
    expected = df.groupby(['alpha', 'beta'])
    assert result.groups == expected.groups

    # Label first, Grouper second.
    result = df.groupby(['beta', pd.Grouper(level='alpha')])
    expected = df.groupby(['beta', 'alpha'])
    assert result.groups == expected.groups

0 commit comments

Comments
 (0)