Skip to content

Commit 9123f7d

Browse files
committed
BUG: do not crash on a callable grouper returning tuples
closes #22257
1 parent 0370740 commit 9123f7d

File tree

3 files changed

+30
-3
lines changed

3 files changed

+30
-3
lines changed

doc/source/whatsnew/v0.23.5.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,11 @@ and bug fixes. We recommend that all users upgrade to this version.
2020
Fixed Regressions
2121
~~~~~~~~~~~~~~~~~
2222

23-
- Constructing a DataFrame with an index argument that wasn't already an
23+
- Constructing a :class:`DataFrame` with an index argument that wasn't already an
2424
instance of :class:`~pandas.core.Index` was broken in `4efb39f
2525
<https://github.com/pandas-dev/pandas/commit/4efb39f01f5880122fa38d91e12d217ef70fad9e>`_ (:issue:`22227`).
26-
-
26+
- Passing a callable or mapping that returns tuples as the grouper to
27+
:meth:`DataFrame.groupby` was broken in 0.21.1 (:issue:`22257`).
2728
-
2829

2930
.. _whatsnew_0235.bug_fixes:

pandas/core/groupby/grouper.py

+9
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,15 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
508508
warnings.warn(msg, FutureWarning, stacklevel=5)
509509
key = list(key)
510510

511+
if callable(key) or isinstance(key, dict):
512+
if level is None:
513+
key = group_axis.map(key)
514+
else:
515+
key = group_axis.get_level_values(level=level).map(key)
516+
# If the grouper is a callable or a dict, 'level' is _only_ used to
517+
# determine the input to the mapping
518+
level = None
519+
511520
if not isinstance(key, list):
512521
keys = [key]
513522
match_axis_length = False

pandas/tests/groupby/test_grouping.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,23 @@ def test_grouper_creation_bug(self):
202202
expected = s.groupby(level='one').sum()
203203
assert_series_equal(result, expected)
204204

205+
@pytest.mark.parametrize('func', [False, True])
206+
def test_grouper_returning_tuples(self, func):
207+
# GH 22257, both with dict and with callable
208+
df = pd.DataFrame({'X': ['A', 'B', 'A', 'B'], 'Y': [1, 4, 3, 2]})
209+
mapping = dict(zip(range(4), [('C', 5), ('D', 6)] * 2))
210+
211+
if func:
212+
gb = df.groupby(by=lambda idx: mapping[idx], sort=False)
213+
else:
214+
gb = df.groupby(by=mapping, sort=False)
215+
216+
name, expected = list(gb)[0]
217+
assert name == ('C', 5)
218+
result = gb.get_group(name)
219+
220+
assert_frame_equal(result, expected)
221+
205222
def test_grouper_column_and_index(self):
206223
# GH 14327
207224

@@ -346,7 +363,7 @@ def test_groupby_grouper_f_sanity_checked(self):
346363
# when the elements are Timestamp.
347364
# the result is Index[0:6], very confusing.
348365

349-
pytest.raises(AssertionError, ts.groupby, lambda key: key[0:6])
366+
pytest.raises(ValueError, ts.groupby, lambda key: key[0:6])
350367

351368
def test_grouping_error_on_multidim_input(self, df):
352369
pytest.raises(ValueError,

0 commit comments

Comments
 (0)