diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt index 88ea7a6caecfc..39c47303142a6 100644 --- a/doc/source/whatsnew/v0.23.5.txt +++ b/doc/source/whatsnew/v0.23.5.txt @@ -20,10 +20,11 @@ and bug fixes. We recommend that all users upgrade to this version. Fixed Regressions ~~~~~~~~~~~~~~~~~ -- Constructing a DataFrame with an index argument that wasn't already an +- Constructing a :class:`DataFrame` with an index argument that wasn't already an instance of :class:`~pandas.core.Index` was broken in `4efb39f `_ (:issue:`22227`). -- +- Passing a callable or mapping that returns tuples as the grouper to + :meth:`DataFrame.groupby` was broken in 0.21.1 (:issue:`22257`). - .. _whatsnew_0235.bug_fixes: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 35d4a024a4e6c..945d55c63da92 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -508,6 +508,15 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, warnings.warn(msg, FutureWarning, stacklevel=5) key = list(key) + if callable(key) or isinstance(key, dict): + if level is None: + key = group_axis.map(key) + else: + key = group_axis.get_level_values(level=level).map(key) + # If the grouper is a mapping, 'level' is _only_ used to determine + # the mapping input + level = None + if not isinstance(key, list): keys = [key] match_axis_length = False diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 58e9797dbeea5..5913c2f481e10 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -202,6 +202,23 @@ def test_grouper_creation_bug(self): expected = s.groupby(level='one').sum() assert_series_equal(result, expected) + @pytest.mark.parametrize('func', [False, True]) + def test_grouper_returning_tuples(self, func): + # GH 22257 , both with dict and with callable + df = pd.DataFrame({'X': ['A', 'B', 'A', 'B'], 'Y': [1, 4, 3, 2]}) + mapping = 
dict(zip(range(4), [('C', 5), ('D', 6)] * 2)) + + if func: + gb = df.groupby(by=lambda idx: mapping[idx], sort=False) + else: + gb = df.groupby(by=mapping, sort=False) + + name, expected = list(gb)[0] + assert name == ('C', 5) + result = gb.get_group(name) + + assert_frame_equal(result, expected) + def test_grouper_column_and_index(self): # GH 14327 @@ -346,7 +363,7 @@ def test_groupby_grouper_f_sanity_checked(self): # when the elements are Timestamp. # the result is Index[0:6], very confusing. - pytest.raises(AssertionError, ts.groupby, lambda key: key[0:6]) + pytest.raises(ValueError, ts.groupby, lambda key: key[0:6]) def test_grouping_error_on_multidim_input(self, df): pytest.raises(ValueError,