From bc8081df566f9acb5a37c371ba8da0f0721a3ee9 Mon Sep 17 00:00:00 2001 From: Sven Skobowsky Date: Wed, 28 Jul 2021 17:00:03 +0200 Subject: [PATCH 1/3] BUG: Series.groupby fails with InvalidIndexError on time series with a tuple-named grouper. (#42731) --- pandas/core/groupby/grouper.py | 4 +++- pandas/tests/groupby/test_groupby.py | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 76815d780a1ad..00a074f60feef 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -845,9 +845,11 @@ def is_in_obj(gpr) -> bool: return False try: return gpr is obj[gpr.name] - except (KeyError, IndexError): + except (KeyError, IndexError, InvalidIndexError): # IndexError reached in e.g. test_skip_group_keys when we pass # lambda here + # InvalidIndexError raised on key-types inappropriate for index, + # e.g. DatetimeIndex.get_loc(tuple()) return False for gpr, level in zip(keys, levels): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 538a707aa3580..3e308484a6e71 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2311,6 +2311,15 @@ def test_groupby_series_with_tuple_name(): tm.assert_series_equal(result, expected) +def test_time_series_groupby_series_named_with_tuple(): + # GH 42731 + ser = Series([1, 2, 3, 4], index=[Timestamp(2021, 7, 28 + i) for i in range(4)]) + grp = Series([1, 0, 1, 0], name=('a', 'a')) + result = ser.groupby(grp).last() + expected = ser.groupby(grp.rename(None)).last() + tm.assert_series_equal(result, expected, check_names=False) + + @pytest.mark.xfail(not IS64, reason="GH#38778: fail on 32-bit system") @pytest.mark.parametrize( "func, values", [("sum", [97.0, 98.0]), ("mean", [24.25, 24.5])] From 929a00dbcc704b9d09e0dacfd4d97a9727f753b3 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 14 Aug 2021 09:31:13 -0400 Subject: [PATCH 2/3] whatsnew, moved and expanded test --- doc/source/whatsnew/v1.3.2.rst | 1 + pandas/tests/groupby/test_groupby.py | 9 --------- pandas/tests/groupby/test_grouping.py | 18 ++++++++++++++++++ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index a94eab960418b..f1ed3095122df 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -26,6 +26,7 @@ Fixed regressions - Fixed regression in :func:`concat` where ``copy=False`` was not honored in ``axis=1`` Series concatenation (:issue:`42501`) - Regression in :meth:`Series.nlargest` and :meth:`Series.nsmallest` with nullable integer or float dtype (:issue:`42816`) - Fixed regression in :meth:`Series.quantile` with :class:`Int64Dtype` (:issue:`42626`) +- Fixed regression in :meth:`Series.groupby` and :meth:`DataFrame.groupby` where supplying the ``by`` argument with a Series named with a tuple would incorrectly raise (:issue:`42731`) .. --------------------------------------------------------------------------- diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 3e308484a6e71..538a707aa3580 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2311,15 +2311,6 @@ def test_groupby_series_with_tuple_name(): tm.assert_series_equal(result, expected) -def test_time_series_groupby_series_named_with_tuple(): - # GH 42731 - ser = Series([1, 2, 3, 4], index=[Timestamp(2021, 7, 28 + i) for i in range(4)]) - grp = Series([1, 0, 1, 0], name=('a', 'a')) - result = ser.groupby(grp).last() - expected = ser.groupby(grp.rename(None)).last() - tm.assert_series_equal(result, expected, check_names=False) - - @pytest.mark.xfail(not IS64, reason="GH#38778: fail on 32-bit system") @pytest.mark.parametrize( "func, values", [("sum", [97.0, 98.0]), ("mean", [24.25, 24.5])] diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 3d02e784d83b0..b05bdd32eeb47 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -396,6 +396,24 @@ def test_groupby_dict_mapping(self): tm.assert_series_equal(result, result2) tm.assert_series_equal(result, expected2) + @pytest.mark.parametrize( + "index", + [ + [0, 1, 2, 3], + ["a", "b", "c", "d"], + [Timestamp(2021, 7, 28 + i) for i in range(4)], + ], + ) + @pytest.mark.parametrize("box", [Series, DataFrame]) + def test_groupby_series_named_with_tuple(self, box, index): + # GH 42731 + obj = box([1, 2, 3, 4], index=index) + groups = Series([1, 0, 1, 0], index=index, name=("a", "a")) + result = obj.groupby(groups).last() + expected = box([4, 3]) + expected.index.name = ("a", "a") + tm.assert_equal(result, expected) + def test_groupby_grouper_f_sanity_checked(self): dates = date_range("01-Jan-2013", periods=12, freq="MS") ts = Series(np.random.randn(12), index=dates) From 701889199efd7580bdc2e9e397dc7552df9c5aad Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 4 Sep 2021 22:10:42 -0400 Subject: [PATCH 3/3] Use frame_or_series --- pandas/tests/groupby/test_grouping.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index b28179781d53e..527b93a28359c 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -408,13 +408,12 @@ def test_groupby_dict_mapping(self): [Timestamp(2021, 7, 28 + i) for i in range(4)], ], ) - @pytest.mark.parametrize("box", [Series, DataFrame]) - def test_groupby_series_named_with_tuple(self, box, index): + def test_groupby_series_named_with_tuple(self, frame_or_series, index): # GH 42731 - obj = box([1, 2, 3, 4], index=index) + obj = frame_or_series([1, 2, 3, 4], index=index) groups = Series([1, 0, 1, 0], index=index, name=("a", "a")) result = obj.groupby(groups).last() - expected = box([4, 3]) + expected = frame_or_series([4, 3]) expected.index.name = ("a", "a") tm.assert_equal(result, expected)