From c21c5c9c140ec3302646ce06ff713959c6a184e8 Mon Sep 17 00:00:00 2001 From: aram-cinnamon Date: Sun, 4 Aug 2024 22:48:25 +0200 Subject: [PATCH 01/12] Index should be ignored when using the on keyword argument to resample --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/resample.py | 3 ++ .../tests/resample/test_resampler_grouper.py | 33 +++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f25edd39cf7da..946f8c8920c18 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -120,6 +120,7 @@ These improvements also fixed certain bugs in groupby: - :meth:`.DataFrameGroupBy.agg` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`36698`) - :meth:`.DataFrameGroupBy.groups` with ``sort=False`` would sort groups; they now occur in the order they are observed (:issue:`56966`) - :meth:`.DataFrameGroupBy.nunique` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`52848`) +- :meth:`.DataFrameGroupBy.resample` with an ``on`` value that is not ``None`` would have incorrect values when the index is out of order (:issue:`59350`) - :meth:`.DataFrameGroupBy.sum` would have incorrect values when there are multiple groupings, unobserved groups, and non-numeric data (:issue:`43891`) - :meth:`.DataFrameGroupBy.value_counts` would produce incorrect results when used with some categorical and some non-categorical groupings and ``observed=False`` (:issue:`56016`) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 8ee71ea2293e6..a912548dd6da9 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1927,6 +1927,9 @@ def get_resampler_for_grouping( """ # .resample uses 'on' similar to how .groupby uses 'key' tg = TimeGrouper(freq=rule, key=on, **kwargs) + # GH 59350: Index should be ignored when using the on keyword argument to resample + if on is not None: + groupby.obj = groupby.obj.reset_index(drop=True) resampler = tg._get_resampler(groupby.obj) return resampler._get_resampler_for_grouping( groupby=groupby, include_groups=include_groups, key=tg.key diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index ff1b82210e20d..85880341d3ddc 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -689,3 +689,36 @@ def test_groupby_resample_on_index_with_list_of_keys_missing_column(): rs = gb.resample("2D") with pytest.raises(KeyError, match="Columns not found"): rs[["val_not_in_dataframe"]] + + +def test_groupby_resample_when_index_is_out_of_order(): + # GH 59350 + df = DataFrame( + data={ + "datetime": [ + pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + index=[1, 0], + ) + + gb = df.groupby("group") + rs = gb.resample("1min", on="datetime") + result = rs.aggregate({"numbers": "sum"}) + + index = pd.MultiIndex.from_arrays( + [ + ["A", "A"], + [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")], + ], + names=[ + "group", + "datetime", + ], + ) + expected = DataFrame({"numbers": [100, 200]}, index=index) + + tm.assert_frame_equal(result, expected) From 626b722399b4eb7678f9aff43ffa0826159bd91b Mon Sep 17 00:00:00 2001 From: aram-cinnamon Date: Sat, 10 Aug 2024 16:43:57 +0200 Subject: [PATCH 02/12] add test for when index is set from column --- .../tests/resample/test_resampler_grouper.py | 40 +++++++++++++------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 85880341d3ddc..33e9aeb9dab4b 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -691,20 +691,34 @@ def test_groupby_resample_on_index_with_list_of_keys_missing_column(): rs[["val_not_in_dataframe"]] -def test_groupby_resample_when_index_is_out_of_order(): +@pytest.mark.parametrize( + "df", + [ + DataFrame( + data={ + "datetime": [ + pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + index=[1, 0], + ), + DataFrame( + data={ + "datetime": [ + pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + ).set_index("group"), + ], +) +def test_groupby_resample_on_column_when_index_is_not_a_range_of_numbers(df): # GH 59350 - df = DataFrame( - data={ - "datetime": [ - pd.to_datetime("2024-07-30T00:00Z"), - pd.to_datetime("2024-07-30T00:01Z"), - ], - "group": ["A", "A"], - "numbers": [100, 200], - }, - index=[1, 0], - ) - gb = df.groupby("group") rs = gb.resample("1min", on="datetime") result = rs.aggregate({"numbers": "sum"}) From 70d6ee8cac941c7429e31198cf571185aba94d00 Mon Sep 17 00:00:00 2001 From: aram-cinnamon Date: Sat, 10 Aug 2024 16:54:25 +0200 Subject: [PATCH 03/12] minor: change test function name --- pandas/tests/resample/test_resampler_grouper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 33e9aeb9dab4b..67e3e6fb250de 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -717,7 +717,7 @@ def test_groupby_resample_on_index_with_list_of_keys_missing_column(): ).set_index("group"), ], ) -def test_groupby_resample_on_column_when_index_is_not_a_range_of_numbers(df): +def test_groupby_resample_on_column_when_index_is_unusual(df): # GH 59350 gb = df.groupby("group") rs = gb.resample("1min", on="datetime") From 5d779419eae056d653d9c87f268857fa6dba5d9c Mon Sep 17 00:00:00 2001 From: aram-cinnamon Date: Sat, 10 Aug 2024 16:57:44 +0200 Subject: [PATCH 04/12] add test case --- pandas/tests/resample/test_resampler_grouper.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 67e3e6fb250de..76670fabf8445 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -715,6 +715,16 @@ def test_groupby_resample_on_index_with_list_of_keys_missing_column(): "numbers": [100, 200], }, ).set_index("group"), + DataFrame( + data={ + "datetime": [ + pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + ).set_index("datetime", drop=False), ], ) def test_groupby_resample_on_column_when_index_is_unusual(df): From c92ec0b890aab4ac41d411d75991e01227af7635 Mon Sep 17 00:00:00 2001 From: aram-cinnamon Date: Tue, 13 Aug 2024 02:38:01 +0200 Subject: [PATCH 05/12] move reset_index --- pandas/core/resample.py | 6 +- .../tests/resample/test_resampler_grouper.py | 125 ++++++++++++++++++ 2 files changed, 128 insertions(+), 3 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index a912548dd6da9..dae3a896db6c8 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1597,6 +1597,9 @@ def _gotitem(self, key, ndim, subset=None): if isinstance(key, list) and self.key not in key and self.key is not None: key.append(self.key) groupby = self._groupby[key] + # GH 59350: Index is ignored when using the on keyword argument to resample + if self.key is not None: + groupby.obj = groupby.obj.reset_index(drop=True) except IndexError: groupby = self._groupby @@ -1927,9 +1930,6 @@ def get_resampler_for_grouping( """ # .resample uses 'on' similar to how .groupby uses 'key' tg = TimeGrouper(freq=rule, key=on, **kwargs) - # GH 59350: Index should be ignored when using the on keyword argument to resample - if on is not None: - groupby.obj = groupby.obj.reset_index(drop=True) resampler = tg._get_resampler(groupby.obj) return resampler._get_resampler_for_grouping( groupby=groupby, include_groups=include_groups, key=tg.key diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 76670fabf8445..5f8305c1ec6da 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -691,6 +691,38 @@ def test_groupby_resample_on_index_with_list_of_keys_missing_column(): rs[["val_not_in_dataframe"]] +def test_groupby_resample_after_set_index_and_not_on_column(): + # GH 59350 + df = DataFrame( + data={ + "datetime": [ + pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + index=[1, 0], + ).set_index("datetime") + gb = df.groupby("group") + rs = gb.resample("1min") + result = rs.aggregate({"numbers": "sum"}) + + index = pd.MultiIndex.from_arrays( + [ + ["A", "A"], + [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")], + ], + names=[ + "group", + "datetime", + ], + ) + expected = DataFrame({"numbers": [100, 200]}, index=index) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( "df", [ @@ -746,3 +778,96 @@ def test_groupby_resample_on_column_when_index_is_unusual(df): expected = DataFrame({"numbers": [100, 200]}, index=index) tm.assert_frame_equal(result, expected) + + +def test_groupby_resample_then_groupby_is_reused_when_index_is_out_of_order(): + df = DataFrame( + data={ + "datetime": [ + pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + index=[1, 0], + ) + + gb = df.groupby("group") + + # use gb + result_1 = gb[["numbers"]].transform("sum") + + index = Index([1, 0]) + expected = DataFrame({"numbers": [300, 300]}, index=index) + + tm.assert_frame_equal(result_1, expected) + + # resample gb, unrelated to above + rs = gb.resample("1min", on="datetime") + result_2 = rs.aggregate({"numbers": "sum"}) + + index = pd.MultiIndex.from_arrays( + [ + ["A", "A"], + [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")], + ], + names=[ + "group", + "datetime", + ], + ) + expected = DataFrame({"numbers": [100, 200]}, index=index) + + tm.assert_frame_equal(result_2, expected) + + # reuse gb, unrelated to above + result_3 = gb[["numbers"]].transform("sum") + + tm.assert_frame_equal(result_1, result_3) + + +def test_groupby_resample_then_groupby_is_reused_when_index_is_set_from_column(): + df = DataFrame( + data={ + "datetime": [ + pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + ).set_index("group") + + gb = df.groupby("group") + + # use gb + result_1 = gb[["numbers"]].transform("sum") + + index = Index(["A", "A"], name="group") + expected = DataFrame({"numbers": [300, 300]}, index=index) + + tm.assert_frame_equal(result_1, expected) + + # resample gb, unrelated to above + rs = gb.resample("1min", on="datetime") + result_2 = rs.aggregate({"numbers": "sum"}) + + index = pd.MultiIndex.from_arrays( + [ + ["A", "A"], + [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")], + ], + names=[ + "group", + "datetime", + ], + ) + expected = DataFrame({"numbers": [100, 200]}, index=index) + + tm.assert_frame_equal(result_2, expected) + + # reuse gb, unrelated to above + result_3 = gb[["numbers"]].transform("sum") + + tm.assert_frame_equal(result_1, result_3) From 1ef4d3bc782ffcff7e82ae0a2389c5d9ea540013 Mon Sep 17 00:00:00 2001 From: aram-cinnamon Date: Tue, 13 Aug 2024 02:50:17 +0200 Subject: [PATCH 06/12] minor: add GH issue number to tests --- pandas/tests/resample/test_resampler_grouper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 5f8305c1ec6da..1ad23d95526ef 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -781,6 +781,7 @@ def test_groupby_resample_on_column_when_index_is_unusual(df): def test_groupby_resample_then_groupby_is_reused_when_index_is_out_of_order(): + # GH 59350 df = DataFrame( data={ "datetime": [ @@ -828,6 +829,7 @@ def test_groupby_resample_then_groupby_is_reused_when_index_is_out_of_order(): def test_groupby_resample_then_groupby_is_reused_when_index_is_set_from_column(): + # GH 59350 df = DataFrame( data={ "datetime": [ From 55fe96bc2ab4a69d3486d97b790b7ff41a95245c Mon Sep 17 00:00:00 2001 From: aram-cinnamon Date: Sun, 18 Aug 2024 00:35:39 +0200 Subject: [PATCH 07/12] add test --- .../tests/resample/test_resampler_grouper.py | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 1ad23d95526ef..c570951378951 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -873,3 +873,62 @@ def test_groupby_resample_then_groupby_is_reused_when_index_is_set_from_column() result_3 = gb[["numbers"]].transform("sum") tm.assert_frame_equal(result_1, result_3) + + +def test_groupby_resample_then_groupby_is_reused_when_groupby_selection_is_not_none(): + # GH 59350 + df = DataFrame( + data={ + "datetime": [ + pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + index=[1, 0], + ) + + gb = df.groupby("group") + gb = gb[["numbers", "datetime"]] # gb._selection is ["numbers", "datetime"] + + # use gb + result_1 = gb.transform("max") + + index = Index([1, 0]) + expected = DataFrame( + { + "numbers": [200, 200], + "datetime": [ + pd.to_datetime("2024-07-30T00:01Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + }, + index=index, + ) + + tm.assert_frame_equal(result_1, expected) + + # resample gb, unrelated to above + rs = gb.resample("1min", on="datetime") + result_2 = rs.aggregate({"numbers": "sum"}) # Enter the `except IndexError:` block + + index = pd.MultiIndex.from_arrays( + [ + ["A", "A"], + [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")], + ], + names=[ + "group", + "datetime", + ], + ) + columns = pd.MultiIndex.from_arrays([["numbers"], ["numbers"]]) + expected = DataFrame([[100], [200]], index=index, columns=columns) + + tm.assert_frame_equal(result_2, expected) + + # reuse gb, unrelated to above + result_3 = gb.transform("max") + + tm.assert_frame_equal(result_1, result_3) From 10836a26d9687fd41c9f0b6b92e53efbeb058943 Mon Sep 17 00:00:00 2001 From: aram-cinnamon Date: Wed, 28 Aug 2024 03:23:38 +0200 Subject: [PATCH 08/12] push reset_index to _set_grouper --- pandas/core/groupby/grouper.py | 5 +++++ pandas/core/resample.py | 3 --- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 5f9ebdcea4a2d..9ee9661b340fc 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -34,6 +34,7 @@ from pandas.core.indexes.api import ( Index, MultiIndex, + RangeIndex, default_index, ) from pandas.core.series import Series @@ -349,6 +350,10 @@ def _set_grouper( unsorted_ax = self._grouper.take(reverse_indexer) ax = unsorted_ax.take(obj.index) else: + if not isinstance(obj.index, RangeIndex): + # GH 59350: Index is ignored when using the on keyword argument + # to resample. + obj = obj.reset_index(drop=True) ax = self._grouper.take(obj.index) else: if key not in obj._info_axis: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index ecab40b8dcecc..b621fcf9a6415 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1597,9 +1597,6 @@ def _gotitem(self, key, ndim, subset=None): if isinstance(key, list) and self.key not in key and self.key is not None: key.append(self.key) groupby = self._groupby[key] - # GH 59350: Index is ignored when using the on keyword argument to resample - if self.key is not None: - groupby.obj = groupby.obj.reset_index(drop=True) except IndexError: groupby = self._groupby From 62bdbdf05a4fde75e7b9d8ec0e9298ccd1db1be7 Mon Sep 17 00:00:00 2001 From: aram-cinnamon Date: Wed, 28 Aug 2024 03:35:20 +0200 Subject: [PATCH 09/12] simplify --- pandas/core/groupby/grouper.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 9ee9661b340fc..283aceb59d20b 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -349,12 +349,11 @@ def _set_grouper( reverse_indexer = self._indexer.argsort() unsorted_ax = self._grouper.take(reverse_indexer) ax = unsorted_ax.take(obj.index) - else: - if not isinstance(obj.index, RangeIndex): - # GH 59350: Index is ignored when using the on keyword argument - # to resample. - obj = obj.reset_index(drop=True) + elif isinstance(obj.index, RangeIndex): ax = self._grouper.take(obj.index) + else: + # GH 59350 + ax = self._grouper else: if key not in obj._info_axis: raise KeyError(f"The grouper name {key} is not found") From 7a0ad972f6c355cd9978369bd5411771ec107858 Mon Sep 17 00:00:00 2001 From: aram-cinnamon Date: Wed, 4 Sep 2024 05:16:43 +0200 Subject: [PATCH 10/12] remove tests per comments --- .../tests/resample/test_resampler_grouper.py | 154 ------------------ 1 file changed, 154 deletions(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index c570951378951..db27b3af11d2e 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -778,157 +778,3 @@ def test_groupby_resample_on_column_when_index_is_unusual(df): expected = DataFrame({"numbers": [100, 200]}, index=index) tm.assert_frame_equal(result, expected) - - -def test_groupby_resample_then_groupby_is_reused_when_index_is_out_of_order(): - # GH 59350 - df = DataFrame( - data={ - "datetime": [ - pd.to_datetime("2024-07-30T00:00Z"), - pd.to_datetime("2024-07-30T00:01Z"), - ], - "group": ["A", "A"], - "numbers": [100, 200], - }, - index=[1, 0], - ) - - gb = df.groupby("group") - - # use gb - result_1 = gb[["numbers"]].transform("sum") - - index = Index([1, 0]) - expected = DataFrame({"numbers": [300, 300]}, index=index) - - tm.assert_frame_equal(result_1, expected) - - # resample gb, unrelated to above - rs = gb.resample("1min", on="datetime") - result_2 = rs.aggregate({"numbers": "sum"}) - - index = pd.MultiIndex.from_arrays( - [ - ["A", "A"], - [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")], - ], - names=[ - "group", - "datetime", - ], - ) - expected = DataFrame({"numbers": [100, 200]}, index=index) - - tm.assert_frame_equal(result_2, expected) - - # reuse gb, unrelated to above - result_3 = gb[["numbers"]].transform("sum") - - tm.assert_frame_equal(result_1, result_3) - - -def test_groupby_resample_then_groupby_is_reused_when_index_is_set_from_column(): - # GH 59350 - df = DataFrame( - data={ - "datetime": [ - pd.to_datetime("2024-07-30T00:00Z"), - pd.to_datetime("2024-07-30T00:01Z"), - ], - "group": ["A", "A"], - "numbers": [100, 200], - }, - ).set_index("group") - - gb = df.groupby("group") - - # use gb - result_1 = gb[["numbers"]].transform("sum") - - index = Index(["A", "A"], name="group") - expected = DataFrame({"numbers": [300, 300]}, index=index) - - tm.assert_frame_equal(result_1, expected) - - # resample gb, unrelated to above - rs = gb.resample("1min", on="datetime") - result_2 = rs.aggregate({"numbers": "sum"}) - - index = pd.MultiIndex.from_arrays( - [ - ["A", "A"], - [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")], - ], - names=[ - "group", - "datetime", - ], - ) - expected = DataFrame({"numbers": [100, 200]}, index=index) - - tm.assert_frame_equal(result_2, expected) - - # reuse gb, unrelated to above - result_3 = gb[["numbers"]].transform("sum") - - tm.assert_frame_equal(result_1, result_3) - - -def test_groupby_resample_then_groupby_is_reused_when_groupby_selection_is_not_none(): - # GH 59350 - df = DataFrame( - data={ - "datetime": [ - pd.to_datetime("2024-07-30T00:00Z"), - pd.to_datetime("2024-07-30T00:01Z"), - ], - "group": ["A", "A"], - "numbers": [100, 200], - }, - index=[1, 0], - ) - - gb = df.groupby("group") - gb = gb[["numbers", "datetime"]] # gb._selection is ["numbers", "datetime"] - - # use gb - result_1 = gb.transform("max") - - index = Index([1, 0]) - expected = DataFrame( - { - "numbers": [200, 200], - "datetime": [ - pd.to_datetime("2024-07-30T00:01Z"), - pd.to_datetime("2024-07-30T00:01Z"), - ], - }, - index=index, - ) - - tm.assert_frame_equal(result_1, expected) - - # resample gb, unrelated to above - rs = gb.resample("1min", on="datetime") - result_2 = rs.aggregate({"numbers": "sum"}) # Enter the `except IndexError:` block - - index = pd.MultiIndex.from_arrays( - [ - ["A", "A"], - [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")], - ], - names=[ - "group", - "datetime", - ], - ) - columns = pd.MultiIndex.from_arrays([["numbers"], ["numbers"]]) - expected = DataFrame([[100], [200]], index=index, columns=columns) - - tm.assert_frame_equal(result_2, expected) - - # reuse gb, unrelated to above - result_3 = gb.transform("max") - - tm.assert_frame_equal(result_1, result_3) From 8a7f6626ce79af562e78500c6c687e283090158c Mon Sep 17 00:00:00 2001 From: aram-cinnamon Date: Tue, 26 Nov 2024 17:56:27 +0100 Subject: [PATCH 11/12] change test_groupby_resample_on_api_with_getitem --- pandas/tests/resample/test_resampler_grouper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index db27b3af11d2e..17b3371c7eaa9 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -139,7 +139,8 @@ def test_getitem_multiple(): def test_groupby_resample_on_api_with_getitem(): # GH 17813 df = DataFrame( - {"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1} + {"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1}, + index=list("xyzab"), ) exp = df.set_index("date").groupby("id").resample("2D")["data"].sum() result = df.groupby("id").resample("2D", on="date")["data"].sum() From 7777766c442820de942adcdfbb3eec530d6d288a Mon Sep 17 00:00:00 2001 From: aram-cedarwood <@> Date: Wed, 9 Apr 2025 16:49:45 +0200 Subject: [PATCH 12/12] add comments --- pandas/core/groupby/grouper.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index e237bb6d64d6b..1d68f02275379 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -350,9 +350,13 @@ def _set_grouper( unsorted_ax = self._grouper.take(reverse_indexer) ax = unsorted_ax.take(obj.index) elif isinstance(obj.index, RangeIndex): + # Standard case for RangeIndex ax = self._grouper.take(obj.index) else: # GH 59350 + # If index is not RangeIndex and not sorted here, + # avoid re-taking based on potentially mis-ordered obj.index. + # self._grouper should already align with obj's values via key. ax = self._grouper else: if key not in obj._info_axis: