diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 29be9a7341f00..660177b9258ee 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -148,6 +148,7 @@ These improvements also fixed certain bugs in groupby: - :meth:`.DataFrameGroupBy.agg` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`36698`) - :meth:`.DataFrameGroupBy.groups` with ``sort=False`` would sort groups; they now occur in the order they are observed (:issue:`56966`) - :meth:`.DataFrameGroupBy.nunique` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`52848`) +- :meth:`.DataFrameGroupBy.resample` with an ``on`` value that is not ``None`` would have incorrect values when the index is not a :class:`RangeIndex`, e.g. when it is out of order (:issue:`59350`) - :meth:`.DataFrameGroupBy.sum` would have incorrect values when there are multiple groupings, unobserved groups, and non-numeric data (:issue:`43891`) - :meth:`.DataFrameGroupBy.value_counts` would produce incorrect results when used with some categorical and some non-categorical groupings and ``observed=False`` (:issue:`56016`) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index c9d874fc08dbe..1d68f02275379 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -34,6 +34,7 @@ from pandas.core.indexes.api import ( Index, MultiIndex, + RangeIndex, default_index, ) from pandas.core.series import Series @@ -348,8 +349,15 @@ def _set_grouper( reverse_indexer = self._indexer.argsort() unsorted_ax = self._grouper.take(reverse_indexer) ax = unsorted_ax.take(obj.index) - else: + elif isinstance(obj.index, RangeIndex): + # Standard case for RangeIndex ax = self._grouper.take(obj.index) + else: + # GH 59350 + # A non-RangeIndex obj.index may hold arbitrary labels, not + # positions, so do not use it as a positional indexer for take. + # self._grouper is expected to already align with obj via key. 
+ ax = self._grouper else: if key not in obj._info_axis: raise KeyError(f"The grouper name {key} is not found") diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 7870c5a9d3e17..be2f528d4bb8e 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -129,7 +129,8 @@ def test_getitem_multiple(): def test_groupby_resample_on_api_with_getitem(): # GH 17813 df = DataFrame( - {"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1} + {"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1}, + index=list("xyzab"), ) exp = df.set_index("date").groupby("id").resample("2D")["data"].sum() result = df.groupby("id").resample("2D", on="date")["data"].sum() @@ -672,3 +673,92 @@ def test_groupby_resample_on_index_with_list_of_keys_missing_column(): rs = gb.resample("2D") with pytest.raises(KeyError, match="Columns not found"): rs[["val_not_in_dataframe"]] + + +def test_groupby_resample_after_set_index_and_not_on_column(): + # GH 59350 + df = DataFrame( + data={ + "datetime": [ + pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + index=[1, 0], + ).set_index("datetime") + gb = df.groupby("group") + rs = gb.resample("1min") + result = rs.aggregate({"numbers": "sum"}) + + index = pd.MultiIndex.from_arrays( + [ + ["A", "A"], + [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")], + ], + names=[ + "group", + "datetime", + ], + ) + expected = DataFrame({"numbers": [100, 200]}, index=index) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "df", + [ + DataFrame( + data={ + "datetime": [ + pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + index=[1, 0], + ), + DataFrame( + data={ + "datetime": [ + 
pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + ).set_index("group"), + DataFrame( + data={ + "datetime": [ + pd.to_datetime("2024-07-30T00:00Z"), + pd.to_datetime("2024-07-30T00:01Z"), + ], + "group": ["A", "A"], + "numbers": [100, 200], + }, + ).set_index("datetime", drop=False), + ], +) +def test_groupby_resample_on_column_when_index_is_unusual(df): + # GH 59350 + gb = df.groupby("group") + rs = gb.resample("1min", on="datetime") + result = rs.aggregate({"numbers": "sum"}) + + index = pd.MultiIndex.from_arrays( + [ + ["A", "A"], + [pd.to_datetime("2024-07-30T00:00Z"), pd.to_datetime("2024-07-30T00:01Z")], + ], + names=[ + "group", + "datetime", + ], + ) + expected = DataFrame({"numbers": [100, 200]}, index=index) + + tm.assert_frame_equal(result, expected)