diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 3b9ddf8138689..dd3f0368f9227 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -408,6 +408,7 @@ Groupby/resample/rolling - Bug in :meth:`pandas.DataFrame.rolling` operation along rows (``axis=1``) incorrectly omits columns containing ``float16`` and ``float32`` (:issue:`41779`) - Bug in :meth:`Resampler.aggregate` did not allow the use of Named Aggregation (:issue:`32803`) - Bug in :meth:`Series.rolling` when the :class:`Series` ``dtype`` was ``Int64`` (:issue:`43016`) +- Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would subsequently return incorrect results (:issue:`43355`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 66ffc2600e88e..a8e2ecf3d7f54 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -766,7 +766,8 @@ def _gotitem(self, key, ndim, subset=None): # here so our index is carried through to the selected obj # when we do the splitting for the groupby if self.on is not None: - self.obj = self.obj.set_index(self._on) + # GH 43355 + subset = self.obj.set_index(self._on) return super()._gotitem(key, ndim, subset=subset) def _validate_monotonic(self): diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 2523ec585a491..30f27db6dc2d2 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -1019,3 +1019,42 @@ def test_times_array(self, times_frame): result = gb.ewm(halflife=halflife, times="C").mean() expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean() tm.assert_frame_equal(result, expected) + + def test_dont_mutate_obj_after_slicing(self): + # GH 43355 + df = DataFrame( + { + "id": ["a", "a", "b", "b", "b"], + "timestamp": date_range("2021-9-1", periods=5, freq="H"), + "y": range(5), + } + ) + grp = df.groupby("id").rolling("1H", on="timestamp") + result = grp.count() + expected_df = DataFrame( + { + "timestamp": date_range("2021-9-1", periods=5, freq="H"), + "y": [1.0] * 5, + }, + index=MultiIndex.from_arrays( + [["a", "a", "b", "b", "b"], list(range(5))], names=["id", None] + ), + ) + tm.assert_frame_equal(result, expected_df) + + result = grp["y"].count() + expected_series = Series( + [1.0] * 5, + index=MultiIndex.from_arrays( + [ + ["a", "a", "b", "b", "b"], + date_range("2021-9-1", periods=5, freq="H"), + ], + names=["id", "timestamp"], + ), + name="y", + ) + tm.assert_series_equal(result, expected_series) + # This is the key test + result = grp.count() + tm.assert_frame_equal(result, expected_df)