From 0b220363b9e896e88e04906baf9d510b58d66c5f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 2 Sep 2021 17:50:10 -0700 Subject: [PATCH 1/2] BUG: rolling.groupby with on and __getitem__ doesn't mutate underlying object --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/window/rolling.py | 3 ++- pandas/tests/window/test_groupby.py | 39 +++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2f8cb346935a9..febb1210c39ad 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -398,6 +398,7 @@ Groupby/resample/rolling - Bug in :meth:`pandas.DataFrame.rolling` operation along rows (``axis=1``) incorrectly omits columns containing ``float16`` and ``float32`` (:issue:`41779`) - Bug in :meth:`Resampler.aggregate` did not allow the use of Named Aggregation (:issue:`32803`) - Bug in :meth:`Series.rolling` when the :class:`Series` ``dtype`` was ``Int64`` (:issue:`43016`) +- Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would return subsequent incorrect results (:issue:`43355`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 66ffc2600e88e..a8e2ecf3d7f54 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -766,7 +766,8 @@ def _gotitem(self, key, ndim, subset=None): # here so our index is carried through to the selected obj # when we do the splitting for the groupby if self.on is not None: - self.obj = self.obj.set_index(self._on) + # GH 43355 + subset = self.obj.set_index(self._on) return super()._gotitem(key, ndim, subset=subset) def _validate_monotonic(self): diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 2523ec585a491..30f27db6dc2d2 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -1019,3 +1019,42 @@ def test_times_array(self, times_frame): result = gb.ewm(halflife=halflife, times="C").mean() expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean() tm.assert_frame_equal(result, expected) + + def test_dont_mutate_obj_after_slicing(self): + # GH 43355 + df = DataFrame( + { + "id": ["a", "a", "b", "b", "b"], + "timestamp": date_range("2021-9-1", periods=5, freq="H"), + "y": range(5), + } + ) + grp = df.groupby("id").rolling("1H", on="timestamp") + result = grp.count() + expected_df = DataFrame( + { + "timestamp": date_range("2021-9-1", periods=5, freq="H"), + "y": [1.0] * 5, + }, + index=MultiIndex.from_arrays( + [["a", "a", "b", "b", "b"], list(range(5))], names=["id", None] + ), + ) + tm.assert_frame_equal(result, expected_df) + + result = grp["y"].count() + expected_series = Series( + [1.0] * 5, + index=MultiIndex.from_arrays( + [ + ["a", "a", "b", "b", "b"], + date_range("2021-9-1", periods=5, freq="H"), + ], + names=["id", "timestamp"], + ), + name="y", + ) + tm.assert_series_equal(result, expected_series) + # This is the key test + result = grp.count() + tm.assert_frame_equal(result, expected_df) From b47aa3d1e7f1d12082caa2ec6f0ed8450a07a6db Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 2 Sep 2021 19:54:42 -0700 Subject: [PATCH 2/2] Grammar --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index febb1210c39ad..2a480f8b80b1e 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -398,7 +398,7 @@ Groupby/resample/rolling - Bug in :meth:`pandas.DataFrame.rolling` operation along rows (``axis=1``) incorrectly omits columns containing ``float16`` and ``float32`` (:issue:`41779`) - Bug in :meth:`Resampler.aggregate` did not allow the use of Named Aggregation (:issue:`32803`) - Bug in :meth:`Series.rolling` when the :class:`Series` ``dtype`` was ``Int64`` (:issue:`43016`) -- Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would return subsequent incorrect results (:issue:`43355`) +- Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would subsequently return incorrect results (:issue:`43355`) Reshaping ^^^^^^^^^