Skip to content

Commit 69beb8c

Browse files
mroeschke authored and feefladder committed
BUG: rolling.groupby with on and __getitem__ doesn't mutate underlying object (pandas-dev#43374)
1 parent e036cf9 commit 69beb8c

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,7 @@ Groupby/resample/rolling
408408
- Bug in :meth:`pandas.DataFrame.rolling` operation along rows (``axis=1``) incorrectly omits columns containing ``float16`` and ``float32`` (:issue:`41779`)
409409
- Bug in :meth:`Resampler.aggregate` did not allow the use of Named Aggregation (:issue:`32803`)
410410
- Bug in :meth:`Series.rolling` when the :class:`Series` ``dtype`` was ``Int64`` (:issue:`43016`)
411+
- Bug in :meth:`DataFrame.groupby.rolling` where specifying ``on`` and then calling ``__getitem__`` mutated the underlying object, so subsequent operations returned incorrect results (:issue:`43355`)
411412

412413
Reshaping
413414
^^^^^^^^^

pandas/core/window/rolling.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -766,7 +766,8 @@ def _gotitem(self, key, ndim, subset=None):
766766
# here so our index is carried through to the selected obj
767767
# when we do the splitting for the groupby
768768
if self.on is not None:
769-
self.obj = self.obj.set_index(self._on)
769+
# GH 43355
770+
subset = self.obj.set_index(self._on)
770771
return super()._gotitem(key, ndim, subset=subset)
771772

772773
def _validate_monotonic(self):

pandas/tests/window/test_groupby.py

+39
Original file line numberDiff line numberDiff line change
@@ -1019,3 +1019,42 @@ def test_times_array(self, times_frame):
10191019
result = gb.ewm(halflife=halflife, times="C").mean()
10201020
expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean()
10211021
tm.assert_frame_equal(result, expected)
1022+
1023+
def test_dont_mutate_obj_after_slicing(self):
1024+
# GH 43355
1025+
df = DataFrame(
1026+
{
1027+
"id": ["a", "a", "b", "b", "b"],
1028+
"timestamp": date_range("2021-9-1", periods=5, freq="H"),
1029+
"y": range(5),
1030+
}
1031+
)
1032+
grp = df.groupby("id").rolling("1H", on="timestamp")
1033+
result = grp.count()
1034+
expected_df = DataFrame(
1035+
{
1036+
"timestamp": date_range("2021-9-1", periods=5, freq="H"),
1037+
"y": [1.0] * 5,
1038+
},
1039+
index=MultiIndex.from_arrays(
1040+
[["a", "a", "b", "b", "b"], list(range(5))], names=["id", None]
1041+
),
1042+
)
1043+
tm.assert_frame_equal(result, expected_df)
1044+
1045+
result = grp["y"].count()
1046+
expected_series = Series(
1047+
[1.0] * 5,
1048+
index=MultiIndex.from_arrays(
1049+
[
1050+
["a", "a", "b", "b", "b"],
1051+
date_range("2021-9-1", periods=5, freq="H"),
1052+
],
1053+
names=["id", "timestamp"],
1054+
),
1055+
name="y",
1056+
)
1057+
tm.assert_series_equal(result, expected_series)
1058+
# This is the key test
1059+
result = grp.count()
1060+
tm.assert_frame_equal(result, expected_df)

0 commit comments

Comments
 (0)