Skip to content

Commit ab599f3

Browse files
authored
BUG: RollingGroupby no longer keeps the groupby column in the result (#40341)
1 parent 55bfcec commit ab599f3

File tree

4 files changed

+76
-0
lines changed

4 files changed

+76
-0
lines changed

doc/source/whatsnew/v1.3.0.rst

+32
Original file line number | Diff line number | Diff line change
@@ -302,6 +302,38 @@ cast to ``dtype=object`` (:issue:`38709`)
302302
ser2
303303
304304
305+
.. _whatsnew_130.notable_bug_fixes.rolling_groupby_column:
306+
307+
GroupBy.rolling no longer returns grouped-by column in values
308+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
309+
310+
The group-by column will now be dropped from the result of a
311+
``groupby.rolling`` operation (:issue:`32262`)
312+
313+
.. ipython:: python
314+
315+
df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})
316+
df
317+
318+
*Previous behavior*:
319+
320+
.. code-block:: ipython
321+
322+
In [1]: df.groupby("A").rolling(2).sum()
323+
Out[1]:
324+
       A    B
325+
A
326+
1 0  NaN  NaN
327+
  1  2.0  1.0
328+
2 2  NaN  NaN
329+
3 3  NaN  NaN
330+
331+
*New behavior*:
332+
333+
.. ipython:: python
334+
335+
df.groupby("A").rolling(2).sum()
336+
305337
.. _whatsnew_130.notable_bug_fixes.rolling_var_precision:
306338

307339
Removed artificial truncation in rolling variance and standard deviation

pandas/core/window/rolling.py

+4
Original file line number | Diff line number | Diff line change
@@ -558,6 +558,10 @@ def __init__(
558558
if _grouper is None:
559559
raise ValueError("Must pass a Grouper object.")
560560
self._grouper = _grouper
561+
# GH 32262: It's convention to keep the grouping column in
562+
# groupby.<agg_func>, but unexpected to users in
563+
# groupby.rolling.<agg_func>
564+
obj = obj.drop(columns=self._grouper.names, errors="ignore")
561565
super().__init__(obj, *args, **kwargs)
562566

563567
def _apply(

pandas/tests/window/test_groupby.py

+37
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,8 @@ def test_rolling(self, f):
8383

8484
result = getattr(r, f)()
8585
expected = g.apply(lambda x: getattr(x.rolling(4), f)())
86+
# groupby.apply doesn't drop the grouped-by column
87+
expected = expected.drop("A", axis=1)
8688
# GH 39732
8789
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
8890
expected.index = expected_index
@@ -95,6 +97,8 @@ def test_rolling_ddof(self, f):
9597

9698
result = getattr(r, f)(ddof=1)
9799
expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1))
100+
# groupby.apply doesn't drop the grouped-by column
101+
expected = expected.drop("A", axis=1)
98102
# GH 39732
99103
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
100104
expected.index = expected_index
@@ -111,6 +115,8 @@ def test_rolling_quantile(self, interpolation):
111115
expected = g.apply(
112116
lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation)
113117
)
118+
# groupby.apply doesn't drop the grouped-by column
119+
expected = expected.drop("A", axis=1)
114120
# GH 39732
115121
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
116122
expected.index = expected_index
@@ -147,6 +153,8 @@ def test_rolling_apply(self, raw):
147153
# reduction
148154
result = r.apply(lambda x: x.sum(), raw=raw)
149155
expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw))
156+
# groupby.apply doesn't drop the grouped-by column
157+
expected = expected.drop("A", axis=1)
150158
# GH 39732
151159
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
152160
expected.index = expected_index
@@ -442,6 +450,8 @@ def test_groupby_rolling_empty_frame(self):
442450
# GH 36197
443451
expected = DataFrame({"s1": []})
444452
result = expected.groupby("s1").rolling(window=1).sum()
453+
# GH 32262
454+
expected = expected.drop(columns="s1")
445455
# GH-38057 from_tuples gives empty object dtype, we now get float/int levels
446456
# expected.index = MultiIndex.from_tuples([], names=["s1", None])
447457
expected.index = MultiIndex.from_product(
@@ -451,6 +461,8 @@ def test_groupby_rolling_empty_frame(self):
451461

452462
expected = DataFrame({"s1": [], "s2": []})
453463
result = expected.groupby(["s1", "s2"]).rolling(window=1).sum()
464+
# GH 32262
465+
expected = expected.drop(columns=["s1", "s2"])
454466
expected.index = MultiIndex.from_product(
455467
[
456468
Index([], dtype="float64"),
@@ -503,6 +515,8 @@ def test_groupby_rolling_no_sort(self):
503515
columns=["foo", "bar"],
504516
index=MultiIndex.from_tuples([(2, 0), (1, 1)], names=["foo", None]),
505517
)
518+
# GH 32262
519+
expected = expected.drop(columns="foo")
506520
tm.assert_frame_equal(result, expected)
507521

508522
def test_groupby_rolling_count_closed_on(self):
@@ -553,6 +567,8 @@ def test_groupby_rolling_sem(self, func, kwargs):
553567
[("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None]
554568
),
555569
)
570+
# GH 32262
571+
expected = expected.drop(columns="a")
556572
tm.assert_frame_equal(result, expected)
557573

558574
@pytest.mark.parametrize(
@@ -666,6 +682,19 @@ def test_groupby_rolling_object_doesnt_affect_groupby_apply(self):
666682
assert not g.mutated
667683
assert not g.grouper.mutated
668684

685+
@pytest.mark.parametrize(
686+
"columns", [MultiIndex.from_tuples([("A", ""), ("B", "C")]), ["A", "B"]]
687+
)
688+
def test_by_column_not_in_values(self, columns):
689+
# GH 32262
690+
df = DataFrame([[1, 0]] * 20 + [[2, 0]] * 12 + [[3, 0]] * 8, columns=columns)
691+
g = df.groupby("A")
692+
original_obj = g.obj.copy(deep=True)
693+
r = g.rolling(4)
694+
result = r.sum()
695+
assert "A" not in result.columns
696+
tm.assert_frame_equal(g.obj, original_obj)
697+
669698

670699
class TestExpanding:
671700
def setup_method(self):
@@ -680,6 +709,8 @@ def test_expanding(self, f):
680709

681710
result = getattr(r, f)()
682711
expected = g.apply(lambda x: getattr(x.expanding(), f)())
712+
# groupby.apply doesn't drop the grouped-by column
713+
expected = expected.drop("A", axis=1)
683714
# GH 39732
684715
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
685716
expected.index = expected_index
@@ -692,6 +723,8 @@ def test_expanding_ddof(self, f):
692723

693724
result = getattr(r, f)(ddof=0)
694725
expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0))
726+
# groupby.apply doesn't drop the grouped-by column
727+
expected = expected.drop("A", axis=1)
695728
# GH 39732
696729
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
697730
expected.index = expected_index
@@ -708,6 +741,8 @@ def test_expanding_quantile(self, interpolation):
708741
expected = g.apply(
709742
lambda x: x.expanding().quantile(0.4, interpolation=interpolation)
710743
)
744+
# groupby.apply doesn't drop the grouped-by column
745+
expected = expected.drop("A", axis=1)
711746
# GH 39732
712747
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
713748
expected.index = expected_index
@@ -748,6 +783,8 @@ def test_expanding_apply(self, raw):
748783
# reduction
749784
result = r.apply(lambda x: x.sum(), raw=raw)
750785
expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw))
786+
# groupby.apply doesn't drop the grouped-by column
787+
expected = expected.drop("A", axis=1)
751788
# GH 39732
752789
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
753790
expected.index = expected_index

pandas/tests/window/test_rolling.py

+3
Original file line number | Diff line number | Diff line change
@@ -719,6 +719,9 @@ def scaled_sum(*args):
719719
df = DataFrame(data={"X": range(5)}, index=[0, 0, 1, 1, 1])
720720

721721
expected = DataFrame(data={"X": [0.0, 0.5, 1.0, 1.5, 2.0]}, index=_index)
722+
# GH 40341
723+
if "by" in grouping:
724+
expected = expected.drop(columns="X", errors="ignore")
722725
result = df.groupby(**grouping).rolling(1).apply(scaled_sum, raw=raw, args=(2,))
723726
tm.assert_frame_equal(result, expected)
724727

0 commit comments

Comments
 (0)