From d62a5e57792e08a8ff92a759bd87d4d207023a83 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 9 Mar 2021 19:53:43 -0800 Subject: [PATCH 1/2] BUG: RollingGroupby no longer keeps the groupby column in the result --- doc/source/whatsnew/v1.3.0.rst | 32 +++++++++++++++++++++++++ pandas/core/window/rolling.py | 4 ++++ pandas/tests/window/test_groupby.py | 37 +++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 110264ad9f058..915a294b8e93b 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -264,6 +264,38 @@ cast to ``dtype=object`` (:issue:`38709`) ser ser2 + +GroupBy.rolling no longer returns grouped-by column in values +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The group-by column will now be dropped from the result of a +``groupby.rolling`` operation (:issue:`32262`) + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]}) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: df.groupby("A").rolling(2).sum() + Out[1]: + A B + A + 1 0 NaN NaN + 1 2.0 1.0 + 2 2 NaN NaN + 3 3 NaN NaN + +*New behavior*: + +.. ipython:: python + + df.groupby("A").rolling(2).sum() + + .. _whatsnew_130.api_breaking.deps: Increased minimum versions for dependencies diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 503849bf673d5..a956d093acbd8 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -552,6 +552,10 @@ def __init__( if _grouper is None: raise ValueError("Must pass a Grouper object.") self._grouper = _grouper + # GH 32262: It's convention to keep the grouping column in + # groupby.<agg_func>, but unexpected to users in + # groupby.rolling.<agg_func>
+ obj = obj.drop(columns=self._grouper.names, errors="ignore") super().__init__(obj, *args, **kwargs) def _apply( diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index c3c5bbe460134..5c2f69a9247e9 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -83,6 +83,8 @@ def test_rolling(self, f): result = getattr(r, f)() expected = g.apply(lambda x: getattr(x.rolling(4), f)()) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) # GH 39732 expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) expected.index = expected_index @@ -95,6 +97,8 @@ def test_rolling_ddof(self, f): result = getattr(r, f)(ddof=1) expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1)) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) # GH 39732 expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) expected.index = expected_index @@ -111,6 +115,8 @@ def test_rolling_quantile(self, interpolation): expected = g.apply( lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation) ) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) # GH 39732 expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) expected.index = expected_index @@ -147,6 +153,8 @@ def test_rolling_apply(self, raw): # reduction result = r.apply(lambda x: x.sum(), raw=raw) expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw)) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) # GH 39732 expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) expected.index = expected_index @@ -442,6 +450,8 @@ def test_groupby_rolling_empty_frame(self): # GH 36197 expected = DataFrame({"s1": []}) result = expected.groupby("s1").rolling(window=1).sum() + # GH 32262 + expected = expected.drop(columns="s1") # GH-38057 
from_tuples gives empty object dtype, we now get float/int levels # expected.index = MultiIndex.from_tuples([], names=["s1", None]) expected.index = MultiIndex.from_product( @@ -451,6 +461,8 @@ def test_groupby_rolling_empty_frame(self): expected = DataFrame({"s1": [], "s2": []}) result = expected.groupby(["s1", "s2"]).rolling(window=1).sum() + # GH 32262 + expected = expected.drop(columns=["s1", "s2"]) expected.index = MultiIndex.from_product( [ Index([], dtype="float64"), @@ -503,6 +515,8 @@ def test_groupby_rolling_no_sort(self): columns=["foo", "bar"], index=MultiIndex.from_tuples([(2, 0), (1, 1)], names=["foo", None]), ) + # GH 32262 + expected = expected.drop(columns="foo") tm.assert_frame_equal(result, expected) def test_groupby_rolling_count_closed_on(self): @@ -553,6 +567,8 @@ def test_groupby_rolling_sem(self, func, kwargs): [("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None] ), ) + # GH 32262 + expected = expected.drop(columns="a") tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -666,6 +682,19 @@ def test_groupby_rolling_object_doesnt_affect_groupby_apply(self): assert not g.mutated assert not g.grouper.mutated + @pytest.mark.parametrize( + "columns", [MultiIndex.from_tuples([("A", ""), ("B", "C")]), ["A", "B"]] + ) + def test_by_column_not_in_values(self, columns): + # GH 32262 + df = DataFrame([[1, 0]] * 20 + [[2, 0]] * 12 + [[3, 0]] * 8, columns=columns) + g = df.groupby("A") + original_obj = g.obj.copy(deep=True) + r = g.rolling(4) + result = r.sum() + assert "A" not in result.columns + tm.assert_frame_equal(g.obj, original_obj) + class TestExpanding: def setup_method(self): @@ -680,6 +709,8 @@ def test_expanding(self, f): result = getattr(r, f)() expected = g.apply(lambda x: getattr(x.expanding(), f)()) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) # GH 39732 expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) expected.index = expected_index @@ 
-692,6 +723,8 @@ def test_expanding_ddof(self, f): result = getattr(r, f)(ddof=0) expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0)) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) # GH 39732 expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) expected.index = expected_index @@ -708,6 +741,8 @@ def test_expanding_quantile(self, interpolation): expected = g.apply( lambda x: x.expanding().quantile(0.4, interpolation=interpolation) ) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) # GH 39732 expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) expected.index = expected_index @@ -748,6 +783,8 @@ def test_expanding_apply(self, raw): # reduction result = r.apply(lambda x: x.sum(), raw=raw) expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw)) + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) # GH 39732 expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) expected.index = expected_index From d040abfcfa633ee45da5250607ace879ddd943b4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 9 Mar 2021 20:43:37 -0800 Subject: [PATCH 2/2] Fix test for when by is provided and not level --- pandas/tests/window/test_rolling.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 70c076e086fb7..c32e894f1e704 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -719,6 +719,9 @@ def scaled_sum(*args): df = DataFrame(data={"X": range(5)}, index=[0, 0, 1, 1, 1]) expected = DataFrame(data={"X": [0.0, 0.5, 1.0, 1.5, 2.0]}, index=_index) + # GH 40341 + if "by" in grouping: + expected = expected.drop(columns="X", errors="ignore") result = df.groupby(**grouping).rolling(1).apply(scaled_sum, raw=raw, args=(2,)) tm.assert_frame_equal(result, expected)