Skip to content

BUG: RollingGroupby no longer keeps the groupby column in the result #40341

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,38 @@ cast to ``dtype=object`` (:issue:`38709`)
ser2


.. _whatsnew_130.notable_bug_fixes.rolling_groupby_column:

GroupBy.rolling no longer returns grouped-by column in values
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The group-by column will now be dropped from the result of a
``groupby.rolling`` operation (:issue:`32262`)

.. ipython:: python

df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})
df

*Previous behavior*:

.. code-block:: ipython

In [1]: df.groupby("A").rolling(2).sum()
Out[1]:
A B
A
1 0 NaN NaN
1 2.0 1.0
2 2 NaN NaN
3 3 NaN NaN

*New behavior*:

.. ipython:: python

df.groupby("A").rolling(2).sum()

.. _whatsnew_130.notable_bug_fixes.rolling_var_precision:

Removed artificial truncation in rolling variance and standard deviation
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,10 @@ def __init__(
if _grouper is None:
raise ValueError("Must pass a Grouper object.")
self._grouper = _grouper
# GH 32262: It's convention to keep the grouping column in
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the difference in conventions just a historical thing? Might we want to make these match eventually?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems to be historical; see #32332 (review)

It would be great to have groupby apply match, but it might take some convincing

# groupby.<agg_func>, but unexpected to users in
# groupby.rolling.<agg_func>
obj = obj.drop(columns=self._grouper.names, errors="ignore")
super().__init__(obj, *args, **kwargs)

def _apply(
Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/window/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ def test_rolling(self, f):

result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.rolling(4), f)())
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
# GH 39732
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
expected.index = expected_index
Expand All @@ -95,6 +97,8 @@ def test_rolling_ddof(self, f):

result = getattr(r, f)(ddof=1)
expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1))
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
# GH 39732
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
expected.index = expected_index
Expand All @@ -111,6 +115,8 @@ def test_rolling_quantile(self, interpolation):
expected = g.apply(
lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation)
)
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
# GH 39732
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
expected.index = expected_index
Expand Down Expand Up @@ -147,6 +153,8 @@ def test_rolling_apply(self, raw):
# reduction
result = r.apply(lambda x: x.sum(), raw=raw)
expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw))
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
# GH 39732
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
expected.index = expected_index
Expand Down Expand Up @@ -442,6 +450,8 @@ def test_groupby_rolling_empty_frame(self):
# GH 36197
expected = DataFrame({"s1": []})
result = expected.groupby("s1").rolling(window=1).sum()
# GH 32262
expected = expected.drop(columns="s1")
# GH-38057 from_tuples gives empty object dtype, we now get float/int levels
# expected.index = MultiIndex.from_tuples([], names=["s1", None])
expected.index = MultiIndex.from_product(
Expand All @@ -451,6 +461,8 @@ def test_groupby_rolling_empty_frame(self):

expected = DataFrame({"s1": [], "s2": []})
result = expected.groupby(["s1", "s2"]).rolling(window=1).sum()
# GH 32262
expected = expected.drop(columns=["s1", "s2"])
expected.index = MultiIndex.from_product(
[
Index([], dtype="float64"),
Expand Down Expand Up @@ -503,6 +515,8 @@ def test_groupby_rolling_no_sort(self):
columns=["foo", "bar"],
index=MultiIndex.from_tuples([(2, 0), (1, 1)], names=["foo", None]),
)
# GH 32262
expected = expected.drop(columns="foo")
tm.assert_frame_equal(result, expected)

def test_groupby_rolling_count_closed_on(self):
Expand Down Expand Up @@ -553,6 +567,8 @@ def test_groupby_rolling_sem(self, func, kwargs):
[("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None]
),
)
# GH 32262
expected = expected.drop(columns="a")
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -666,6 +682,19 @@ def test_groupby_rolling_object_doesnt_affect_groupby_apply(self):
assert not g.mutated
assert not g.grouper.mutated

# Regression test for GH 32262: the result of groupby("A").rolling(4).sum()
# must not contain the group-by column "A", and the frame held by the
# groupby object must be left unmodified. Parametrized over MultiIndex
# column labels and plain string column labels.
@pytest.mark.parametrize(
"columns", [MultiIndex.from_tuples([("A", ""), ("B", "C")]), ["A", "B"]]
)
def test_by_column_not_in_values(self, columns):
# GH 32262
# 40 rows: the first column ("A") holds the group keys (1 x 20, 2 x 12,
# 3 x 8); the second column is all zeros.
df = DataFrame([[1, 0]] * 20 + [[2, 0]] * 12 + [[3, 0]] * 8, columns=columns)
g = df.groupby("A")
# Deep copy taken before rolling so any later change to g.obj is detectable.
original_obj = g.obj.copy(deep=True)
r = g.rolling(4)
result = r.sum()
# The group-by column must not appear among the result's columns.
assert "A" not in result.columns
# The object stored on the groupby must compare equal to the earlier copy.
tm.assert_frame_equal(g.obj, original_obj)


class TestExpanding:
def setup_method(self):
Expand All @@ -680,6 +709,8 @@ def test_expanding(self, f):

result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.expanding(), f)())
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
# GH 39732
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
expected.index = expected_index
Expand All @@ -692,6 +723,8 @@ def test_expanding_ddof(self, f):

result = getattr(r, f)(ddof=0)
expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0))
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
# GH 39732
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
expected.index = expected_index
Expand All @@ -708,6 +741,8 @@ def test_expanding_quantile(self, interpolation):
expected = g.apply(
lambda x: x.expanding().quantile(0.4, interpolation=interpolation)
)
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
# GH 39732
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
expected.index = expected_index
Expand Down Expand Up @@ -748,6 +783,8 @@ def test_expanding_apply(self, raw):
# reduction
result = r.apply(lambda x: x.sum(), raw=raw)
expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw))
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
# GH 39732
expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)])
expected.index = expected_index
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,9 @@ def scaled_sum(*args):
df = DataFrame(data={"X": range(5)}, index=[0, 0, 1, 1, 1])

expected = DataFrame(data={"X": [0.0, 0.5, 1.0, 1.5, 2.0]}, index=_index)
# GH 40341
if "by" in grouping:
expected = expected.drop(columns="X", errors="ignore")
result = df.groupby(**grouping).rolling(1).apply(scaled_sum, raw=raw, args=(2,))
tm.assert_frame_equal(result, expected)

Expand Down