Skip to content

Revert "BUG: groupby().rolling(freq) with monotonic dates within groups (#46065)" #46078

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.4.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Fixed regressions

Bug fixes
~~~~~~~~~
- Bug in :meth:`Groupby.rolling` with a frequency window would raise a ``ValueError`` even if the datetimes within each group were monotonic (:issue:`46061`)
-
-

.. ---------------------------------------------------------------------------
Expand Down
34 changes: 17 additions & 17 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,7 @@ def _gotitem(self, key, ndim, subset=None):
subset = self.obj.set_index(self._on)
return super()._gotitem(key, ndim, subset=subset)

def _validate_datetimelike_monotonic(self):
def _validate_monotonic(self):
"""
Validate that "on" is monotonic; already validated at a higher level.
"""
Expand Down Expand Up @@ -1687,7 +1687,7 @@ def _validate(self):
or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex))
) and isinstance(self.window, (str, BaseOffset, timedelta)):

self._validate_datetimelike_monotonic()
self._validate_monotonic()

# this will raise ValueError on non-fixed freqs
try:
Expand All @@ -1712,13 +1712,18 @@ def _validate(self):
elif not is_integer(self.window) or self.window < 0:
raise ValueError("window must be an integer 0 or greater")

def _validate_datetimelike_monotonic(self):
def _validate_monotonic(self):
"""
Validate monotonic (increasing or decreasing).
"""
if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
on = "index" if self.on is None else self.on
raise ValueError(f"{on} must be monotonic.")
self._raise_monotonic_error()

def _raise_monotonic_error(self):
formatted = self.on
if self.on is None:
formatted = "index"
raise ValueError(f"{formatted} must be monotonic")

@doc(
_shared_docs["aggregate"],
Expand Down Expand Up @@ -2626,17 +2631,12 @@ def _get_window_indexer(self) -> GroupbyIndexer:
)
return window_indexer

def _validate_datetimelike_monotonic(self):
def _validate_monotonic(self):
"""
Validate that each group in self._on is monotonic
Validate that on is monotonic;
"""
# GH 46061
on = "index" if self.on is None else self.on
if self._on.hasnans:
raise ValueError(f"{on} must not have any NaT values.")
for group_indices in self._grouper.indices.values():
group_on = self._on.take(group_indices)
if not (
group_on.is_monotonic_increasing or group_on.is_monotonic_decreasing
):
raise ValueError(f"Each group within {on} must be monotonic.")
if (
not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing)
or self._on.hasnans
):
self._raise_monotonic_error()
79 changes: 1 addition & 78 deletions pandas/tests/window/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ def test_groupby_rolling_nans_in_index(self, rollings, key):
)
if key == "index":
df = df.set_index("a")
with pytest.raises(ValueError, match=f"{key} must not have any NaT values"):
with pytest.raises(ValueError, match=f"{key} must be monotonic"):
df.groupby("c").rolling("60min", **rollings)

@pytest.mark.parametrize("group_keys", [True, False])
Expand Down Expand Up @@ -922,83 +922,6 @@ def test_nan_and_zero_endpoints(self):
)
tm.assert_series_equal(result, expected)

def test_groupby_rolling_non_monotonic(self):
# GH 43909

shuffled = [3, 0, 1, 2]
sec = 1_000
df = DataFrame(
[{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
)
with pytest.raises(ValueError, match=r".* must be monotonic"):
df.groupby("c").rolling(on="t", window="3s")

def test_groupby_monotonic(self):

# GH 15130
# we don't need to validate monotonicity when grouping

# GH 43909 we should raise an error here to match
# behaviour of non-groupby rolling.

data = [
["David", "1/1/2015", 100],
["David", "1/5/2015", 500],
["David", "5/30/2015", 50],
["David", "7/25/2015", 50],
["Ryan", "1/4/2014", 100],
["Ryan", "1/19/2015", 500],
["Ryan", "3/31/2016", 50],
["Joe", "7/1/2015", 100],
["Joe", "9/9/2015", 500],
["Joe", "10/15/2015", 50],
]

df = DataFrame(data=data, columns=["name", "date", "amount"])
df["date"] = to_datetime(df["date"])
df = df.sort_values("date")

expected = (
df.set_index("date")
.groupby("name")
.apply(lambda x: x.rolling("180D")["amount"].sum())
)
result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
tm.assert_series_equal(result, expected)

def test_datelike_on_monotonic_within_each_group(self):
# GH 13966 (similar to #15130, closed by #15175)

# superseded by 43909
# GH 46061: OK if the on is monotonic relative to each each group

dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
df = DataFrame(
{
"A": [1] * 20 + [2] * 12 + [3] * 8,
"B": np.concatenate((dates, dates)),
"C": np.arange(40),
}
)

expected = (
df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
)
result = df.groupby("A").rolling("4s", on="B").C.mean()
tm.assert_series_equal(result, expected)

def test_datelike_on_not_monotonic_within_each_group(self):
# GH 46061
df = DataFrame(
{
"A": [1] * 3 + [2] * 3,
"B": [Timestamp(year, 1, 1) for year in [2020, 2021, 2019]] * 2,
"C": range(6),
}
)
with pytest.raises(ValueError, match="Each group within B must be monotonic."):
df.groupby("A").rolling("365D", on="B")


class TestExpanding:
def setup_method(self):
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1420,6 +1420,18 @@ def test_groupby_rolling_nan_included():
tm.assert_frame_equal(result, expected)


def test_groupby_rolling_non_monotonic():
# GH 43909

shuffled = [3, 0, 1, 2]
sec = 1_000
df = DataFrame(
[{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
)
with pytest.raises(ValueError, match=r".* must be monotonic"):
df.groupby("c").rolling(on="t", window="3s")


@pytest.mark.parametrize("method", ["skew", "kurt"])
def test_rolling_skew_kurt_numerical_stability(method):
# GH#6929
Expand Down
60 changes: 60 additions & 0 deletions pandas/tests/window/test_timeseries_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Series,
Timestamp,
date_range,
to_datetime,
)
import pandas._testing as tm

Expand Down Expand Up @@ -647,6 +648,65 @@ def agg_by_day(x):

tm.assert_frame_equal(result, expected)

def test_groupby_monotonic(self):

# GH 15130
# we don't need to validate monotonicity when grouping

# GH 43909 we should raise an error here to match
# behaviour of non-groupby rolling.

data = [
["David", "1/1/2015", 100],
["David", "1/5/2015", 500],
["David", "5/30/2015", 50],
["David", "7/25/2015", 50],
["Ryan", "1/4/2014", 100],
["Ryan", "1/19/2015", 500],
["Ryan", "3/31/2016", 50],
["Joe", "7/1/2015", 100],
["Joe", "9/9/2015", 500],
["Joe", "10/15/2015", 50],
]

df = DataFrame(data=data, columns=["name", "date", "amount"])
df["date"] = to_datetime(df["date"])
df = df.sort_values("date")

expected = (
df.set_index("date")
.groupby("name")
.apply(lambda x: x.rolling("180D")["amount"].sum())
)
result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
tm.assert_series_equal(result, expected)

def test_non_monotonic_raises(self):
# GH 13966 (similar to #15130, closed by #15175)

# superseded by 43909

dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
df = DataFrame(
{
"A": [1] * 20 + [2] * 12 + [3] * 8,
"B": np.concatenate((dates, dates)),
"C": np.arange(40),
}
)

expected = (
df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
)
with pytest.raises(ValueError, match=r".* must be monotonic"):
df.groupby("A").rolling(
"4s", on="B"
).C.mean() # should raise for non-monotonic t series

df2 = df.sort_values("B")
result = df2.groupby("A").rolling("4s", on="B").C.mean()
tm.assert_series_equal(result, expected)

def test_rolling_cov_offset(self):
# GH16058

Expand Down