Skip to content

Commit b1827ea

Browse files
BUG: 43909 - check monoticity of rolling groupby (#44068)
1 parent 7658a6a commit b1827ea

File tree

4 files changed

+28
-6
lines changed

4 files changed

+28
-6
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,7 @@ Groupby/resample/rolling
810810
- Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`)
811811
- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`3944`)
812812
- Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`)
813+
- Bug in :meth:`Groupby.rolling` when non-monotonic data passed, fails to correctly raise ``ValueError`` (:issue:`43909`)
813814
- Fixed bug where grouping by a :class:`Series` that has a categorical data type and length unequal to the axis of grouping raised ``ValueError`` (:issue:`44179`)
814815

815816
Reshaping

pandas/core/window/rolling.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -2629,8 +2629,9 @@ def _get_window_indexer(self) -> GroupbyIndexer:
26292629
def _validate_monotonic(self):
26302630
"""
26312631
Validate that on is monotonic;
2632-
in this case we have to check only for nans, because
2633-
monotonicity was already validated at a higher level.
26342632
"""
2635-
if self._on.hasnans:
2633+
if (
2634+
not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing)
2635+
or self._on.hasnans
2636+
):
26362637
self._raise_monotonic_error()

pandas/tests/window/test_rolling.py

+12
Original file line numberDiff line numberDiff line change
@@ -1419,6 +1419,18 @@ def test_groupby_rolling_nan_included():
14191419
tm.assert_frame_equal(result, expected)
14201420

14211421

1422+
def test_groupby_rolling_non_monotonic():
1423+
# GH 43909
1424+
1425+
shuffled = [3, 0, 1, 2]
1426+
sec = 1_000
1427+
df = DataFrame(
1428+
[{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
1429+
)
1430+
with pytest.raises(ValueError, match=r".* must be monotonic"):
1431+
df.groupby("c").rolling(on="t", window="3s")
1432+
1433+
14221434
@pytest.mark.parametrize("method", ["skew", "kurt"])
14231435
def test_rolling_skew_kurt_numerical_stability(method):
14241436
# GH#6929

pandas/tests/window/test_timeseries_window.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,9 @@ def test_groupby_monotonic(self):
648648
# GH 15130
649649
# we don't need to validate monotonicity when grouping
650650

651+
# GH 43909 we should raise an error here to match
652+
# behaviour of non-groupby rolling.
653+
651654
data = [
652655
["David", "1/1/2015", 100],
653656
["David", "1/5/2015", 500],
@@ -663,6 +666,7 @@ def test_groupby_monotonic(self):
663666

664667
df = DataFrame(data=data, columns=["name", "date", "amount"])
665668
df["date"] = to_datetime(df["date"])
669+
df = df.sort_values("date")
666670

667671
expected = (
668672
df.set_index("date")
@@ -672,9 +676,11 @@ def test_groupby_monotonic(self):
672676
result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
673677
tm.assert_series_equal(result, expected)
674678

675-
def test_non_monotonic(self):
679+
def test_non_monotonic_raises(self):
676680
# GH 13966 (similar to #15130, closed by #15175)
677681

682+
# superseded by 43909
683+
678684
dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
679685
df = DataFrame(
680686
{
@@ -684,11 +690,13 @@ def test_non_monotonic(self):
684690
}
685691
)
686692

687-
result = df.groupby("A").rolling("4s", on="B").C.mean()
688693
expected = (
689694
df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
690695
)
691-
tm.assert_series_equal(result, expected)
696+
with pytest.raises(ValueError, match=r".* must be monotonic"):
697+
df.groupby("A").rolling(
698+
"4s", on="B"
699+
).C.mean() # should raise for non-monotonic t series
692700

693701
df2 = df.sort_values("B")
694702
result = df2.groupby("A").rolling("4s", on="B").C.mean()

0 commit comments

Comments
 (0)