diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index ffd32e263aa50..6baed863476a9 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -809,6 +809,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`) - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`3944`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`) +- Bug in :meth:`Groupby.rolling` when non-monotonic data passed, fails to correctly raise ``ValueError`` (:issue:`43909`) - Fixed bug where grouping by a :class:`Series` that has a categorical data type and length unequal to the axis of grouping raised ``ValueError`` (:issue:`44179`) Reshaping diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index defae3392bfce..49d696f461300 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2626,8 +2626,9 @@ def _get_window_indexer(self) -> GroupbyIndexer: def _validate_monotonic(self): """ Validate that on is monotonic; - in this case we have to check only for nans, because - monotonicity was already validated at a higher level. """ - if self._on.hasnans: + if ( + not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing) + or self._on.hasnans + ): self._raise_monotonic_error() diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index b60f2e60e1035..814bd6b998182 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1419,6 +1419,18 @@ def test_groupby_rolling_nan_included(): tm.assert_frame_equal(result, expected) +def test_groupby_rolling_non_monotonic(): + # GH 43909 + + shuffled = [3, 0, 1, 2] + sec = 1_000 + df = DataFrame( + [{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled] + ) + with pytest.raises(ValueError, match=r".* must be monotonic"): + df.groupby("c").rolling(on="t", window="3s") + + @pytest.mark.parametrize("method", ["skew", "kurt"]) def test_rolling_skew_kurt_numerical_stability(method): # GH#6929 diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 7cd319480083b..f2cf7bd47e15b 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -648,6 +648,9 @@ def test_groupby_monotonic(self): # GH 15130 # we don't need to validate monotonicity when grouping + # GH 43909 we should raise an error here to match + # behaviour of non-groupby rolling. + data = [ ["David", "1/1/2015", 100], ["David", "1/5/2015", 500], @@ -663,6 +666,7 @@ def test_groupby_monotonic(self): df = DataFrame(data=data, columns=["name", "date", "amount"]) df["date"] = to_datetime(df["date"]) + df = df.sort_values("date") expected = ( df.set_index("date") @@ -672,9 +676,11 @@ def test_groupby_monotonic(self): result = df.groupby("name").rolling("180D", on="date")["amount"].sum() tm.assert_series_equal(result, expected) - def test_non_monotonic(self): + def test_non_monotonic_raises(self): # GH 13966 (similar to #15130, closed by #15175) + # superseded by 43909 + dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s") df = DataFrame( { @@ -684,11 +690,13 @@ def test_non_monotonic(self): } ) - result = df.groupby("A").rolling("4s", on="B").C.mean() expected = ( df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean()) ) - tm.assert_series_equal(result, expected) + with pytest.raises(ValueError, match=r".* must be monotonic"): + df.groupby("A").rolling( + "4s", on="B" + ).C.mean() # should raise for non-monotonic t series df2 = df.sort_values("B") result = df2.groupby("A").rolling("4s", on="B").C.mean()