pandas-dev · simonjayhawkins · Apr 1, 2022 · Apr 1, 2022 · Mar 31, 2022
diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst
@@ -32,6 +32,8 @@ Bug fixes
 - Fix some cases for subclasses that define their ``_constructor`` properties as general callables (:issue:`46018`)
 - Fixed "longtable" formatting in :meth:`.Styler.to_latex` when ``column_format`` is given in extended format (:issue:`46037`)
 - Fixed incorrect rendering in :meth:`.Styler.format` with ``hyperlinks="html"`` when the url contains a colon or other special characters (:issue:`46389`)
+- Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`)
+- Fixed :meth:`Groupby.rolling` with a frequency window that would raise a ``ValueError`` even if the datetimes within each group were monotonic (:issue:`46061`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -837,12 +837,6 @@ def _gotitem(self, key, ndim, subset=None):
             subset = self.obj.set_index(self._on)
         return super()._gotitem(key, ndim, subset=subset)
 
-    def _validate_monotonic(self):
-        """
-        Validate that "on" is monotonic; already validated at a higher level.
-        """
-        pass
-
 
 class Window(BaseWindow):
     """
@@ -1687,7 +1681,7 @@ def _validate(self):
             or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex))
         ) and isinstance(self.window, (str, BaseOffset, timedelta)):
 
-            self._validate_monotonic()
+            self._validate_datetimelike_monotonic()
 
             # this will raise ValueError on non-fixed freqs
             try:
@@ -1712,18 +1706,24 @@ def _validate(self):
         elif not is_integer(self.window) or self.window < 0:
             raise ValueError("window must be an integer 0 or greater")
 
-    def _validate_monotonic(self):
+    def _validate_datetimelike_monotonic(self):
         """
-        Validate monotonic (increasing or decreasing).
+        Validate self._on is monotonic (increasing or decreasing) and has
+        no NaT values for frequency windows.
         """
+        if self._on.hasnans:
+            self._raise_monotonic_error("values must not have NaT")
         if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
-            self._raise_monotonic_error()
+            self._raise_monotonic_error("values must be monotonic")
 
-    def _raise_monotonic_error(self):
-        formatted = self.on
-        if self.on is None:
-            formatted = "index"
-        raise ValueError(f"{formatted} must be monotonic")
+    def _raise_monotonic_error(self, msg: str):
+        on = self.on
+        if on is None:
+            if self.axis == 0:
+                on = "index"
+            else:
+                on = "column"
+        raise ValueError(f"{on} {msg}")
 
     @doc(
         _shared_docs["aggregate"],
@@ -2631,12 +2631,20 @@ def _get_window_indexer(self) -> GroupbyIndexer:
         )
         return window_indexer
 
-    def _validate_monotonic(self):
+    def _validate_datetimelike_monotonic(self):
         """
-        Validate that on is monotonic;
+        Validate that each group in self._on is monotonic
         """
-        if (
-            not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing)
-            or self._on.hasnans
-        ):
-            self._raise_monotonic_error()
+        # GH 46061
+        if self._on.hasnans:
+            self._raise_monotonic_error("values must not have NaT")
+        for group_indices in self._grouper.indices.values():
+            group_on = self._on.take(group_indices)
+            if not (
+                group_on.is_monotonic_increasing or group_on.is_monotonic_decreasing
+            ):
+                on = "index" if self.on is None else self.on
+                raise ValueError(
+                    f"Each group within {on} must be monotonic. "
+                    f"Sort the values in {on} first."
+                )
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
@@ -651,7 +651,7 @@ def test_groupby_rolling_nans_in_index(self, rollings, key):
         )
         if key == "index":
             df = df.set_index("a")
-        with pytest.raises(ValueError, match=f"{key} must be monotonic"):
+        with pytest.raises(ValueError, match=f"{key} values must not have NaT"):
             df.groupby("c").rolling("60min", **rollings)
 
     @pytest.mark.parametrize("group_keys", [True, False])
@@ -895,6 +895,83 @@ def test_nan_and_zero_endpoints(self):
         )
         tm.assert_series_equal(result, expected)
 
+    def test_groupby_rolling_non_monotonic(self):
+        # GH 43909
+
+        shuffled = [3, 0, 1, 2]
+        sec = 1_000
+        df = DataFrame(
+            [{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
+        )
+        with pytest.raises(ValueError, match=r".* must be monotonic"):
+            df.groupby("c").rolling(on="t", window="3s")
+
+    def test_groupby_monotonic(self):
+
+        # GH 15130
+        # we don't need to validate monotonicity when grouping
+
+        # GH 43909 we should raise an error here to match
+        # behaviour of non-groupby rolling.
+
+        data = [
+            ["David", "1/1/2015", 100],
+            ["David", "1/5/2015", 500],
+            ["David", "5/30/2015", 50],
+            ["David", "7/25/2015", 50],
+            ["Ryan", "1/4/2014", 100],
+            ["Ryan", "1/19/2015", 500],
+            ["Ryan", "3/31/2016", 50],
+            ["Joe", "7/1/2015", 100],
+            ["Joe", "9/9/2015", 500],
+            ["Joe", "10/15/2015", 50],
+        ]
+
+        df = DataFrame(data=data, columns=["name", "date", "amount"])
+        df["date"] = to_datetime(df["date"])
+        df = df.sort_values("date")
+
+        expected = (
+            df.set_index("date")
+            .groupby("name")
+            .apply(lambda x: x.rolling("180D")["amount"].sum())
+        )
+        result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
+        tm.assert_series_equal(result, expected)
+
+    def test_datelike_on_monotonic_within_each_group(self):
+        # GH 13966 (similar to #15130, closed by #15175)
+
+        # superseded by 43909
+        # GH 46061: OK if the on is monotonic relative to each each group
+
+        dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
+        df = DataFrame(
+            {
+                "A": [1] * 20 + [2] * 12 + [3] * 8,
+                "B": np.concatenate((dates, dates)),
+                "C": np.arange(40),
+            }
+        )
+
+        expected = (
+            df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
+        )
+        result = df.groupby("A").rolling("4s", on="B").C.mean()
+        tm.assert_series_equal(result, expected)
+
+    def test_datelike_on_not_monotonic_within_each_group(self):
+        # GH 46061
+        df = DataFrame(
+            {
+                "A": [1] * 3 + [2] * 3,
+                "B": [Timestamp(year, 1, 1) for year in [2020, 2021, 2019]] * 2,
+                "C": range(6),
+            }
+        )
+        with pytest.raises(ValueError, match="Each group within B must be monotonic."):
+            df.groupby("A").rolling("365D", on="B")
+
 
 class TestExpanding:
     def setup_method(self):

diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
@@ -1419,18 +1419,6 @@ def test_groupby_rolling_nan_included():
     tm.assert_frame_equal(result, expected)
 
 
-def test_groupby_rolling_non_monotonic():
-    # GH 43909
-
-    shuffled = [3, 0, 1, 2]
-    sec = 1_000
-    df = DataFrame(
-        [{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
-    )
-    with pytest.raises(ValueError, match=r".* must be monotonic"):
-        df.groupby("c").rolling(on="t", window="3s")
-
-
 @pytest.mark.parametrize("method", ["skew", "kurt"])
 def test_rolling_skew_kurt_numerical_stability(method):
     # GH#6929

diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py
@@ -5,10 +5,10 @@
     DataFrame,
     Index,
     MultiIndex,
+    NaT,
     Series,
     Timestamp,
     date_range,
-    to_datetime,
 )
 import pandas._testing as tm
 
@@ -134,7 +134,7 @@ def test_non_monotonic_on(self):
 
         assert not df.index.is_monotonic
 
-        msg = "index must be monotonic"
+        msg = "index values must be monotonic"
         with pytest.raises(ValueError, match=msg):
             df.rolling("2s").sum()
 
@@ -643,65 +643,6 @@ def agg_by_day(x):
 
         tm.assert_frame_equal(result, expected)
 
-    def test_groupby_monotonic(self):
-
-        # GH 15130
-        # we don't need to validate monotonicity when grouping
-
-        # GH 43909 we should raise an error here to match
-        # behaviour of non-groupby rolling.
-
-        data = [
-            ["David", "1/1/2015", 100],
-            ["David", "1/5/2015", 500],
-            ["David", "5/30/2015", 50],
-            ["David", "7/25/2015", 50],
-            ["Ryan", "1/4/2014", 100],
-            ["Ryan", "1/19/2015", 500],
-            ["Ryan", "3/31/2016", 50],
-            ["Joe", "7/1/2015", 100],
-            ["Joe", "9/9/2015", 500],
-            ["Joe", "10/15/2015", 50],
-        ]
-
-        df = DataFrame(data=data, columns=["name", "date", "amount"])
-        df["date"] = to_datetime(df["date"])
-        df = df.sort_values("date")
-
-        expected = (
-            df.set_index("date")
-            .groupby("name")
-            .apply(lambda x: x.rolling("180D")["amount"].sum())
-        )
-        result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
-        tm.assert_series_equal(result, expected)
-
-    def test_non_monotonic_raises(self):
-        # GH 13966 (similar to #15130, closed by #15175)
-
-        # superseded by 43909
-
-        dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
-        df = DataFrame(
-            {
-                "A": [1] * 20 + [2] * 12 + [3] * 8,
-                "B": np.concatenate((dates, dates)),
-                "C": np.arange(40),
-            }
-        )
-
-        expected = (
-            df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
-        )
-        with pytest.raises(ValueError, match=r".* must be monotonic"):
-            df.groupby("A").rolling(
-                "4s", on="B"
-            ).C.mean()  # should raise for non-monotonic t series
-
-        df2 = df.sort_values("B")
-        result = df2.groupby("A").rolling("4s", on="B").C.mean()
-        tm.assert_series_equal(result, expected)
-
     def test_rolling_cov_offset(self):
         # GH16058
 
@@ -757,3 +698,12 @@ def test_rolling_on_multi_index_level(self):
             {"column": [0.0, 1.0, 3.0, 6.0, 10.0, 15.0]}, index=df.index
         )
         tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("msg, axis", [["column", 1], ["index", 0]])
+def test_nat_axis_error(msg, axis):
+    idx = [Timestamp("2020"), NaT]
+    kwargs = {"columns" if axis == 1 else "index": idx}
+    df = DataFrame(np.eye(2), **kwargs)
+    with pytest.raises(ValueError, match=f"{msg} values must not have NaT"):
+        df.rolling("D", axis=axis).mean()