Skip to content

ENH: Improve error message when rolling over NaT value #46087

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Other enhancements
- :meth:`.GroupBy.min` and :meth:`.GroupBy.max` now supports `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`45428`)
- Implemented a ``bool``-dtype :class:`Index`, passing a bool-dtype array-like to ``pd.Index`` will now retain ``bool`` dtype instead of casting to ``object`` (:issue:`45061`)
- Implemented a complex-dtype :class:`Index`, passing a complex-dtype array-like to ``pd.Index`` will now retain complex dtype instead of casting to ``object`` (:issue:`45845`)
- Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`)
- :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`)
-

Expand Down
40 changes: 15 additions & 25 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,12 +837,6 @@ def _gotitem(self, key, ndim, subset=None):
subset = self.obj.set_index(self._on)
return super()._gotitem(key, ndim, subset=subset)

def _validate_monotonic(self):
"""
Validate that "on" is monotonic; already validated at a higher level.
"""
pass


class Window(BaseWindow):
"""
Expand Down Expand Up @@ -1687,7 +1681,7 @@ def _validate(self):
or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex))
) and isinstance(self.window, (str, BaseOffset, timedelta)):

self._validate_monotonic()
self._validate_datetimelike_monotonic()

# this will raise ValueError on non-fixed freqs
try:
Expand All @@ -1712,18 +1706,24 @@ def _validate(self):
elif not is_integer(self.window) or self.window < 0:
raise ValueError("window must be an integer 0 or greater")

def _validate_monotonic(self):
def _validate_datetimelike_monotonic(self):
"""
Validate monotonic (increasing or decreasing).
Validate self._on is monotonic (increasing or decreasing) and has
no NaT values for frequency windows.
"""
if self._on.hasnans:
self._raise_monotonic_error("values must not have NaT")
if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
self._raise_monotonic_error()
self._raise_monotonic_error("values must be monotonic")

def _raise_monotonic_error(self):
formatted = self.on
if self.on is None:
formatted = "index"
raise ValueError(f"{formatted} must be monotonic")
def _raise_monotonic_error(self, msg: str):
on = self.on
if on is None:
if self.axis == 0:
on = "index"
else:
on = "column"
raise ValueError(f"{on} {msg}")

@doc(
_shared_docs["aggregate"],
Expand Down Expand Up @@ -2630,13 +2630,3 @@ def _get_window_indexer(self) -> GroupbyIndexer:
indexer_kwargs=indexer_kwargs,
)
return window_indexer

def _validate_monotonic(self):
"""
Validate that on is monotonic;
"""
if (
not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing)
or self._on.hasnans
):
self._raise_monotonic_error()
2 changes: 1 addition & 1 deletion pandas/tests/window/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ def test_groupby_rolling_nans_in_index(self, rollings, key):
)
if key == "index":
df = df.set_index("a")
with pytest.raises(ValueError, match=f"{key} must be monotonic"):
with pytest.raises(ValueError, match=f"{key} values must not have NaT"):
df.groupby("c").rolling("60min", **rollings)

@pytest.mark.parametrize("group_keys", [True, False])
Expand Down
12 changes: 11 additions & 1 deletion pandas/tests/window/test_timeseries_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
DataFrame,
Index,
MultiIndex,
NaT,
Series,
Timestamp,
date_range,
Expand Down Expand Up @@ -139,7 +140,7 @@ def test_non_monotonic_on(self):

assert not df.index.is_monotonic_increasing

msg = "index must be monotonic"
msg = "index values must be monotonic"
with pytest.raises(ValueError, match=msg):
df.rolling("2s").sum()

Expand Down Expand Up @@ -762,3 +763,12 @@ def test_rolling_on_multi_index_level(self):
{"column": [0.0, 1.0, 3.0, 6.0, 10.0, 15.0]}, index=df.index
)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("msg, axis", [["column", 1], ["index", 0]])
def test_nat_axis_error(msg, axis):
idx = [Timestamp("2020"), NaT]
kwargs = {"columns" if axis == 1 else "index": idx}
df = DataFrame(np.eye(2), **kwargs)
with pytest.raises(ValueError, match=f"{msg} values must not have NaT"):
df.rolling("D", axis=axis).mean()