diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 53f254aee2e0e..bca8b8e4febf2 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -42,6 +42,7 @@ Other enhancements - Added :meth:`MultiIndex.dtypes` (:issue:`37062`) - Added ``end`` and ``end_day`` options for ``origin`` in :meth:`DataFrame.resample` (:issue:`37804`) - Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`) +- Improved consistency of error message when passing an invalid ``win_type`` argument in :class:`Window` (:issue:`15969`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index a45fc9e89a7f7..43d09fbaace3b 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -108,8 +108,8 @@ def __init__( self.min_periods = min_periods self.center = center self.win_type = win_type - self.win_freq = None self.axis = obj._get_axis_number(axis) if axis is not None else None + self._win_freq_i8 = None self.validate() @property @@ -120,10 +120,6 @@ def is_datetimelike(self) -> Optional[bool]: def _on(self): return None - @property - def is_freq_type(self) -> bool: - return self.win_type == "freq" - def validate(self) -> None: if self.center is not None and not is_bool(self.center): raise ValueError("center must be a boolean") @@ -329,9 +325,9 @@ def _get_window_indexer(self) -> BaseIndexer: """ if isinstance(self.window, BaseIndexer): return self.window - if self.is_freq_type: + if self._win_freq_i8 is not None: return VariableWindowIndexer( - index_array=self._index_array, window_size=self.window + index_array=self._index_array, window_size=self._win_freq_i8 ) return FixedWindowIndexer(window_size=self.window) @@ -816,7 +812,6 @@ def _gotitem(self, key, ndim, subset=None): # when we do the splitting for the groupby if self.on is not None: self.obj = self.obj.set_index(self._on) - self.on = None return super()._gotitem(key, ndim, subset=subset) def _validate_monotonic(self): @@ -992,22 +987,21 @@ class Window(BaseWindow): def validate(self): super().validate() + if not isinstance(self.win_type, str): + raise ValueError(f"Invalid win_type {self.win_type}") + signal = import_optional_dependency( + "scipy.signal", extra="Scipy is required to generate window weight." + ) + self._scipy_weight_generator = getattr(signal, self.win_type, None) + if self._scipy_weight_generator is None: + raise ValueError(f"Invalid win_type {self.win_type}") + if isinstance(self.window, BaseIndexer): raise NotImplementedError( "BaseIndexer subclasses not implemented with win_types." ) - elif is_integer(self.window): - if self.window <= 0: - raise ValueError("window must be > 0 ") - sig = import_optional_dependency( - "scipy.signal", extra="Scipy is required to generate window weight." - ) - if not isinstance(self.win_type, str): - raise ValueError(f"Invalid win_type {self.win_type}") - if getattr(sig, self.win_type, None) is None: - raise ValueError(f"Invalid win_type {self.win_type}") - else: - raise ValueError(f"Invalid window {self.window}") + elif not is_integer(self.window) or self.window < 0: + raise ValueError("window must be an integer 0 or greater") def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: """ @@ -1047,11 +1041,7 @@ def _apply( ------- y : type of input """ - signal = import_optional_dependency( - "scipy.signal", extra="Scipy is required to generate window weight." - ) - assert self.win_type is not None # for mypy - window = getattr(signal, self.win_type)(self.window, **kwargs) + window = self._scipy_weight_generator(self.window, **kwargs) offset = (len(window) - 1) // 2 if self.center else 0 def homogeneous_func(values: np.ndarray): @@ -1669,9 +1659,7 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): # to the rolling constructors the data used when constructing self: # window width, frequency data, or a BaseIndexer subclass # GH 16058: offset window - window = ( - self._get_cov_corr_window(other) if not self.is_freq_type else self.win_freq - ) + window = self._get_cov_corr_window(other) def _get_cov(X, Y): # GH #12373 : rolling functions error on float32 data @@ -1814,9 +1802,7 @@ def corr(self, other=None, pairwise=None, **kwargs): # to the rolling constructors the data used when constructing self: # window width, frequency data, or a BaseIndexer subclass # GH 16058: offset window - window = ( - self._get_cov_corr_window(other) if not self.is_freq_type else self.win_freq - ) + window = self._get_cov_corr_window(other) def _get_corr(a, b): a = a.rolling( @@ -1878,9 +1864,17 @@ def validate(self): ) # this will raise ValueError on non-fixed freqs - self.win_freq = self.window - self.window = self._determine_window_length() - self.win_type = "freq" + try: + freq = to_offset(self.window) + except (TypeError, ValueError) as err: + raise ValueError( + f"passed window {self.window} is not " + "compatible with a datetimelike index" + ) from err + if isinstance(self._on, ABCPeriodIndex): + self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n) + else: + self._win_freq_i8 = freq.nanos # min_periods must be an integer if self.min_periods is None: @@ -1889,20 +1883,8 @@ def validate(self): elif isinstance(self.window, BaseIndexer): # Passed BaseIndexer subclass should handle all other rolling kwargs return - elif not is_integer(self.window): - raise ValueError("window must be an integer") - elif self.window < 0: - raise ValueError("window must be non-negative") - - def _determine_window_length(self) -> Union[int, float]: - """ - Calculate freq for PeriodIndexes based on Index freq. Can not use - nanos, because asi8 of PeriodIndex is not in nanos - """ - freq = self._validate_freq() - if isinstance(self._on, ABCPeriodIndex): - return freq.nanos / (self._on.freq.nanos / self._on.freq.n) - return freq.nanos + elif not is_integer(self.window) or self.window < 0: + raise ValueError("window must be an integer 0 or greater") def _validate_monotonic(self): """ @@ -1917,18 +1899,6 @@ def _raise_monotonic_error(self): formatted = "index" raise ValueError(f"{formatted} must be monotonic") - def _validate_freq(self): - """ - Validate & return window frequency. - """ - try: - return to_offset(self.window) - except (TypeError, ValueError) as err: - raise ValueError( - f"passed window {self.window} is not " - "compatible with a datetimelike index" - ) from err - _agg_see_also_doc = dedent( """ See Also @@ -2138,8 +2108,9 @@ def _get_window_indexer(self) -> GroupbyIndexer: # We'll be using the index of each group later indexer_kwargs.pop("index_array", None) window = 0 - elif self.is_freq_type: + elif self._win_freq_i8 is not None: rolling_indexer = VariableWindowIndexer + window = self._win_freq_i8 else: rolling_indexer = FixedWindowIndexer index_array = None diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 1723330ec40e1..94bc755f300a2 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -73,17 +73,6 @@ def get_window_bounds(self, num_values, min_periods, center, closed): tm.assert_frame_equal(result, expected) -def test_win_type_not_implemented(): - class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): - return np.array([0, 1]), np.array([1, 2]) - - df = DataFrame({"values": range(2)}) - indexer = CustomIndexer() - with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"): - df.rolling(indexer, win_type="boxcar") - - @pytest.mark.parametrize("constructor", [Series, DataFrame]) @pytest.mark.parametrize( "func,np_func,expected,np_kwargs", diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 10b23cadfe279..f75700a48c795 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -44,7 +44,7 @@ def test_constructor(frame_or_series): # GH 13383 - msg = "window must be non-negative" + msg = "window must be an integer 0 or greater" with pytest.raises(ValueError, match=msg): c(-1) diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 091b5914a7c3e..f09feef54fa16 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -4,8 +4,9 @@ from pandas.errors import UnsupportedFunctionCall import pandas.util._test_decorators as td -from pandas import DataFrame, Series, concat +from pandas import DataFrame, Series, Timedelta, concat import pandas._testing as tm +from pandas.api.indexers import BaseIndexer @td.skip_if_no_scipy @@ -89,7 +90,7 @@ def test_constructor_with_win_type_invalid(frame_or_series): # GH 13383 c = frame_or_series(range(5)).rolling - msg = "window must be > 0" + msg = "window must be an integer 0 or greater" with pytest.raises(ValueError, match=msg): c(-1, win_type="boxcar") @@ -117,3 +118,24 @@ def b(x): expected.columns = ["a", "b"] result = r.aggregate([a, b]) tm.assert_frame_equal(result, expected) + + +@td.skip_if_no_scipy +@pytest.mark.parametrize("arg", [2000000000, "2s", Timedelta("2s")]) +def test_consistent_win_type_freq(arg): + # GH 15969 + s = Series(range(1)) + with pytest.raises(ValueError, match="Invalid win_type freq"): + s.rolling(arg, win_type="freq") + + +@td.skip_if_no_scipy +def test_win_type_not_implemented(): + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + return np.array([0, 1]), np.array([1, 2]) + + df = DataFrame({"values": range(2)}) + indexer = CustomIndexer() + with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"): + df.rolling(indexer, win_type="boxcar")