From 181124c272e279e125d077962aafe564b753b3f9 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 21 Dec 2020 21:29:22 -0800 Subject: [PATCH 1/9] Enforce stricter win_type values --- pandas/core/window/rolling.py | 46 ++++++++++++----------------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index a45fc9e89a7f7..4f414e2c5834c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -108,7 +108,7 @@ def __init__( self.min_periods = min_periods self.center = center self.win_type = win_type - self.win_freq = None + self._win_freq_i8 = None self.axis = obj._get_axis_number(axis) if axis is not None else None self.validate() @@ -120,10 +120,6 @@ def is_datetimelike(self) -> Optional[bool]: def _on(self): return None - @property - def is_freq_type(self) -> bool: - return self.win_type == "freq" - def validate(self) -> None: if self.center is not None and not is_bool(self.center): raise ValueError("center must be a boolean") @@ -329,9 +325,9 @@ def _get_window_indexer(self) -> BaseIndexer: """ if isinstance(self.window, BaseIndexer): return self.window - if self.is_freq_type: + if self._win_freq_i8 is not None: return VariableWindowIndexer( - index_array=self._index_array, window_size=self.window + index_array=self._index_array, window_size=self._win_freq_i8 ) return FixedWindowIndexer(window_size=self.window) @@ -816,7 +812,6 @@ def _gotitem(self, key, ndim, subset=None): # when we do the splitting for the groupby if self.on is not None: self.obj = self.obj.set_index(self._on) - self.on = None return super()._gotitem(key, ndim, subset=subset) def _validate_monotonic(self): @@ -1669,9 +1664,7 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): # to the rolling constructors the data used when constructing self: # window width, frequency data, or a BaseIndexer subclass # GH 16058: offset window - window = ( - self._get_cov_corr_window(other) if not self.is_freq_type else self.win_freq - ) + window = self._get_cov_corr_window(other) def _get_cov(X, Y): # GH #12373 : rolling functions error on float32 data @@ -1814,9 +1807,7 @@ def corr(self, other=None, pairwise=None, **kwargs): # to the rolling constructors the data used when constructing self: # window width, frequency data, or a BaseIndexer subclass # GH 16058: offset window - window = ( - self._get_cov_corr_window(other) if not self.is_freq_type else self.win_freq - ) + window = self._get_cov_corr_window(other) def _get_corr(a, b): a = a.rolling( @@ -1878,9 +1869,7 @@ def validate(self): ) # this will raise ValueError on non-fixed freqs - self.win_freq = self.window - self.window = self._determine_window_length() - self.win_type = "freq" + self._win_freq_i8 = self._determine_window_length() # min_periods must be an integer if self.min_periods is None: @@ -1899,7 +1888,13 @@ def _determine_window_length(self) -> Union[int, float]: Calculate freq for PeriodIndexes based on Index freq. Can not use nanos, because asi8 of PeriodIndex is not in nanos """ - freq = self._validate_freq() + try: + freq = to_offset(self.window) + except (TypeError, ValueError) as err: + raise ValueError( + f"passed window {self.window} is not " + "compatible with a datetimelike index" + ) from err if isinstance(self._on, ABCPeriodIndex): return freq.nanos / (self._on.freq.nanos / self._on.freq.n) return freq.nanos @@ -1917,18 +1912,6 @@ def _raise_monotonic_error(self): formatted = "index" raise ValueError(f"{formatted} must be monotonic") - def _validate_freq(self): - """ - Validate & return window frequency. - """ - try: - return to_offset(self.window) - except (TypeError, ValueError) as err: - raise ValueError( - f"passed window {self.window} is not " - "compatible with a datetimelike index" - ) from err - _agg_see_also_doc = dedent( """ See Also @@ -2138,8 +2121,9 @@ def _get_window_indexer(self) -> GroupbyIndexer: # We'll be using the index of each group later indexer_kwargs.pop("index_array", None) window = 0 - elif self.is_freq_type: + elif self._win_freq_i8 is not None: rolling_indexer = VariableWindowIndexer + window = self._win_freq_i8 else: rolling_indexer = FixedWindowIndexer index_array = None From 554e58a29eaf5f2f55fa7283c14d99da1cf1ed9c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 21 Dec 2020 21:50:41 -0800 Subject: [PATCH 2/9] Move _win_freq_i8 to different location --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 4f414e2c5834c..8e07a8c86b062 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -108,8 +108,8 @@ def __init__( self.min_periods = min_periods self.center = center self.win_type = win_type - self._win_freq_i8 = None self.axis = obj._get_axis_number(axis) if axis is not None else None + self._win_freq_i8 = None self.validate() @property From ae164513706e890310c4ff1c615626166adbe3be Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 21 Dec 2020 22:07:28 -0800 Subject: [PATCH 3/9] fix mypy --- pandas/core/window/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 8e07a8c86b062..0d9ab3afdb4a0 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -109,7 +109,7 @@ def __init__( self.center = center self.win_type = win_type self.axis = obj._get_axis_number(axis) if axis is not None else None - self._win_freq_i8 = None + self._win_freq_i8: Optional[Union[int]] = None self.validate() @property @@ -1883,7 +1883,7 @@ def validate(self): elif self.window < 0: raise ValueError("window must be non-negative") - def _determine_window_length(self) -> Union[int, float]: + def _determine_window_length(self) -> int: """ Calculate freq for PeriodIndexes based on Index freq. Can not use nanos, because asi8 of PeriodIndex is not in nanos From 2da6ed4e8821d821c82916d2f6dc85b0a93203ea Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 21 Dec 2020 22:18:49 -0800 Subject: [PATCH 4/9] remove single used helper function --- pandas/core/window/rolling.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 0d9ab3afdb4a0..bd6db054eb7ee 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -109,7 +109,7 @@ def __init__( self.center = center self.win_type = win_type self.axis = obj._get_axis_number(axis) if axis is not None else None - self._win_freq_i8: Optional[Union[int]] = None + self._win_freq_i8 = None self.validate() @property @@ -1869,7 +1869,17 @@ def validate(self): ) # this will raise ValueError on non-fixed freqs - self._win_freq_i8 = self._determine_window_length() + try: + freq = to_offset(self.window) + except (TypeError, ValueError) as err: + raise ValueError( + f"passed window {self.window} is not " + "compatible with a datetimelike index" + ) from err + if isinstance(self._on, ABCPeriodIndex): + self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n) + else: + self._win_freq_i8 = freq.nanos # min_periods must be an integer if self.min_periods is None: @@ -1883,22 +1893,6 @@ def validate(self): elif self.window < 0: raise ValueError("window must be non-negative") - def _determine_window_length(self) -> int: - """ - Calculate freq for PeriodIndexes based on Index freq. Can not use - nanos, because asi8 of PeriodIndex is not in nanos - """ - try: - freq = to_offset(self.window) - except (TypeError, ValueError) as err: - raise ValueError( - f"passed window {self.window} is not " - "compatible with a datetimelike index" - ) from err - if isinstance(self._on, ABCPeriodIndex): - return freq.nanos / (self._on.freq.nanos / self._on.freq.n) - return freq.nanos - def _validate_monotonic(self): """ Validate monotonic (increasing or decreasing). From 8c75fdee0051d153a583a9193b66bbeef710ce0d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 21 Dec 2020 23:26:57 -0800 Subject: [PATCH 5/9] make win_type errors more consistent --- pandas/core/window/rolling.py | 36 ++++++++++++---------------- pandas/tests/window/test_rolling.py | 2 +- pandas/tests/window/test_win_type.py | 2 +- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index bd6db054eb7ee..749a60194c702 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -110,6 +110,7 @@ def __init__( self.win_type = win_type self.axis = obj._get_axis_number(axis) if axis is not None else None self._win_freq_i8 = None + self._scipy_weight_generator = None self.validate() @property @@ -991,18 +992,17 @@ def validate(self): raise NotImplementedError( "BaseIndexer subclasses not implemented with win_types." ) - elif is_integer(self.window): - if self.window <= 0: - raise ValueError("window must be > 0 ") - sig = import_optional_dependency( - "scipy.signal", extra="Scipy is required to generate window weight." - ) - if not isinstance(self.win_type, str): - raise ValueError(f"Invalid win_type {self.win_type}") - if getattr(sig, self.win_type, None) is None: - raise ValueError(f"Invalid win_type {self.win_type}") - else: - raise ValueError(f"Invalid window {self.window}") + elif not is_integer(self.window) or self.window < 0: + raise ValueError("window must be an integer 0 or greater") + + if not isinstance(self.win_type, str): + raise ValueError(f"Invalid win_type {self.win_type}") + signal = import_optional_dependency( + "scipy.signal", extra="Scipy is required to generate window weight." + ) + self._scipy_weight_generator = getattr(signal, self.win_type, None) + if self._scipy_weight_generator is None: + raise ValueError(f"Invalid win_type {self.win_type}") def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: """ @@ -1042,11 +1042,7 @@ def _apply( ------- y : type of input """ - signal = import_optional_dependency( - "scipy.signal", extra="Scipy is required to generate window weight." - ) - assert self.win_type is not None # for mypy - window = getattr(signal, self.win_type)(self.window, **kwargs) + window = self._scipy_weight_generator(self.window, **kwargs) offset = (len(window) - 1) // 2 if self.center else 0 def homogeneous_func(values: np.ndarray): @@ -1888,10 +1884,8 @@ def validate(self): elif isinstance(self.window, BaseIndexer): # Passed BaseIndexer subclass should handle all other rolling kwargs return - elif not is_integer(self.window): - raise ValueError("window must be an integer") - elif self.window < 0: - raise ValueError("window must be non-negative") + elif not is_integer(self.window) or self.window < 0: + raise ValueError("window must be an integer 0 or greater") def _validate_monotonic(self): """ diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 10b23cadfe279..f75700a48c795 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -44,7 +44,7 @@ def test_constructor(frame_or_series): # GH 13383 - msg = "window must be non-negative" + msg = "window must be an integer 0 or greater" with pytest.raises(ValueError, match=msg): c(-1) diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 091b5914a7c3e..dab4c526ad0d2 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -89,7 +89,7 @@ def test_constructor_with_win_type_invalid(frame_or_series): # GH 13383 c = frame_or_series(range(5)).rolling - msg = "window must be > 0" + msg = "window must be an integer 0 or greater" with pytest.raises(ValueError, match=msg): c(-1, win_type="boxcar") From 7749b7d615e50fbc88e8f1d7270cbf1250002dbf Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 22 Dec 2020 12:33:07 -0800 Subject: [PATCH 6/9] Remove attribute --- pandas/core/window/rolling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 749a60194c702..6897e39f1bdfb 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -110,7 +110,6 @@ def __init__( self.win_type = win_type self.axis = obj._get_axis_number(axis) if axis is not None else None self._win_freq_i8 = None - self._scipy_weight_generator = None self.validate() @property From b6009fae173a2f56563d3aef401640133c3cb015 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 22 Dec 2020 13:19:32 -0800 Subject: [PATCH 7/9] Test consistent error with win_type=freq --- pandas/core/window/rolling.py | 14 +++++++------- pandas/tests/window/test_win_type.py | 11 ++++++++++- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 6897e39f1bdfb..43d09fbaace3b 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -987,13 +987,6 @@ class Window(BaseWindow): def validate(self): super().validate() - if isinstance(self.window, BaseIndexer): - raise NotImplementedError( - "BaseIndexer subclasses not implemented with win_types." - ) - elif not is_integer(self.window) or self.window < 0: - raise ValueError("window must be an integer 0 or greater") - if not isinstance(self.win_type, str): raise ValueError(f"Invalid win_type {self.win_type}") signal = import_optional_dependency( @@ -1003,6 +996,13 @@ def validate(self): if self._scipy_weight_generator is None: raise ValueError(f"Invalid win_type {self.win_type}") + if isinstance(self.window, BaseIndexer): + raise NotImplementedError( + "BaseIndexer subclasses not implemented with win_types." + ) + elif not is_integer(self.window) or self.window < 0: + raise ValueError("window must be an integer 0 or greater") + def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: """ Center the result in the window for weighted rolling aggregations. diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index dab4c526ad0d2..fa0219bea8042 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -4,7 +4,7 @@ from pandas.errors import UnsupportedFunctionCall import pandas.util._test_decorators as td -from pandas import DataFrame, Series, concat +from pandas import DataFrame, Series, Timedelta, concat import pandas._testing as tm @@ -117,3 +117,12 @@ def b(x): expected.columns = ["a", "b"] result = r.aggregate([a, b]) tm.assert_frame_equal(result, expected) + + +@td.skip_if_no_scipy +@pytest.mark.parametrize("arg", [2000000000, "2s", Timedelta("2s")]) +def test_consistent_win_type_freq(arg): + # GH 15969 + s = Series(range(1)) + with pytest.raises(ValueError, match="Invalid win_type freq"): + s.rolling(arg, win_type="freq") From bd47ea9f5a3a6aedfa9f2752d1fab462574900e8 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 22 Dec 2020 13:21:20 -0800 Subject: [PATCH 8/9] add whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index fbd2c2b5345fc..37ac9b0d90f90 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -42,6 +42,7 @@ Other enhancements - Added :meth:`MultiIndex.dtypes` (:issue:`37062`) - Added ``end`` and ``end_day`` options for ``origin`` in :meth:`DataFrame.resample` (:issue:`37804`) - Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`) +- Improved consistency of error message when passing an invalid ``win_type`` argument in :class:`Window` (:issue:`15969`) .. --------------------------------------------------------------------------- From 8e27e0b6014c198365672e7db915835dd675f5e1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 22 Dec 2020 17:34:43 -0800 Subject: [PATCH 9/9] Move baseindexer test that uses win_type --- pandas/tests/window/test_base_indexer.py | 11 ----------- pandas/tests/window/test_win_type.py | 13 +++++++++++++ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 1723330ec40e1..94bc755f300a2 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -73,17 +73,6 @@ def get_window_bounds(self, num_values, min_periods, center, closed): tm.assert_frame_equal(result, expected) -def test_win_type_not_implemented(): - class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): - return np.array([0, 1]), np.array([1, 2]) - - df = DataFrame({"values": range(2)}) - indexer = CustomIndexer() - with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"): - df.rolling(indexer, win_type="boxcar") - - @pytest.mark.parametrize("constructor", [Series, DataFrame]) @pytest.mark.parametrize( "func,np_func,expected,np_kwargs", diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index fa0219bea8042..f09feef54fa16 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -6,6 +6,7 @@ from pandas import DataFrame, Series, Timedelta, concat import pandas._testing as tm +from pandas.api.indexers import BaseIndexer @td.skip_if_no_scipy @@ -126,3 +127,15 @@ def test_consistent_win_type_freq(arg): s = Series(range(1)) with pytest.raises(ValueError, match="Invalid win_type freq"): s.rolling(arg, win_type="freq") + + +@td.skip_if_no_scipy +def test_win_type_not_implemented(): + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + return np.array([0, 1]), np.array([1, 2]) + + df = DataFrame({"values": range(2)}) + indexer = CustomIndexer() + with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"): + df.rolling(indexer, win_type="boxcar")