Skip to content

Commit d94b482

Browse files
mroeschkeluckyvs1
authored andcommitted
ENH/ERR: More consistent error reporting with invalid win_type in rolling (pandas-dev#38641)
1 parent 127b6fa commit d94b482

File tree

5 files changed

+58
-75
lines changed

5 files changed

+58
-75
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Other enhancements
4242
- Added :meth:`MultiIndex.dtypes` (:issue:`37062`)
4343
- Added ``end`` and ``end_day`` options for ``origin`` in :meth:`DataFrame.resample` (:issue:`37804`)
4444
- Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`)
45+
- Improved consistency of error message when passing an invalid ``win_type`` argument in :class:`Window` (:issue:`15969`)
4546

4647
.. ---------------------------------------------------------------------------
4748

pandas/core/window/rolling.py

+32-61
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ def __init__(
108108
self.min_periods = min_periods
109109
self.center = center
110110
self.win_type = win_type
111-
self.win_freq = None
112111
self.axis = obj._get_axis_number(axis) if axis is not None else None
112+
self._win_freq_i8 = None
113113
self.validate()
114114

115115
@property
@@ -120,10 +120,6 @@ def is_datetimelike(self) -> Optional[bool]:
120120
def _on(self):
121121
return None
122122

123-
@property
124-
def is_freq_type(self) -> bool:
125-
return self.win_type == "freq"
126-
127123
def validate(self) -> None:
128124
if self.center is not None and not is_bool(self.center):
129125
raise ValueError("center must be a boolean")
@@ -329,9 +325,9 @@ def _get_window_indexer(self) -> BaseIndexer:
329325
"""
330326
if isinstance(self.window, BaseIndexer):
331327
return self.window
332-
if self.is_freq_type:
328+
if self._win_freq_i8 is not None:
333329
return VariableWindowIndexer(
334-
index_array=self._index_array, window_size=self.window
330+
index_array=self._index_array, window_size=self._win_freq_i8
335331
)
336332
return FixedWindowIndexer(window_size=self.window)
337333

@@ -816,7 +812,6 @@ def _gotitem(self, key, ndim, subset=None):
816812
# when we do the splitting for the groupby
817813
if self.on is not None:
818814
self.obj = self.obj.set_index(self._on)
819-
self.on = None
820815
return super()._gotitem(key, ndim, subset=subset)
821816

822817
def _validate_monotonic(self):
@@ -992,22 +987,21 @@ class Window(BaseWindow):
992987
def validate(self):
993988
super().validate()
994989

990+
if not isinstance(self.win_type, str):
991+
raise ValueError(f"Invalid win_type {self.win_type}")
992+
signal = import_optional_dependency(
993+
"scipy.signal", extra="Scipy is required to generate window weight."
994+
)
995+
self._scipy_weight_generator = getattr(signal, self.win_type, None)
996+
if self._scipy_weight_generator is None:
997+
raise ValueError(f"Invalid win_type {self.win_type}")
998+
995999
if isinstance(self.window, BaseIndexer):
9961000
raise NotImplementedError(
9971001
"BaseIndexer subclasses not implemented with win_types."
9981002
)
999-
elif is_integer(self.window):
1000-
if self.window <= 0:
1001-
raise ValueError("window must be > 0 ")
1002-
sig = import_optional_dependency(
1003-
"scipy.signal", extra="Scipy is required to generate window weight."
1004-
)
1005-
if not isinstance(self.win_type, str):
1006-
raise ValueError(f"Invalid win_type {self.win_type}")
1007-
if getattr(sig, self.win_type, None) is None:
1008-
raise ValueError(f"Invalid win_type {self.win_type}")
1009-
else:
1010-
raise ValueError(f"Invalid window {self.window}")
1003+
elif not is_integer(self.window) or self.window < 0:
1004+
raise ValueError("window must be an integer 0 or greater")
10111005

10121006
def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray:
10131007
"""
@@ -1047,11 +1041,7 @@ def _apply(
10471041
-------
10481042
y : type of input
10491043
"""
1050-
signal = import_optional_dependency(
1051-
"scipy.signal", extra="Scipy is required to generate window weight."
1052-
)
1053-
assert self.win_type is not None # for mypy
1054-
window = getattr(signal, self.win_type)(self.window, **kwargs)
1044+
window = self._scipy_weight_generator(self.window, **kwargs)
10551045
offset = (len(window) - 1) // 2 if self.center else 0
10561046

10571047
def homogeneous_func(values: np.ndarray):
@@ -1669,9 +1659,7 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
16691659
# to the rolling constructors the data used when constructing self:
16701660
# window width, frequency data, or a BaseIndexer subclass
16711661
# GH 16058: offset window
1672-
window = (
1673-
self._get_cov_corr_window(other) if not self.is_freq_type else self.win_freq
1674-
)
1662+
window = self._get_cov_corr_window(other)
16751663

16761664
def _get_cov(X, Y):
16771665
# GH #12373 : rolling functions error on float32 data
@@ -1814,9 +1802,7 @@ def corr(self, other=None, pairwise=None, **kwargs):
18141802
# to the rolling constructors the data used when constructing self:
18151803
# window width, frequency data, or a BaseIndexer subclass
18161804
# GH 16058: offset window
1817-
window = (
1818-
self._get_cov_corr_window(other) if not self.is_freq_type else self.win_freq
1819-
)
1805+
window = self._get_cov_corr_window(other)
18201806

18211807
def _get_corr(a, b):
18221808
a = a.rolling(
@@ -1878,9 +1864,17 @@ def validate(self):
18781864
)
18791865

18801866
# this will raise ValueError on non-fixed freqs
1881-
self.win_freq = self.window
1882-
self.window = self._determine_window_length()
1883-
self.win_type = "freq"
1867+
try:
1868+
freq = to_offset(self.window)
1869+
except (TypeError, ValueError) as err:
1870+
raise ValueError(
1871+
f"passed window {self.window} is not "
1872+
"compatible with a datetimelike index"
1873+
) from err
1874+
if isinstance(self._on, ABCPeriodIndex):
1875+
self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n)
1876+
else:
1877+
self._win_freq_i8 = freq.nanos
18841878

18851879
# min_periods must be an integer
18861880
if self.min_periods is None:
@@ -1889,20 +1883,8 @@ def validate(self):
18891883
elif isinstance(self.window, BaseIndexer):
18901884
# Passed BaseIndexer subclass should handle all other rolling kwargs
18911885
return
1892-
elif not is_integer(self.window):
1893-
raise ValueError("window must be an integer")
1894-
elif self.window < 0:
1895-
raise ValueError("window must be non-negative")
1896-
1897-
def _determine_window_length(self) -> Union[int, float]:
1898-
"""
1899-
Calculate freq for PeriodIndexes based on Index freq. Can not use
1900-
nanos, because asi8 of PeriodIndex is not in nanos
1901-
"""
1902-
freq = self._validate_freq()
1903-
if isinstance(self._on, ABCPeriodIndex):
1904-
return freq.nanos / (self._on.freq.nanos / self._on.freq.n)
1905-
return freq.nanos
1886+
elif not is_integer(self.window) or self.window < 0:
1887+
raise ValueError("window must be an integer 0 or greater")
19061888

19071889
def _validate_monotonic(self):
19081890
"""
@@ -1917,18 +1899,6 @@ def _raise_monotonic_error(self):
19171899
formatted = "index"
19181900
raise ValueError(f"{formatted} must be monotonic")
19191901

1920-
def _validate_freq(self):
1921-
"""
1922-
Validate & return window frequency.
1923-
"""
1924-
try:
1925-
return to_offset(self.window)
1926-
except (TypeError, ValueError) as err:
1927-
raise ValueError(
1928-
f"passed window {self.window} is not "
1929-
"compatible with a datetimelike index"
1930-
) from err
1931-
19321902
_agg_see_also_doc = dedent(
19331903
"""
19341904
See Also
@@ -2138,8 +2108,9 @@ def _get_window_indexer(self) -> GroupbyIndexer:
21382108
# We'll be using the index of each group later
21392109
indexer_kwargs.pop("index_array", None)
21402110
window = 0
2141-
elif self.is_freq_type:
2111+
elif self._win_freq_i8 is not None:
21422112
rolling_indexer = VariableWindowIndexer
2113+
window = self._win_freq_i8
21432114
else:
21442115
rolling_indexer = FixedWindowIndexer
21452116
index_array = None

pandas/tests/window/test_base_indexer.py

-11
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,6 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
7373
tm.assert_frame_equal(result, expected)
7474

7575

76-
def test_win_type_not_implemented():
77-
class CustomIndexer(BaseIndexer):
78-
def get_window_bounds(self, num_values, min_periods, center, closed):
79-
return np.array([0, 1]), np.array([1, 2])
80-
81-
df = DataFrame({"values": range(2)})
82-
indexer = CustomIndexer()
83-
with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"):
84-
df.rolling(indexer, win_type="boxcar")
85-
86-
8776
@pytest.mark.parametrize("constructor", [Series, DataFrame])
8877
@pytest.mark.parametrize(
8978
"func,np_func,expected,np_kwargs",

pandas/tests/window/test_rolling.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def test_constructor(frame_or_series):
4444

4545
# GH 13383
4646

47-
msg = "window must be non-negative"
47+
msg = "window must be an integer 0 or greater"
4848

4949
with pytest.raises(ValueError, match=msg):
5050
c(-1)

pandas/tests/window/test_win_type.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
from pandas.errors import UnsupportedFunctionCall
55
import pandas.util._test_decorators as td
66

7-
from pandas import DataFrame, Series, concat
7+
from pandas import DataFrame, Series, Timedelta, concat
88
import pandas._testing as tm
9+
from pandas.api.indexers import BaseIndexer
910

1011

1112
@td.skip_if_no_scipy
@@ -89,7 +90,7 @@ def test_constructor_with_win_type_invalid(frame_or_series):
8990
# GH 13383
9091
c = frame_or_series(range(5)).rolling
9192

92-
msg = "window must be > 0"
93+
msg = "window must be an integer 0 or greater"
9394

9495
with pytest.raises(ValueError, match=msg):
9596
c(-1, win_type="boxcar")
@@ -117,3 +118,24 @@ def b(x):
117118
expected.columns = ["a", "b"]
118119
result = r.aggregate([a, b])
119120
tm.assert_frame_equal(result, expected)
121+
122+
123+
@td.skip_if_no_scipy
124+
@pytest.mark.parametrize("arg", [2000000000, "2s", Timedelta("2s")])
125+
def test_consistent_win_type_freq(arg):
126+
# GH 15969
127+
s = Series(range(1))
128+
with pytest.raises(ValueError, match="Invalid win_type freq"):
129+
s.rolling(arg, win_type="freq")
130+
131+
132+
@td.skip_if_no_scipy
133+
def test_win_type_not_implemented():
134+
class CustomIndexer(BaseIndexer):
135+
def get_window_bounds(self, num_values, min_periods, center, closed):
136+
return np.array([0, 1]), np.array([1, 2])
137+
138+
df = DataFrame({"values": range(2)})
139+
indexer = CustomIndexer()
140+
with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"):
141+
df.rolling(indexer, win_type="boxcar")

0 commit comments

Comments
 (0)