Skip to content

ENH/ERR: More consistent error reporting with invalid win_type in rolling #38641

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Dec 23, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ Other enhancements
- Added :meth:`MultiIndex.dtypes` (:issue:`37062`)
- Added ``end`` and ``end_day`` options for ``origin`` in :meth:`DataFrame.resample` (:issue:`37804`)
- Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`)
- Improved consistency of error message when passing an invalid ``win_type`` argument in :class:`Window` (:issue:`15969`)

.. ---------------------------------------------------------------------------

Expand Down
93 changes: 32 additions & 61 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ def __init__(
self.min_periods = min_periods
self.center = center
self.win_type = win_type
self.win_freq = None
self.axis = obj._get_axis_number(axis) if axis is not None else None
self._win_freq_i8 = None
self.validate()

@property
Expand All @@ -120,10 +120,6 @@ def is_datetimelike(self) -> Optional[bool]:
def _on(self):
return None

@property
def is_freq_type(self) -> bool:
return self.win_type == "freq"

def validate(self) -> None:
if self.center is not None and not is_bool(self.center):
raise ValueError("center must be a boolean")
Expand Down Expand Up @@ -329,9 +325,9 @@ def _get_window_indexer(self) -> BaseIndexer:
"""
if isinstance(self.window, BaseIndexer):
return self.window
if self.is_freq_type:
if self._win_freq_i8 is not None:
return VariableWindowIndexer(
index_array=self._index_array, window_size=self.window
index_array=self._index_array, window_size=self._win_freq_i8
)
return FixedWindowIndexer(window_size=self.window)

Expand Down Expand Up @@ -816,7 +812,6 @@ def _gotitem(self, key, ndim, subset=None):
# when we do the splitting for the groupby
if self.on is not None:
self.obj = self.obj.set_index(self._on)
self.on = None
return super()._gotitem(key, ndim, subset=subset)

def _validate_monotonic(self):
Expand Down Expand Up @@ -992,22 +987,21 @@ class Window(BaseWindow):
def validate(self):
super().validate()

if not isinstance(self.win_type, str):
raise ValueError(f"Invalid win_type {self.win_type}")
signal = import_optional_dependency(
"scipy.signal", extra="Scipy is required to generate window weight."
)
self._scipy_weight_generator = getattr(signal, self.win_type, None)
if self._scipy_weight_generator is None:
raise ValueError(f"Invalid win_type {self.win_type}")

if isinstance(self.window, BaseIndexer):
raise NotImplementedError(
"BaseIndexer subclasses not implemented with win_types."
)
elif is_integer(self.window):
if self.window <= 0:
raise ValueError("window must be > 0 ")
sig = import_optional_dependency(
"scipy.signal", extra="Scipy is required to generate window weight."
)
if not isinstance(self.win_type, str):
raise ValueError(f"Invalid win_type {self.win_type}")
if getattr(sig, self.win_type, None) is None:
raise ValueError(f"Invalid win_type {self.win_type}")
else:
raise ValueError(f"Invalid window {self.window}")
elif not is_integer(self.window) or self.window < 0:
raise ValueError("window must be an integer 0 or greater")

def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray:
"""
Expand Down Expand Up @@ -1047,11 +1041,7 @@ def _apply(
-------
y : type of input
"""
signal = import_optional_dependency(
"scipy.signal", extra="Scipy is required to generate window weight."
)
assert self.win_type is not None # for mypy
window = getattr(signal, self.win_type)(self.window, **kwargs)
window = self._scipy_weight_generator(self.window, **kwargs)
offset = (len(window) - 1) // 2 if self.center else 0

def homogeneous_func(values: np.ndarray):
Expand Down Expand Up @@ -1669,9 +1659,7 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
# to the rolling constructors the data used when constructing self:
# window width, frequency data, or a BaseIndexer subclass
# GH 16058: offset window
window = (
self._get_cov_corr_window(other) if not self.is_freq_type else self.win_freq
)
window = self._get_cov_corr_window(other)

def _get_cov(X, Y):
# GH #12373 : rolling functions error on float32 data
Expand Down Expand Up @@ -1814,9 +1802,7 @@ def corr(self, other=None, pairwise=None, **kwargs):
# to the rolling constructors the data used when constructing self:
# window width, frequency data, or a BaseIndexer subclass
# GH 16058: offset window
window = (
self._get_cov_corr_window(other) if not self.is_freq_type else self.win_freq
)
window = self._get_cov_corr_window(other)

def _get_corr(a, b):
a = a.rolling(
Expand Down Expand Up @@ -1878,9 +1864,17 @@ def validate(self):
)

# this will raise ValueError on non-fixed freqs
self.win_freq = self.window
self.window = self._determine_window_length()
self.win_type = "freq"
try:
freq = to_offset(self.window)
except (TypeError, ValueError) as err:
raise ValueError(
f"passed window {self.window} is not "
"compatible with a datetimelike index"
) from err
if isinstance(self._on, ABCPeriodIndex):
self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n)
else:
self._win_freq_i8 = freq.nanos

# min_periods must be an integer
if self.min_periods is None:
Expand All @@ -1889,20 +1883,8 @@ def validate(self):
elif isinstance(self.window, BaseIndexer):
# Passed BaseIndexer subclass should handle all other rolling kwargs
return
elif not is_integer(self.window):
raise ValueError("window must be an integer")
elif self.window < 0:
raise ValueError("window must be non-negative")

def _determine_window_length(self) -> Union[int, float]:
"""
Calculate freq for PeriodIndexes based on Index freq. Can not use
nanos, because asi8 of PeriodIndex is not in nanos
"""
freq = self._validate_freq()
if isinstance(self._on, ABCPeriodIndex):
return freq.nanos / (self._on.freq.nanos / self._on.freq.n)
return freq.nanos
elif not is_integer(self.window) or self.window < 0:
raise ValueError("window must be an integer 0 or greater")

def _validate_monotonic(self):
"""
Expand All @@ -1917,18 +1899,6 @@ def _raise_monotonic_error(self):
formatted = "index"
raise ValueError(f"{formatted} must be monotonic")

def _validate_freq(self):
"""
Validate & return window frequency.
"""
try:
return to_offset(self.window)
except (TypeError, ValueError) as err:
raise ValueError(
f"passed window {self.window} is not "
"compatible with a datetimelike index"
) from err

_agg_see_also_doc = dedent(
"""
See Also
Expand Down Expand Up @@ -2138,8 +2108,9 @@ def _get_window_indexer(self) -> GroupbyIndexer:
# We'll be using the index of each group later
indexer_kwargs.pop("index_array", None)
window = 0
elif self.is_freq_type:
elif self._win_freq_i8 is not None:
rolling_indexer = VariableWindowIndexer
window = self._win_freq_i8
else:
rolling_indexer = FixedWindowIndexer
index_array = None
Expand Down
11 changes: 0 additions & 11 deletions pandas/tests/window/test_base_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,6 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
tm.assert_frame_equal(result, expected)


def test_win_type_not_implemented():
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed):
return np.array([0, 1]), np.array([1, 2])

df = DataFrame({"values": range(2)})
indexer = CustomIndexer()
with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"):
df.rolling(indexer, win_type="boxcar")


@pytest.mark.parametrize("constructor", [Series, DataFrame])
@pytest.mark.parametrize(
"func,np_func,expected,np_kwargs",
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_constructor(frame_or_series):

# GH 13383

msg = "window must be non-negative"
msg = "window must be an integer 0 or greater"

with pytest.raises(ValueError, match=msg):
c(-1)
Expand Down
26 changes: 24 additions & 2 deletions pandas/tests/window/test_win_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
from pandas.errors import UnsupportedFunctionCall
import pandas.util._test_decorators as td

from pandas import DataFrame, Series, concat
from pandas import DataFrame, Series, Timedelta, concat
import pandas._testing as tm
from pandas.api.indexers import BaseIndexer


@td.skip_if_no_scipy
Expand Down Expand Up @@ -89,7 +90,7 @@ def test_constructor_with_win_type_invalid(frame_or_series):
# GH 13383
c = frame_or_series(range(5)).rolling

msg = "window must be > 0"
msg = "window must be an integer 0 or greater"

with pytest.raises(ValueError, match=msg):
c(-1, win_type="boxcar")
Expand Down Expand Up @@ -117,3 +118,24 @@ def b(x):
expected.columns = ["a", "b"]
result = r.aggregate([a, b])
tm.assert_frame_equal(result, expected)


@td.skip_if_no_scipy
@pytest.mark.parametrize("arg", [2000000000, "2s", Timedelta("2s")])
def test_consistent_win_type_freq(arg):
# GH 15969
s = Series(range(1))
with pytest.raises(ValueError, match="Invalid win_type freq"):
s.rolling(arg, win_type="freq")


@td.skip_if_no_scipy
def test_win_type_not_implemented():
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed):
return np.array([0, 1]), np.array([1, 2])

df = DataFrame({"values": range(2)})
indexer = CustomIndexer()
with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"):
df.rolling(indexer, win_type="boxcar")