diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 1fdecbca32102..8b1588c35b4de 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -18,7 +18,8 @@ cdef extern from "src/headers/cmath" namespace "std": int signbit(float64_t) nogil float64_t sqrt(float64_t x) nogil -cimport pandas._libs.util as util +from pandas._libs.algos import is_monotonic + from pandas._libs.util cimport numeric from pandas._libs.skiplist cimport ( @@ -37,6 +38,9 @@ cdef: cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b +cdef inline bint is_monotonic_start_end_bounds(ndarray[int64_t, ndim=1] start, + ndarray[int64_t, ndim=1] end): + return is_monotonic(start, False)[0] and is_monotonic(end, False)[0] # Cython implementations of rolling sum, mean, variance, skewness, # other statistical moment functions @@ -48,39 +52,6 @@ cdef inline int int_min(int a, int b): return a if a <= b else b # periodically revisited to see if it's still true. # - -def _check_minp(win, minp, N, floor=None) -> int: - """ - Parameters - ---------- - win: int - minp: int or None - N: len of window - floor: int, optional - default 1 - - Returns - ------- - minimum period - """ - - if minp is None: - minp = 1 - if not util.is_integer_object(minp): - raise ValueError("min_periods must be an integer") - if minp > win: - raise ValueError(f"min_periods (minp) must be <= " - f"window (win)") - elif minp > N: - minp = N + 1 - elif minp < 0: - raise ValueError('min_periods must be >= 0') - if floor is None: - floor = 1 - - return max(minp, floor) - - # original C implementation by N. Devillard. # This code in public domain. # Function : kth_smallest() @@ -96,7 +67,6 @@ def _check_minp(win, minp, N, floor=None) -> int: # Physical description: 366 p. # Series: Prentice-Hall Series in Automatic Computation - # ---------------------------------------------------------------------- # Rolling count # this is only an impl for index not None, IOW, freq aware @@ -183,14 +153,15 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogi def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp): cdef: float64_t sum_x = 0 int64_t s, e int64_t nobs = 0, i, j, N = len(values) ndarray[float64_t] output + bint is_monotonic_bounds + is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) output = np.empty(N, dtype=float) with nogil: @@ -331,14 +302,15 @@ def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp): cdef: float64_t val, sum_x = 0 int64_t s, e Py_ssize_t nobs = 0, i, j, neg_ct = 0, N = len(values) ndarray[float64_t] output + bint is_monotonic_bounds + is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) output = np.empty(N, dtype=float) with nogil: @@ -493,8 +465,7 @@ def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, int ddof=1, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp, int ddof=1): """ Numerically stable implementation using Welford's method. """ @@ -504,7 +475,9 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, int64_t s, e Py_ssize_t i, j, N = len(values) ndarray[float64_t] output + bint is_monotonic_bounds + is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) output = np.empty(N, dtype=float) with nogil: @@ -641,15 +614,16 @@ def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp): cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0 int64_t nobs = 0, i, j, N = len(values) int64_t s, e ndarray[float64_t] output + bint is_monotonic_bounds + is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) output = np.empty(N, dtype=float) with nogil: @@ -794,14 +768,15 @@ def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp): cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 int64_t nobs = 0, i, j, s, e, N = len(values) ndarray[float64_t] output + bint is_monotonic_bounds + is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) output = np.empty(N, dtype=float) with nogil: @@ -1030,8 +1005,7 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1424,10 +1398,7 @@ def roll_generic_variable(object obj, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, int offset, object func, bint raw, - object args, object kwargs, - bint is_monotonic_bounds=True): - # is_monotonic_bounds unused since variable algorithm doesn't calculate - # adds/subtracts across windows, but matches other *_variable functions + object args, object kwargs): cdef: ndarray[float64_t] output, counts, bufarr ndarray[float64_t, cast=True] arr @@ -1501,7 +1472,15 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values, if avg: tot_wgt = np.zeros(in_n, dtype=np.float64) - minp = _check_minp(len(weights), minp, in_n) + if minp > win_n: + raise ValueError(f"min_periods (minp) must be <= " + f"window (win)") + elif minp > in_n: + minp = in_n + 1 + elif minp < 0: + raise ValueError('min_periods must be >= 0') + + minp = max(minp, 1) with nogil: if avg: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9f804584f532a..937da86e4df40 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -109,7 +109,7 @@ def _on(self): def is_freq_type(self) -> bool: return self.win_type == "freq" - def validate(self): + def validate(self) -> None: if self.center is not None and not is_bool(self.center): raise ValueError("center must be a boolean") if self.min_periods is not None and not is_integer(self.min_periods): @@ -412,7 +412,7 @@ def _get_roll_func(self, func_name: str) -> Callable: ) return window_func - def _get_cython_func_type(self, func): + def _get_cython_func_type(self, func: str) -> Callable: """ Return a variable or fixed cython function type. @@ -517,13 +517,6 @@ def calc(x): center=self.center, closed=self.closed, ) - if np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0): - # Our "variable" algorithms assume start/end are - # monotonically increasing. A custom window indexer - # can produce a non monotonic start/end. - return func( - x, start, end, min_periods, is_monotonic_bounds=False - ) return func(x, start, end, min_periods) else: