From 07c00ed975a5fc2c0881286c4b49fad4b4a0adf7 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 7 Dec 2019 12:39:39 -0800 Subject: [PATCH 1/7] Move is_monotonic_bounds to the aggregation functions --- pandas/_libs/window/aggregations.pyx | 33 ++++++++++++++-------------- pandas/core/window/rolling.py | 7 ------ 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 1fdecbca32102..3ceba62648237 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -183,14 +183,15 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogi def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp): cdef: float64_t sum_x = 0 int64_t s, e int64_t nobs = 0, i, j, N = len(values) ndarray[float64_t] output + bint is_monotonic_bounds + is_monotonic_bounds = np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0) output = np.empty(N, dtype=float) with nogil: @@ -331,14 +332,15 @@ def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp): cdef: float64_t val, sum_x = 0 int64_t s, e Py_ssize_t nobs = 0, i, j, neg_ct = 0, N = len(values) ndarray[float64_t] output + bint is_monotonic_bounds + is_monotonic_bounds = np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0) output = np.empty(N, dtype=float) with nogil: @@ -493,8 +495,7 @@ def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, int ddof=1, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp, int ddof=1): """ Numerically stable implementation using Welford's method. """ @@ -504,7 +505,9 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, int64_t s, e Py_ssize_t i, j, N = len(values) ndarray[float64_t] output + bint is_monotonic_bounds + is_monotonic_bounds = np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0) output = np.empty(N, dtype=float) with nogil: @@ -641,15 +644,16 @@ def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp): cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0 int64_t nobs = 0, i, j, N = len(values) int64_t s, e ndarray[float64_t] output + bint is_monotonic_bounds + is_monotonic_bounds = np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0) output = np.empty(N, dtype=float) with nogil: @@ -794,14 +798,15 @@ def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp): cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 int64_t nobs = 0, i, j, s, e, N = len(values) ndarray[float64_t] output + bint is_monotonic_bounds + is_monotonic_bounds = np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0) output = np.empty(N, dtype=float) with nogil: @@ -1030,8 +1035,7 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, - bint is_monotonic_bounds=True): + ndarray[int64_t] end, int64_t minp): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1424,10 +1428,7 @@ def roll_generic_variable(object obj, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, int offset, object func, bint raw, - object args, object kwargs, - bint is_monotonic_bounds=True): - # is_monotonic_bounds unused since variable algorithm doesn't calculate - # adds/subtracts across windows, but matches other *_variable functions + object args, object kwargs): cdef: ndarray[float64_t] output, counts, bufarr ndarray[float64_t, cast=True] arr diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9f804584f532a..2182871119120 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -517,13 +517,6 @@ def calc(x): center=self.center, closed=self.closed, ) - if np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0): - # Our "variable" algorithms assume start/end are - # monotonically increasing. A custom window indexer - # can produce a non monotonic start/end. - return func( - x, start, end, min_periods, is_monotonic_bounds=False - ) return func(x, start, end, min_periods) else: From ba95bd60094056fd1ba1b7c16c4fc98df3828d66 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 7 Dec 2019 12:47:22 -0800 Subject: [PATCH 2/7] Remove single usage of _check_min in aggregations.pyx --- pandas/_libs/window/aggregations.pyx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 3ceba62648237..20d8efd14873b 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1502,7 +1502,15 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values, if avg: tot_wgt = np.zeros(in_n, dtype=np.float64) - minp = _check_minp(len(weights), minp, in_n) + if minp > win_n: + raise ValueError(f"min_periods (minp) must be <= " + f"window (win)") + elif minp > in_n: + minp = in_n + 1 + elif minp < 0: + raise ValueError('min_periods must be >= 0') + + minp = max(minp, 1) with nogil: if avg: From a73d7b380776b44e56ef03b2c8b4281f47fe9915 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 7 Dec 2019 12:48:29 -0800 Subject: [PATCH 3/7] remove _check_min --- pandas/_libs/window/aggregations.pyx | 34 ---------------------------- 1 file changed, 34 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 20d8efd14873b..1910495b089ab 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -18,7 +18,6 @@ cdef extern from "src/headers/cmath" namespace "std": int signbit(float64_t) nogil float64_t sqrt(float64_t x) nogil -cimport pandas._libs.util as util from pandas._libs.util cimport numeric from pandas._libs.skiplist cimport ( @@ -48,39 +47,6 @@ cdef inline int int_min(int a, int b): return a if a <= b else b # periodically revisited to see if it's still true. # - -def _check_minp(win, minp, N, floor=None) -> int: - """ - Parameters - ---------- - win: int - minp: int or None - N: len of window - floor: int, optional - default 1 - - Returns - ------- - minimum period - """ - - if minp is None: - minp = 1 - if not util.is_integer_object(minp): - raise ValueError("min_periods must be an integer") - if minp > win: - raise ValueError(f"min_periods (minp) must be <= " - f"window (win)") - elif minp > N: - minp = N + 1 - elif minp < 0: - raise ValueError('min_periods must be >= 0') - if floor is None: - floor = 1 - - return max(minp, floor) - - # original C implementation by N. Devillard. # This code in public domain. # Function : kth_smallest() From 32015b25ed0ff2da35f7e158621303ba96789ed1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 7 Dec 2019 16:05:04 -0800 Subject: [PATCH 4/7] Add some typing --- pandas/core/window/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 2182871119120..937da86e4df40 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -109,7 +109,7 @@ def _on(self): def is_freq_type(self) -> bool: return self.win_type == "freq" - def validate(self): + def validate(self) -> None: if self.center is not None and not is_bool(self.center): raise ValueError("center must be a boolean") if self.min_periods is not None and not is_integer(self.min_periods): @@ -412,7 +412,7 @@ def _get_roll_func(self, func_name: str) -> Callable: ) return window_func - def _get_cython_func_type(self, func): + def _get_cython_func_type(self, func: str) -> Callable: """ Return a variable or fixed cython function type. From e776c678ff5993ece3335d0edd2b8e03dac5c435 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 7 Dec 2019 16:23:36 -0800 Subject: [PATCH 5/7] fix condition --- pandas/_libs/window/aggregations.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 1910495b089ab..9b954d76ca6c4 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -157,7 +157,7 @@ def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0) + is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) output = np.empty(N, dtype=float) with nogil: @@ -306,7 +306,7 @@ def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0) + is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) output = np.empty(N, dtype=float) with nogil: @@ -473,7 +473,7 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0) + is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) output = np.empty(N, dtype=float) with nogil: @@ -619,7 +619,7 @@ def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0) + is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) output = np.empty(N, dtype=float) with nogil: @@ -772,7 +772,7 @@ def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0) + is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) output = np.empty(N, dtype=float) with nogil: From 636166b74327d96ea0e2578877ad9eca02576518 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 8 Dec 2019 10:57:23 -0800 Subject: [PATCH 6/7] Use is_monotonic from _lib.algos --- pandas/_libs/window/aggregations.pyx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 9b954d76ca6c4..a998bd0c684d0 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -18,6 +18,8 @@ cdef extern from "src/headers/cmath" namespace "std": int signbit(float64_t) nogil float64_t sqrt(float64_t x) nogil +from pandas._libs.algos import is_monotonic + from pandas._libs.util cimport numeric from pandas._libs.skiplist cimport ( @@ -157,7 +159,7 @@ def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) + is_monotonic_bounds = is_monotonic(start, False)[0] and is_monotonic(end, False)[0] output = np.empty(N, dtype=float) with nogil: @@ -306,7 +308,7 @@ def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) + is_monotonic_bounds = is_monotonic(start, False)[0] and is_monotonic(end, False)[0] output = np.empty(N, dtype=float) with nogil: @@ -473,7 +475,7 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) + is_monotonic_bounds = is_monotonic(start, False)[0] and is_monotonic(end, False)[0] output = np.empty(N, dtype=float) with nogil: @@ -619,7 +621,7 @@ def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) + is_monotonic_bounds = is_monotonic(start, False)[0] and is_monotonic(end, False)[0] output = np.empty(N, dtype=float) with nogil: @@ -772,7 +774,7 @@ def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) + is_monotonic_bounds = is_monotonic(start, False)[0] and is_monotonic(end, False)[0] output = np.empty(N, dtype=float) with nogil: From 259b8b42dbb7b2ae9c160d866e790e1c81ec1ccf Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 8 Dec 2019 14:05:16 -0800 Subject: [PATCH 7/7] Add inline helper function --- pandas/_libs/window/aggregations.pyx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index a998bd0c684d0..8b1588c35b4de 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -38,6 +38,9 @@ cdef: cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b +cdef inline bint is_monotonic_start_end_bounds(ndarray[int64_t, ndim=1] start, + ndarray[int64_t, ndim=1] end): + return is_monotonic(start, False)[0] and is_monotonic(end, False)[0] # Cython implementations of rolling sum, mean, variance, skewness, # other statistical moment functions @@ -64,7 +67,6 @@ cdef inline int int_min(int a, int b): return a if a <= b else b # Physical description: 366 p. # Series: Prentice-Hall Series in Automatic Computation - # ---------------------------------------------------------------------- # Rolling count # this is only an impl for index not None, IOW, freq aware @@ -159,7 +161,7 @@ def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = is_monotonic(start, False)[0] and is_monotonic(end, False)[0] + is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) output = np.empty(N, dtype=float) with nogil: @@ -308,7 +310,7 @@ def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = is_monotonic(start, False)[0] and is_monotonic(end, False)[0] + is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) output = np.empty(N, dtype=float) with nogil: @@ -475,7 +477,7 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = is_monotonic(start, False)[0] and is_monotonic(end, False)[0] + is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) output = np.empty(N, dtype=float) with nogil: @@ -621,7 +623,7 @@ def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = is_monotonic(start, False)[0] and is_monotonic(end, False)[0] + is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) output = np.empty(N, dtype=float) with nogil: @@ -774,7 +776,7 @@ def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[float64_t] output bint is_monotonic_bounds - is_monotonic_bounds = is_monotonic(start, False)[0] and is_monotonic(end, False)[0] + is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) output = np.empty(N, dtype=float) with nogil: