CLN/TYPE: window aggregation cleanups and typing (pandas-dev#30137)

mroeschke · proost · commit 4b449f7e78f3 · 2019-12-20T01:22:45.000+09:00
* Move is_monotonic_bounds to the aggregation functions

* Remove single usage of _check_min in aggregations.pyx

* remove _check_min

* Add some typing

* fix condition

* Use is_monotonic from _lib.algos

* Add inline helper function
diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
@@ -18,7 +18,8 @@ cdef extern from "src/headers/cmath" namespace "std":
     int signbit(float64_t) nogil
     float64_t sqrt(float64_t x) nogil
 
-cimport pandas._libs.util as util
+from pandas._libs.algos import is_monotonic
+
 from pandas._libs.util cimport numeric
 
 from pandas._libs.skiplist cimport (
@@ -37,6 +38,9 @@ cdef:
 cdef inline int int_max(int a, int b): return a if a >= b else b
 cdef inline int int_min(int a, int b): return a if a <= b else b
 
+cdef inline bint is_monotonic_start_end_bounds(ndarray[int64_t, ndim=1] start,
+                                               ndarray[int64_t, ndim=1] end):
+    return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]
 
 # Cython implementations of rolling sum, mean, variance, skewness,
 # other statistical moment functions
@@ -48,39 +52,6 @@ cdef inline int int_min(int a, int b): return a if a <= b else b
 #   periodically revisited to see if it's still true.
 #
 
-
-def _check_minp(win, minp, N, floor=None) -> int:
-    """
-    Parameters
-    ----------
-    win: int
-    minp: int or None
-    N: len of window
-    floor: int, optional
-        default 1
-
-    Returns
-    -------
-    minimum period
-    """
-
-    if minp is None:
-        minp = 1
-    if not util.is_integer_object(minp):
-        raise ValueError("min_periods must be an integer")
-    if minp > win:
-        raise ValueError(f"min_periods (minp) must be <= "
-                         f"window (win)")
-    elif minp > N:
-        minp = N + 1
-    elif minp < 0:
-        raise ValueError('min_periods must be >= 0')
-    if floor is None:
-        floor = 1
-
-    return max(minp, floor)
-
-
 # original C implementation by N. Devillard.
 # This code in public domain.
 # Function :   kth_smallest()
@@ -96,7 +67,6 @@ def _check_minp(win, minp, N, floor=None) -> int:
 # Physical description: 366 p.
 #               Series: Prentice-Hall Series in Automatic Computation
 
-
 # ----------------------------------------------------------------------
 # Rolling count
 # this is only an impl for index not None, IOW, freq aware
@@ -183,14 +153,15 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogi
 
 
 def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start,
-                      ndarray[int64_t] end, int64_t minp,
-                      bint is_monotonic_bounds=True):
+                      ndarray[int64_t] end, int64_t minp):
     cdef:
         float64_t sum_x = 0
         int64_t s, e
         int64_t nobs = 0, i, j, N = len(values)
         ndarray[float64_t] output
+        bint is_monotonic_bounds
 
+    is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
     output = np.empty(N, dtype=float)
 
     with nogil:
@@ -331,14 +302,15 @@ def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
 
 
 def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start,
-                       ndarray[int64_t] end, int64_t minp,
-                       bint is_monotonic_bounds=True):
+                       ndarray[int64_t] end, int64_t minp):
     cdef:
         float64_t val, sum_x = 0
         int64_t s, e
         Py_ssize_t nobs = 0, i, j, neg_ct = 0, N = len(values)
         ndarray[float64_t] output
+        bint is_monotonic_bounds
 
+    is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
     output = np.empty(N, dtype=float)
 
     with nogil:
@@ -493,8 +465,7 @@ def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
 
 
 def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start,
-                      ndarray[int64_t] end, int64_t minp, int ddof=1,
-                      bint is_monotonic_bounds=True):
+                      ndarray[int64_t] end, int64_t minp, int ddof=1):
     """
     Numerically stable implementation using Welford's method.
     """
@@ -504,7 +475,9 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start,
         int64_t s, e
         Py_ssize_t i, j, N = len(values)
         ndarray[float64_t] output
+        bint is_monotonic_bounds
 
+    is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
     output = np.empty(N, dtype=float)
 
     with nogil:
@@ -641,15 +614,16 @@ def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
 
 
 def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start,
-                       ndarray[int64_t] end, int64_t minp,
-                       bint is_monotonic_bounds=True):
+                       ndarray[int64_t] end, int64_t minp):
     cdef:
         float64_t val, prev
         float64_t x = 0, xx = 0, xxx = 0
         int64_t nobs = 0, i, j, N = len(values)
         int64_t s, e
         ndarray[float64_t] output
+        bint is_monotonic_bounds
 
+    is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
     output = np.empty(N, dtype=float)
 
     with nogil:
@@ -794,14 +768,15 @@ def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
 
 
 def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start,
-                       ndarray[int64_t] end, int64_t minp,
-                       bint is_monotonic_bounds=True):
+                       ndarray[int64_t] end, int64_t minp):
     cdef:
         float64_t val, prev
         float64_t x = 0, xx = 0, xxx = 0, xxxx = 0
         int64_t nobs = 0, i, j, s, e, N = len(values)
         ndarray[float64_t] output
+        bint is_monotonic_bounds
 
+    is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
     output = np.empty(N, dtype=float)
 
     with nogil:
@@ -1030,8 +1005,7 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
 
 
 def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start,
-                      ndarray[int64_t] end, int64_t minp,
-                      bint is_monotonic_bounds=True):
+                      ndarray[int64_t] end, int64_t minp):
     """
     Moving max of 1d array of any numeric type along axis=0 ignoring NaNs.
 
@@ -1424,10 +1398,7 @@ def roll_generic_variable(object obj,
                           ndarray[int64_t] start, ndarray[int64_t] end,
                           int64_t minp,
                           int offset, object func, bint raw,
-                          object args, object kwargs,
-                          bint is_monotonic_bounds=True):
-    # is_monotonic_bounds unused since variable algorithm doesn't calculate
-    # adds/subtracts across windows, but matches other *_variable functions
+                          object args, object kwargs):
     cdef:
         ndarray[float64_t] output, counts, bufarr
         ndarray[float64_t, cast=True] arr
@@ -1501,7 +1472,15 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values,
     if avg:
         tot_wgt = np.zeros(in_n, dtype=np.float64)
 
-    minp = _check_minp(len(weights), minp, in_n)
+    if minp > win_n:
+        raise ValueError(f"min_periods (minp) must be <= "
+                         f"window (win)")
+    elif minp > in_n:
+        minp = in_n + 1
+    elif minp < 0:
+        raise ValueError('min_periods must be >= 0')
+
+    minp = max(minp, 1)
 
     with nogil:
         if avg:
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -109,7 +109,7 @@ def _on(self):
     def is_freq_type(self) -> bool:
         return self.win_type == "freq"
 
-    def validate(self):
+    def validate(self) -> None:
         if self.center is not None and not is_bool(self.center):
             raise ValueError("center must be a boolean")
         if self.min_periods is not None and not is_integer(self.min_periods):
@@ -412,7 +412,7 @@ def _get_roll_func(self, func_name: str) -> Callable:
             )
         return window_func
 
-    def _get_cython_func_type(self, func):
+    def _get_cython_func_type(self, func: str) -> Callable:
         """
         Return a variable or fixed cython function type.
 
@@ -517,13 +517,6 @@ def calc(x):
                         center=self.center,
                         closed=self.closed,
                     )
-                    if np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0):
-                        # Our "variable" algorithms assume start/end are
-                        # monotonically increasing. A custom window indexer
-                        # can produce a non monotonic start/end.
-                        return func(
-                            x, start, end, min_periods, is_monotonic_bounds=False
-                        )
                     return func(x, start, end, min_periods)
 
             else: