CLN: rolling.py and window/aggregations.pyx (#38768)

mroeschke · web-flow · commit af428c952b4f · 2020-12-29T09:05:56.000-05:00
diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
@@ -55,39 +55,11 @@ cdef:
 
     float64_t NaN = <float64_t>np.NaN
 
-cdef inline int int_max(int a, int b): return a if a >= b else b
-cdef inline int int_min(int a, int b): return a if a <= b else b
-
 cdef bint is_monotonic_increasing_start_end_bounds(
     ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
 ):
     return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]
 
-# Cython implementations of rolling sum, mean, variance, skewness,
-# other statistical moment functions
-#
-# Misc implementation notes
-# -------------------------
-#
-# - In Cython x * x is faster than x ** 2 for C types, this should be
-#   periodically revisited to see if it's still true.
-#
-
-# original C implementation by N. Devillard.
-# This code in public domain.
-# Function :   kth_smallest()
-# In       :   array of elements, # of elements in the array, rank k
-# Out      :   one element
-# Job      :   find the kth smallest element in the array
-
-#             Reference:
-
-#               Author: Wirth, Niklaus
-#                Title: Algorithms + data structures = programs
-#            Publisher: Englewood Cliffs: Prentice-Hall, 1976
-# Physical description: 366 p.
-#               Series: Prentice-Hall Series in Automatic Computation
-
 # ----------------------------------------------------------------------
 # Rolling sum
 
@@ -774,7 +746,6 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
 
 def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
                   ndarray[int64_t] end, int64_t minp):
-    # GH 32865. win argument kept for compatibility
     cdef:
         float64_t val, res, prev
         bint err = False
@@ -1167,9 +1138,8 @@ def roll_apply(object obj,
     arr = np.asarray(obj)
 
     # ndarray input
-    if raw:
-        if not arr.flags.c_contiguous:
-            arr = arr.copy('C')
+    if raw and not arr.flags.c_contiguous:
+        arr = arr.copy('C')
 
     counts = roll_sum(np.isfinite(arr).astype(float), start, end, minp)
 
@@ -1195,17 +1165,17 @@ def roll_apply(object obj,
 # Rolling sum and mean for weighted window
 
 
-def roll_weighted_sum(float64_t[:] values, float64_t[:] weights, int minp):
+def roll_weighted_sum(const float64_t[:] values, const float64_t[:] weights, int minp):
     return _roll_weighted_sum_mean(values, weights, minp, avg=0)
 
 
-def roll_weighted_mean(float64_t[:] values, float64_t[:] weights, int minp):
+def roll_weighted_mean(const float64_t[:] values, const float64_t[:] weights, int minp):
     return _roll_weighted_sum_mean(values, weights, minp, avg=1)
 
 
-cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values,
-                                                float64_t[:] weights,
-                                                int minp, bint avg):
+cdef float64_t[:] _roll_weighted_sum_mean(const float64_t[:] values,
+                                          const float64_t[:] weights,
+                                          int minp, bint avg):
     """
     Assume len(weights) << len(values)
     """
@@ -1270,7 +1240,7 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values,
                 if c < minp:
                     output[in_i] = NaN
 
-    return np.asarray(output)
+    return output
 
 
 # ----------------------------------------------------------------------
@@ -1424,7 +1394,7 @@ cdef inline void remove_weighted_var(float64_t val,
             mean[0] = 0
 
 
-def roll_weighted_var(float64_t[:] values, float64_t[:] weights,
+def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights,
                       int64_t minp, unsigned int ddof):
     """
     Calculates weighted rolling variance using West's online algorithm.
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -174,9 +174,8 @@ def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries:
         Split data into blocks & return conformed data.
         """
         # filter out the on from the object
-        if self.on is not None and not isinstance(self.on, Index):
-            if obj.ndim == 2:
-                obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
+        if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2:
+            obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
         if self.axis == 1:
             # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything
             # to float to calculate the complete row at once. We exclude all non-numeric
@@ -238,10 +237,6 @@ def _get_cov_corr_window(
         """
         return self.window
 
-    @property
-    def _window_type(self) -> str:
-        return type(self).__name__
-
     def __repr__(self) -> str:
         """
         Provide a nice str repr of our rolling object.
@@ -252,7 +247,7 @@ def __repr__(self) -> str:
             if getattr(self, attr_name, None) is not None
         )
         attrs = ",".join(attrs_list)
-        return f"{self._window_type} [{attrs}]"
+        return f"{type(self).__name__} [{attrs}]"
 
     def __iter__(self):
         obj = self._create_data(self._selected_obj)
@@ -278,7 +273,7 @@ def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray:
 
         if needs_i8_conversion(values.dtype):
             raise NotImplementedError(
-                f"ops for {self._window_type} for this "
+                f"ops for {type(self).__name__} for this "
                 f"dtype {values.dtype} are not implemented"
             )
         else:
@@ -464,7 +459,6 @@ def calc(x):
                     result = np.apply_along_axis(calc, self.axis, values)
                 else:
                     result = calc(values)
-                    result = np.asarray(result)
 
             if numba_cache_key is not None:
                 NUMBA_FUNC_CACHE[numba_cache_key] = func
@@ -1102,8 +1096,8 @@ def calc(x):
                 if values.ndim > 1:
                     result = np.apply_along_axis(calc, self.axis, values)
                 else:
-                    result = calc(values)
-                    result = np.asarray(result)
+                    # Our weighted aggregations return memoryviews
+                    result = np.asarray(calc(values))
 
             if self.center:
                 result = self._center_window(result, offset)
@@ -2158,7 +2152,7 @@ def _validate_monotonic(self):
         """
         Validate that on is monotonic;
         in this case we have to check only for nans, because
-        monotonicy was already validated at a higher level.
+        monotonicity was already validated at a higher level.
         """
         if self._on.hasnans:
             self._raise_monotonic_error()