Refactored and cleaned up (pandas-dev/pandas GH-45647)

rtpsw · rtpsw · commit a7d687895436 · 2022-01-28T04:53:09.000-05:00
diff --git a/doc/source/whatsnew/v1.4.1.rst b/doc/source/whatsnew/v1.4.1.rst
@@ -25,7 +25,7 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 - Fixed segfault in :meth:``DataFrame.to_json`` when dumping tz-aware datetimes in Python 3.10 (:issue:`42130`)
-- Fixed window aggregations to skip over unused elements (:issue:`45647`)
+- Fixed window aggregations in :meth:`DataFrame.rolling` and :meth:`Series.rolling` to skip over unused elements (:issue:`45647`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
@@ -780,7 +780,7 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
         Py_ssize_t i, j
         bint err = False, is_monotonic_increasing_bounds
         int midpoint, ret = 0
-        int64_t nobs, N = len(start), s, e, win
+        int64_t nobs = 0, N = len(start), s, e, win
         float64_t val, res, prev
         skiplist_t *sl
         ndarray[float64_t] output
@@ -809,9 +809,10 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
 
             if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
 
-                skiplist_destroy(sl)
-                sl = skiplist_init(<int>win)
-                nobs = 0
+                if i != 0:
+                    skiplist_destroy(sl)
+                    sl = skiplist_init(<int>win)
+                    nobs = 0
                 # setup
                 for j in range(s, e):
                     val = values[j]
@@ -1088,7 +1089,7 @@ def roll_quantile(const float64_t[:] values, ndarray[int64_t] start,
             e = end[i]
 
             if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
-                if not is_monotonic_increasing_bounds or s >= end[i - 1]:
+                if i != 0:
                     nobs = 0
                     skiplist_destroy(skiplist)
                     skiplist = skiplist_init(<int>win)
@@ -1213,7 +1214,7 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
             e = end[i]
 
             if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
-                if not is_monotonic_increasing_bounds or s >= end[i - 1]:
+                if i != 0:
                     nobs = 0
                     skiplist_destroy(skiplist)
                     skiplist = skiplist_init(<int>win)
diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py
@@ -2,12 +2,10 @@
     datetime,
     timedelta,
 )
-from functools import partial
 
 import numpy as np
 import pytest
 
-import pandas._libs.window.aggregations as window_aggregations
 import pandas.util._test_decorators as td
 
 from pandas import (
@@ -261,62 +259,3 @@ def frame():
         index=bdate_range(datetime(2009, 1, 1), periods=100),
         columns=np.arange(10),
     )
-
-
-def _named_func(name_and_func):
-    name, func = name_and_func
-    if not hasattr(func, "func"):
-        func = partial(func)
-    func.__name__ = name
-    return func
-
-
-@pytest.fixture(
-    params=[
-        _named_func(x)
-        for x in [
-            ("roll_sum", window_aggregations.roll_sum),
-            ("roll_mean", window_aggregations.roll_mean),
-        ]
-        + [
-            (f"roll_var({ddof})", partial(window_aggregations.roll_var, ddof=ddof))
-            for ddof in [0, 1]
-        ]
-        + [
-            ("roll_skew", window_aggregations.roll_skew),
-            ("roll_kurt", window_aggregations.roll_kurt),
-            ("roll_median_c", window_aggregations.roll_median_c),
-            ("roll_max", window_aggregations.roll_max),
-            ("roll_min", window_aggregations.roll_min),
-        ]
-        + [
-            (
-                f"roll_quantile({quantile},{interpolation})",
-                partial(
-                    window_aggregations.roll_quantile,
-                    quantile=quantile,
-                    interpolation=interpolation,
-                ),
-            )
-            for quantile in [0.0001, 0.5, 0.9999]
-            for interpolation in window_aggregations.interpolation_types
-        ]
-        + [
-            (
-                f"roll_rank({percentile},{method},{ascending})",
-                partial(
-                    window_aggregations.roll_rank,
-                    percentile=percentile,
-                    method=method,
-                    ascending=ascending,
-                ),
-            )
-            for percentile in [True, False]
-            for method in window_aggregations.rolling_rank_tiebreakers.keys()
-            for ascending in [True, False]
-        ]
-    ]
-)
-def rolling_aggregation(request):
-    """Make a named rolling aggregation function as fixture."""
-    return request.param
diff --git a/pandas/tests/window/test_cython_aggregations.py b/pandas/tests/window/test_cython_aggregations.py
@@ -0,0 +1,111 @@
+from functools import partial
+import sys
+
+import numpy as np
+import pytest
+
+import pandas._libs.window.aggregations as window_aggregations
+
+from pandas import Series
+import pandas._testing as tm
+
+
+def _get_rolling_aggregations():
+    # list pairs of name and function
+    # each function has this signature:
+    # (const float64_t[:] values, ndarray[int64_t] start,
+    #  ndarray[int64_t] end, int64_t minp) -> np.ndarray
+    named_roll_aggs = (
+        [
+            ("roll_sum", window_aggregations.roll_sum),
+            ("roll_mean", window_aggregations.roll_mean),
+        ]
+        + [
+            (f"roll_var({ddof})", partial(window_aggregations.roll_var, ddof=ddof))
+            for ddof in [0, 1]
+        ]
+        + [
+            ("roll_skew", window_aggregations.roll_skew),
+            ("roll_kurt", window_aggregations.roll_kurt),
+            ("roll_median_c", window_aggregations.roll_median_c),
+            ("roll_max", window_aggregations.roll_max),
+            ("roll_min", window_aggregations.roll_min),
+        ]
+        + [
+            (
+                f"roll_quantile({quantile},{interpolation})",
+                partial(
+                    window_aggregations.roll_quantile,
+                    quantile=quantile,
+                    interpolation=interpolation,
+                ),
+            )
+            for quantile in [0.0001, 0.5, 0.9999]
+            for interpolation in window_aggregations.interpolation_types
+        ]
+        + [
+            (
+                f"roll_rank({percentile},{method},{ascending})",
+                partial(
+                    window_aggregations.roll_rank,
+                    percentile=percentile,
+                    method=method,
+                    ascending=ascending,
+                ),
+            )
+            for percentile in [True, False]
+            for method in window_aggregations.rolling_rank_tiebreakers.keys()
+            for ascending in [True, False]
+        ]
+    )
+    # unzip to a list of 2 tuples, names and functions
+    unzipped = list(zip(*named_roll_aggs))
+    return {"ids": unzipped[0], "params": unzipped[1]}
+
+
+_rolling_aggregations = _get_rolling_aggregations()
+
+
+@pytest.fixture(
+    params=_rolling_aggregations["params"], ids=_rolling_aggregations["ids"]
+)
+def rolling_aggregation(request):
+    """Make a rolling aggregation function as fixture."""
+    return request.param
+
+
+def test_rolling_aggregation_boundary_consistency(rolling_aggregation):
+    # GH-45647
+    minp, step, width, size, selection = 0, 1, 3, 11, [2, 7]
+    values = np.arange(1, 1 + size, dtype=np.float64)
+    end = np.arange(width, size, step, dtype=np.int64)
+    start = end - width
+    selarr = np.array(selection, dtype=np.int32)
+    result = Series(rolling_aggregation(values, start[selarr], end[selarr], minp))
+    expected = Series(rolling_aggregation(values, start, end, minp)[selarr])
+    tm.assert_equal(expected, result)
+
+
+def test_rolling_aggregation_with_unused_elements(rolling_aggregation):
+    # GH-45647
+    minp, width = 0, 5  # width at least 4 for kurt
+    size = 2 * width + 5
+    values = np.arange(1, size + 1, dtype=np.float64)
+    values[width : width + 2] = sys.float_info.min
+    values[width + 2] = np.nan
+    values[width + 3 : width + 5] = sys.float_info.max
+    start = np.array([0, size - width], dtype=np.int64)
+    end = np.array([width, size], dtype=np.int64)
+    loc = np.array(
+        [j for i in range(len(start)) for j in range(start[i], end[i])],
+        dtype=np.int32,
+    )
+    result = Series(rolling_aggregation(values, start, end, minp))
+    compact_values = np.array(values[loc], dtype=np.float64)
+    compact_start = np.arange(0, len(start) * width, width, dtype=np.int64)
+    compact_end = compact_start + width
+    expected = Series(
+        rolling_aggregation(compact_values, compact_start, compact_end, minp)
+    )
+    assert np.isfinite(expected.values).all(), "Not all expected values are finite"
+    tm.assert_equal(expected, result)
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
@@ -2,7 +2,6 @@
     datetime,
     timedelta,
 )
-import sys
 
 import numpy as np
 import pytest
@@ -1735,38 +1734,3 @@ def test_rolling_std_neg_sqrt():
 
     b = a.ewm(span=3).std()
     assert np.isfinite(b[2:]).all()
-
-
-def test_rolling_aggregation_boundary_consistency(rolling_aggregation):
-    # GH-45647
-    minp, step, width, size, selection = 0, 1, 3, 11, [2, 7]
-    s = Series(np.arange(1, 1 + size, dtype=np.float64))
-    end = np.arange(width, size, step, dtype=np.int64)
-    start = end - width
-    selarr = np.array(selection, dtype=np.int32)
-    result = Series(rolling_aggregation(s.values, start[selarr], end[selarr], minp))
-    expected = Series(rolling_aggregation(s.values, start, end, minp)[selarr])
-    tm.assert_equal(expected, result)
-
-
-def test_rolling_aggregation_with_unused_elements(rolling_aggregation):
-    # GH-45647
-    minp, width = 0, 5  # width at least 4 for kurt
-    size = 2 * width + 5
-    s = Series(np.arange(1, size + 1, dtype=np.float64))
-    s[width : width + 2] = sys.float_info.min
-    s[width + 2] = np.nan
-    s[width + 3 : width + 5] = sys.float_info.max
-    start = np.array([0, size - width], dtype=np.int64)
-    end = np.array([width, size], dtype=np.int64)
-    loc = np.array(
-        [j for i in range(len(start)) for j in range(start[i], end[i])],
-        dtype=np.int32,
-    )
-    result = Series(rolling_aggregation(s.values, start, end, minp))
-    compact_s = np.array(s.iloc[loc], dtype=np.float64)
-    compact_start = np.arange(0, len(start) * width, width, dtype=np.int64)
-    compact_end = compact_start + width
-    expected = Series(rolling_aggregation(compact_s, compact_start, compact_end, minp))
-    assert np.isfinite(expected.values).all(), "Not all expected values are finite"
-    tm.assert_equal(expected, result)

Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,7 @@ Fixed regressions`
`25`	`25`	`Bug fixes`
`26`	`26`	`~~~~~~~~~`
`27`	`27`	- Fixed segfault in :meth:``DataFrame.to_json`` when dumping tz-aware datetimes in Python 3.10 (:issue:`42130`)
`28`		-- Fixed window aggregations to skip over unused elements (:issue:`45647`)
	`28`	+- Fixed window aggregations in :meth:`DataFrame.rolling` and :meth:`Series.rolling` to skip over unused elements (:issue:`45647`)
`29`	`29`	`-`
`30`	`30`
`31`	`31`	`.. ---------------------------------------------------------------------------`