diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
index b50eaf800533a..2c315ca13e563 100644
--- a/pandas/_libs/window/aggregations.pyx
+++ b/pandas/_libs/window/aggregations.pyx
@@ -58,7 +58,7 @@ cdef:
 cdef inline int int_max(int a, int b): return a if a >= b else b
 cdef inline int int_min(int a, int b): return a if a <= b else b
 
-cdef bint is_monotonic_start_end_bounds(
+cdef bint is_monotonic_increasing_start_end_bounds(
     ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
 ):
     return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]
@@ -143,9 +143,11 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,
         int64_t s, e
         int64_t nobs = 0, i, j, N = len(values)
         ndarray[float64_t] output
-        bint is_monotonic_bounds
+        bint is_monotonic_increasing_bounds
 
-    is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
+    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
+        start, end
+    )
     output = np.empty(N, dtype=float)
 
     with nogil:
@@ -154,7 +156,7 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,
             s = start[i]
             e = end[i]
 
-            if i == 0 or not is_monotonic_bounds:
+            if i == 0 or not is_monotonic_increasing_bounds:
 
                 # setup
 
@@ -173,9 +175,10 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,
 
             output[i] = calc_sum(minp, nobs, sum_x)
 
-            if not is_monotonic_bounds:
-                for j in range(s, e):
-                    remove_sum(values[j], &nobs, &sum_x, &compensation_remove)
+            if not is_monotonic_increasing_bounds:
+                nobs = 0
+                sum_x = 0.0
+                compensation_remove = 0.0
 
     return output
 
@@ -244,9 +247,11 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,
         int64_t s, e
         Py_ssize_t nobs = 0, i, j, neg_ct = 0, N = len(values)
         ndarray[float64_t] output
-        bint is_monotonic_bounds
+        bint is_monotonic_increasing_bounds
 
-    is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
+    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
+        start, end
+    )
     output = np.empty(N, dtype=float)
 
     with nogil:
@@ -255,7 +260,7 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,
             s = start[i]
             e = end[i]
 
-            if i == 0 or not is_monotonic_bounds:
+            if i == 0 or not is_monotonic_increasing_bounds:
 
                 # setup
                 for j in range(s, e):
@@ -276,10 +281,11 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,
 
             output[i] = calc_mean(minp, nobs, neg_ct, sum_x)
 
-            if not is_monotonic_bounds:
-                for j in range(s, e):
-                    val = values[j]
-                    remove_mean(val, &nobs, &sum_x, &neg_ct, &compensation_remove)
+            if not is_monotonic_increasing_bounds:
+                nobs = 0
+                neg_ct = 0
+                sum_x = 0.0
+                compensation_remove = 0.0
     return output
 
 # ----------------------------------------------------------------------
@@ -367,10 +373,12 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,
         int64_t s, e
         Py_ssize_t i, j, N = len(values)
         ndarray[float64_t] output
-        bint is_monotonic_bounds
+        bint is_monotonic_increasing_bounds
 
     minp = max(minp, 1)
-    is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
+    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
+        start, end
+    )
     output = np.empty(N, dtype=float)
 
     with nogil:
@@ -382,7 +390,7 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,
 
             # Over the first window, observations can only be added
             # never removed
-            if i == 0 or not is_monotonic_bounds:
+            if i == 0 or not is_monotonic_increasing_bounds:
 
                 for j in range(s, e):
                     add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add)
@@ -403,10 +411,13 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,
 
             output[i] = calc_var(minp, ddof, nobs, ssqdm_x)
 
-            if not is_monotonic_bounds:
-                for j in range(s, e):
-                    remove_var(values[j], &nobs, &mean_x, &ssqdm_x,
-                               &compensation_remove)
+            if not is_monotonic_increasing_bounds:
+                # Next window is rebuilt via add_var only: clear add-side state too
+                nobs = 0.0
+                mean_x = 0.0
+                ssqdm_x = 0.0
+                compensation_add = 0.0
+                compensation_remove = 0.0
 
     return output
 
@@ -486,10 +497,12 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
         int64_t nobs = 0, i, j, N = len(values)
         int64_t s, e
         ndarray[float64_t] output
-        bint is_monotonic_bounds
+        bint is_monotonic_increasing_bounds
 
     minp = max(minp, 3)
-    is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
+    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
+        start, end
+    )
     output = np.empty(N, dtype=float)
 
     with nogil:
@@ -501,7 +514,7 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
 
             # Over the first window, observations can only be added
             # never removed
-            if i == 0 or not is_monotonic_bounds:
+            if i == 0 or not is_monotonic_increasing_bounds:
 
                 for j in range(s, e):
                     val = values[j]
@@ -524,10 +537,11 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
 
             output[i] = calc_skew(minp, nobs, x, xx, xxx)
 
-            if not is_monotonic_bounds:
-                for j in range(s, e):
-                    val = values[j]
-                    remove_skew(val, &nobs, &x, &xx, &xxx)
+            if not is_monotonic_increasing_bounds:
+                nobs = 0
+                x = 0.0
+                xx = 0.0
+                xxx = 0.0
 
     return output
 
@@ -611,10 +625,12 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
         float64_t x = 0, xx = 0, xxx = 0, xxxx = 0
         int64_t nobs = 0, i, j, s, e, N = len(values)
         ndarray[float64_t] output
-        bint is_monotonic_bounds
+        bint is_monotonic_increasing_bounds
 
     minp = max(minp, 4)
-    is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
+    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
+        start, end
+    )
     output = np.empty(N, dtype=float)
 
     with nogil:
@@ -626,7 +642,7 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
 
             # Over the first window, observations can only be added
             # never removed
-            if i == 0 or not is_monotonic_bounds:
+            if i == 0 or not is_monotonic_increasing_bounds:
 
                 for j in range(s, e):
                     add_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx)
@@ -646,9 +662,12 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
 
             output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)
 
-            if not is_monotonic_bounds:
-                for j in range(s, e):
-                    remove_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx)
+            if not is_monotonic_increasing_bounds:
+                nobs = 0
+                x = 0.0
+                xx = 0.0
+                xxx = 0.0
+                xxxx = 0.0
 
     return output
 
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
index 9bba6d084f9c9..e919812be9fce 100644
--- a/pandas/tests/window/test_rolling.py
+++ b/pandas/tests/window/test_rolling.py
@@ -917,3 +917,98 @@ def test_rolling_var_numerical_issues(func, third_value, values):
     result = getattr(ds.rolling(2), func)()
     expected = Series([np.nan] + values)
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("method", ["var", "sum", "mean", "skew", "kurt", "min", "max"])
+def test_rolling_decreasing_indices(method):
+    """
+    Make sure that decreasing indices give the same results as increasing indices.
+
+    GH 36933
+    """
+    df = DataFrame({"values": np.arange(-15, 10) ** 2})
+    df_reverse = DataFrame({"values": df["values"][::-1]}, index=df.index[::-1])
+
+    increasing = getattr(df.rolling(window=5), method)()
+    decreasing = getattr(df_reverse.rolling(window=5), method)()
+
+    assert np.abs(decreasing.values[::-1][:-4] - increasing.values[4:]).max() < 1e-12
+
+
+@pytest.mark.parametrize(
+    "method,expected",
+    [
+        (
+            "var",
+            [
+                float("nan"),
+                43.0,
+                float("nan"),
+                136.333333,
+                43.5,
+                94.966667,
+                182.0,
+                318.0,
+            ],
+        ),
+        ("mean", [float("nan"), 7.5, float("nan"), 21.5, 6.0, 9.166667, 13.0, 17.5]),
+        ("sum", [float("nan"), 30.0, float("nan"), 86.0, 30.0, 55.0, 91.0, 140.0]),
+        (
+            "skew",
+            [
+                float("nan"),
+                0.709296,
+                float("nan"),
+                0.407073,
+                0.984656,
+                0.919184,
+                0.874674,
+                0.842418,
+            ],
+        ),
+        (
+            "kurt",
+            [
+                float("nan"),
+                -0.5916711736073559,
+                float("nan"),
+                -1.0028993131317954,
+                -0.06103844629409494,
+                -0.254143227116194,
+                -0.37362637362637585,
+                -0.45439658241367054,
+            ],
+        ),
+    ],
+)
+def test_rolling_non_monotonic(method, expected):
+    """
+    Make sure the (rare) branch of non-monotonic indices is covered by a test.
+
+    output from 1.1.3 is assumed to be the expected output. Output of sum/mean has
+    manually been verified.
+
+    GH 36933.
+    """
+    # Based on an example found in computation.rst
+    use_expanding = [True, False, True, False, True, True, True, True]
+    df = DataFrame({"values": np.arange(len(use_expanding)) ** 2})
+
+    class CustomIndexer(pd.api.indexers.BaseIndexer):
+        def get_window_bounds(self, num_values, min_periods, center, closed):
+            start = np.empty(num_values, dtype=np.int64)
+            end = np.empty(num_values, dtype=np.int64)
+            for i in range(num_values):
+                if self.use_expanding[i]:
+                    start[i] = 0
+                    end[i] = i + 1
+                else:
+                    start[i] = i
+                    end[i] = i + self.window_size
+            return start, end
+
+    indexer = CustomIndexer(window_size=4, use_expanding=use_expanding)
+
+    result = getattr(df.rolling(indexer), method)()
+    expected = DataFrame({"values": expected})
+    tm.assert_frame_equal(result, expected)