From 4d77973fe2c939f65daad230178ffce3d2dccac7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20=C5=A0koda?= Date: Wed, 11 Mar 2020 02:51:03 +0100 Subject: [PATCH] Backport PR #32386: BUG: Fix rolling functions with variable windows on decreasing index --- doc/source/whatsnew/v1.0.2.rst | 4 +++ pandas/_libs/window/aggregations.pyx | 4 +-- pandas/_libs/window/indexers.pyx | 10 ++++--- pandas/tests/window/test_timeseries_window.py | 27 +++++++++++-------- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 9841df0507138..3e4dfd4e75a66 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -89,6 +89,10 @@ Bug fixes - Using ``pd.NA`` with :meth:`Series.str.repeat` now correctly outputs a null value instead of raising error for vector inputs (:issue:`31632`) +**Rolling** + +- Fixed rolling operations with variable window (defined by time duration) on decreasing time index (:issue:`32385`). + .. --------------------------------------------------------------------------- .. _whatsnew_102.contributors: diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 0348843abc129..495b436030120 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1008,7 +1008,7 @@ def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start, def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, int64_t win): """ - Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + Moving min of 1d array of any numeric type along axis=0 ignoring NaNs. Parameters ---------- @@ -1025,7 +1025,7 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): """ - Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + Moving min of 1d array of any numeric type along axis=0 ignoring NaNs. Parameters ---------- diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 2d01d1964c043..8a1e7feb57ace 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -44,6 +44,7 @@ def calculate_variable_window_bounds( cdef: bint left_closed = False bint right_closed = False + int index_growth_sign = 1 ndarray[int64_t, ndim=1] start, end int64_t start_bound, end_bound Py_ssize_t i, j @@ -58,6 +59,9 @@ def calculate_variable_window_bounds( if closed in ['left', 'both']: left_closed = True + if index[num_values - 1] < index[0]: + index_growth_sign = -1 + start = np.empty(num_values, dtype='int64') start.fill(-1) end = np.empty(num_values, dtype='int64') @@ -78,7 +82,7 @@ def calculate_variable_window_bounds( # end is end of slice interval (not including) for i in range(1, num_values): end_bound = index[i] - start_bound = index[i] - window_size + start_bound = index[i] - index_growth_sign * window_size # left endpoint is closed if left_closed: @@ -88,13 +92,13 @@ def calculate_variable_window_bounds( # within the constraint start[i] = i for j in range(start[i - 1], i): - if index[j] > start_bound: + if (index[j] - start_bound) * index_growth_sign > 0: start[i] = j break # end bound is previous end # or current index - if index[end[i - 1]] <= end_bound: + if (index[end[i - 1]] - end_bound) * index_growth_sign <= 0: end[i] = i + 1 else: end[i] = end[i - 1] diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 5f5e10b5dd497..0c5289cd78fed 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -709,20 +709,25 @@ def test_rolling_cov_offset(self): tm.assert_series_equal(result, expected2) def test_rolling_on_decreasing_index(self): - # GH-19248 + # GH-19248, GH-32385 index = [ - Timestamp("20190101 09:00:00"), - Timestamp("20190101 09:00:02"), - Timestamp("20190101 09:00:03"), - Timestamp("20190101 09:00:05"), - Timestamp("20190101 09:00:06"), + Timestamp("20190101 09:00:30"), + Timestamp("20190101 09:00:27"), + Timestamp("20190101 09:00:20"), + Timestamp("20190101 09:00:18"), + Timestamp("20190101 09:00:10"), ] - df = DataFrame({"column": [3, 4, 4, 2, 1]}, index=reversed(index)) - result = df.rolling("2s").min() - expected = DataFrame( - {"column": [3.0, 3.0, 3.0, 2.0, 1.0]}, index=reversed(index) - ) + df = DataFrame({"column": [3, 4, 4, 5, 6]}, index=index) + result = df.rolling("5s").min() + expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]}, index=index) + tm.assert_frame_equal(result, expected) + + def test_rolling_on_empty(self): + # GH-32385 + df = DataFrame({"column": []}, index=[]) + result = df.rolling("5s").min() + expected = DataFrame({"column": []}, index=[]) tm.assert_frame_equal(result, expected) def test_rolling_on_multi_index_level(self):