From a919128d89b5d79202bc2bb2edb592d7f9a7330e Mon Sep 17 00:00:00 2001 From: ihsan Date: Mon, 17 Jun 2019 22:29:09 +0300 Subject: [PATCH 1/5] TST: Test rolling median with closed options using datetime index --- pandas/tests/test_window.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index bc6946cbade4c..8fb3d047ad1c3 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -594,6 +594,20 @@ def test_closed_min_max_minp(self, func, closed, expected): expected = pd.Series(expected, index=ser.index) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("closed,expected", [ + ('right', [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]), + ('both', [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ('neither', [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ('left', [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]) + ]) + def test_closed_median(self, closed, expected): + # GH 26005 + ser = pd.Series(data=np.arange(10), + index=pd.date_range('2000', periods=10)) + result = ser.rolling('3D', closed=closed).median() + expected = pd.Series(expected, index=ser.index) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('roller', ['1s', 1]) def tests_empty_df_rolling(self, roller): # GH 15819 Verifies that datetime and integer rolling windows can be From 371b2daf8300cbcc63e390a9f7f418ff7006f19a Mon Sep 17 00:00:00 2001 From: ihsan Date: Mon, 17 Jun 2019 22:51:19 +0300 Subject: [PATCH 2/5] BUG: Insert elements from first start to end at setup Adding the first element at setup produce incorrect results when closed option is left or neither. Instead iterating from first start to end at setup is a proper option same as in roll_mean --- pandas/_libs/window.pyx | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 48b554ca02a9d..61f52989234a8 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1112,12 +1112,13 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, if i == 0: # setup - val = values[i] - if notnan(val): - nobs += 1 - err = skiplist_insert(sl, val) != 1 - if err: - break + for j in range(s, e): + val = values[j] + if notnan(val): + nobs += 1 + err = skiplist_insert(sl, val) != 1 + if err: + break else: From 0891a36354930d42ee428b321ef35a511581ca04 Mon Sep 17 00:00:00 2001 From: ihsan Date: Mon, 17 Jun 2019 23:13:40 +0300 Subject: [PATCH 3/5] BUG: Change the order of adds and deletes When a window is empty actual element in the order of window is removed from skiplist. But since it is added after removing it is never removed and remains in the skiplist. Instead removing after adding is a proper option --- pandas/_libs/window.pyx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 61f52989234a8..1446e0e3a3ca5 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1122,13 +1122,6 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, else: - # calculate deletes - for j in range(start[i - 1], s): - val = values[j] - if notnan(val): - skiplist_remove(sl, val) - nobs -= 1 - # calculate adds for j in range(end[i - 1], e): val = values[j] @@ -1138,6 +1131,13 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, if err: break + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + if notnan(val): + skiplist_remove(sl, val) + nobs -= 1 + if nobs >= minp: midpoint = (nobs / 2) if nobs % 2: From a15a1bc5bdae82478381673c9e68a5586bac1de5 Mon Sep 17 00:00:00 2001 From: ihsan Date: Mon, 17 Jun 2019 23:37:02 +0300 Subject: [PATCH 4/5] Add whatsnew --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 2b1a61186dca6..a920b2300ca4b 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -709,6 +709,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise a ``ValueError`` (:issue:`26208`) - Bug in :meth:`pandas.core.frame.DataFrame.groupby` where passing a :class:`pandas.core.groupby.grouper.Grouper` would return incorrect groups when using the ``.groups`` accessor (:issue:`26326`) - Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. (:issue:`26310`) +- Bug in :meth:`pandas.core.window.Rolling.median` where incorrect results are returned with ``closed='left'`` and ``closed='neither'`` (:issue:`26005`) Reshaping ^^^^^^^^^ From 51e89c4b367c18fefe98a419c4c1f8bf7c70dc40 Mon Sep 17 00:00:00 2001 From: ihsan Date: Fri, 21 Jun 2019 10:13:07 +0300 Subject: [PATCH 5/5] BUG: Fix the same issue with quantile --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/_libs/window.pyx | 23 ++++++++++++----------- pandas/tests/test_window.py | 9 +++++++-- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1c2f151f3586e..114dd463e729c 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -744,7 +744,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.frame.DataFrame.groupby` where passing a :class:`pandas.core.groupby.grouper.Grouper` would return incorrect groups when using the ``.groups`` accessor (:issue:`26326`) - Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. (:issue:`26310`) - Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where MemoryError is raised with empty window (:issue:`26005`) -- Bug in :meth:`pandas.core.window.Rolling.median` where incorrect results are returned with ``closed='left'`` and ``closed='neither'`` (:issue:`26005`) +- Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where incorrect results are returned with ``closed='left'`` and ``closed='neither'`` (:issue:`26005`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 448a7f28ce776..df86f395d6097 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1508,19 +1508,13 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, if i == 0: # setup - val = values[i] - if notnan(val): - nobs += 1 - skiplist_insert(skiplist, val) - - else: - - # calculate deletes - for j in range(start[i - 1], s): + for j in range(s, e): val = values[j] if notnan(val): - skiplist_remove(skiplist, val) - nobs -= 1 + nobs += 1 + skiplist_insert(skiplist, val) + + else: # calculate adds for j in range(end[i - 1], e): @@ -1529,6 +1523,13 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, nobs += 1 skiplist_insert(skiplist, val) + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + if notnan(val): + skiplist_remove(skiplist, val) + nobs -= 1 + if nobs >= minp: if nobs == 1: # Single value in skip list diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index ed898dd60faf3..9524a78dae16c 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -600,12 +600,17 @@ def test_closed_min_max_minp(self, func, closed, expected): ('neither', [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), ('left', [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]) ]) - def test_closed_median(self, closed, expected): + def test_closed_median_quantile(self, closed, expected): # GH 26005 ser = pd.Series(data=np.arange(10), index=pd.date_range('2000', periods=10)) - result = ser.rolling('3D', closed=closed).median() + roll = ser.rolling('3D', closed=closed) expected = pd.Series(expected, index=ser.index) + + result = roll.median() + tm.assert_series_equal(result, expected) + + result = roll.quantile(0.5) tm.assert_series_equal(result, expected) @pytest.mark.parametrize('roller', ['1s', 1])