From 357f21c42e513acba9903007ffd504d664928f8c Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 18 Apr 2020 11:15:29 +0300 Subject: [PATCH 01/10] TST: add test for median with and without min_periods --- pandas/tests/window/test_base_indexer.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 1a3fe865d2a7a..665da73e7fdb2 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -141,6 +141,12 @@ def get_window_bounds(self, num_values, min_periods, center, closed): ], {"ddof": 1}, ), + ( + "median", + np.median, + [1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 7.0, 8.0, 8.5, np.nan], + {}, + ), ], ) def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs): @@ -166,3 +172,8 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs) tm.assert_equal(result, expected) expected2 = constructor(rolling.apply(lambda x: np_func(x, **np_kwargs))) tm.assert_equal(result, expected2) + # Check that function works without min_periods supplied + rolling3 = constructor(values).rolling(window=indexer) + result3 = getattr(rolling3, func)() + expected3 = constructor(rolling3.apply(lambda x: np_func(x, **np_kwargs))) + tm.assert_equal(result3, expected3) From ce82372f479140c25bd2843d30b5e19a5d192f68 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 18 Apr 2020 11:16:26 +0300 Subject: [PATCH 02/10] BUG: add win_size calc for custom BaseIndexer in median --- pandas/core/window/rolling.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 62f470060b039..c529a585e05d4 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1429,7 +1429,23 @@ def mean(self, *args, **kwargs): def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") - window_func = partial(window_func, win=self._get_window()) + + # GH 32865. For correct median calculation we need window_size + # only to build a skiplist of appropriate number of levels + # so for custom BaseIndexer subclasses we pick max(end - start) + if isinstance(self.window, BaseIndexer): + values = getattr(self._selected_obj, "values", self._selected_obj) + start, end = self.window.get_window_bounds( + num_values=len(values), + min_periods=self.min_periods, + center=self.center, + closed=self.closed, + ) + win = (end - start).max() + else: + win = self._get_window() + + window_func = partial(window_func, win=win) return self._apply(window_func, center=self.center, name="median", **kwargs) def std(self, ddof=1, *args, **kwargs): From 16fe728cf8b8fb19881ff25bdf0b3baa3b78c0c0 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 18 Apr 2020 13:32:24 +0300 Subject: [PATCH 03/10] DOC: add whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a797090a83444..915827660a110 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -175,7 +175,7 @@ Other API changes - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) - Using a :func:`pandas.api.indexers.BaseIndexer` with ``skew``, ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`) -- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) +- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) - Added a :func:`pandas.api.indexers.FixedForwardWindowIndexer` class to support forward-looking windows during ``rolling`` operations. - From c3dfefb3e2ba8aad36f618d4c48e06bdf387ce15 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 19 Apr 2020 09:02:24 +0300 Subject: [PATCH 04/10] move the logic to roll_median_c in aggregations.pyx --- pandas/_libs/window/aggregations.pyx | 2 +- pandas/core/window/rolling.py | 20 ++++---------------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index f3889039c095e..559763b04222b 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -858,7 +858,7 @@ def roll_median_c(ndarray[float64_t] values, ndarray[int64_t] start, # actual skiplist ops outweigh any window computation costs output = np.empty(N, dtype=float) - if win == 0 or (end - start).max() == 0: + if (end - start).max() == 0: output[:] = NaN return output win = (end - start).max() diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index c529a585e05d4..34b3d667f89f3 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1429,22 +1429,10 @@ def mean(self, *args, **kwargs): def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") - - # GH 32865. For correct median calculation we need window_size - # only to build a skiplist of appropriate number of levels - # so for custom BaseIndexer subclasses we pick max(end - start) - if isinstance(self.window, BaseIndexer): - values = getattr(self._selected_obj, "values", self._selected_obj) - start, end = self.window.get_window_bounds( - num_values=len(values), - min_periods=self.min_periods, - center=self.center, - closed=self.closed, - ) - win = (end - start).max() - else: - win = self._get_window() - + # GH 32865. For custom BaseIndexer implementations, _get_window returns + # 0 or min_periods, while it should return max window size + # We override it inside the median function implementation + win = self._get_window() window_func = partial(window_func, win=win) return self._apply(window_func, center=self.center, name="median", **kwargs) From 83311b36d4583a5d768555f1fa2575cee674dfb8 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 19 Apr 2020 09:16:08 +0300 Subject: [PATCH 05/10] CLN: remove unnecessary _get_window call in median --- pandas/_libs/window/aggregations.pyx | 3 ++- pandas/core/window/rolling.py | 7 ++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 559763b04222b..673820fd8464a 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -843,7 +843,8 @@ def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, def roll_median_c(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, int64_t win): + ndarray[int64_t] end, int64_t minp, int64_t win=0): + # GH 32865. win argument kept for compatibility cdef: float64_t val, res, prev bint err = False diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 34b3d667f89f3..d4234770617b3 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1429,11 +1429,8 @@ def mean(self, *args, **kwargs): def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") - # GH 32865. For custom BaseIndexer implementations, _get_window returns - # 0 or min_periods, while it should return max window size - # We override it inside the median function implementation - win = self._get_window() - window_func = partial(window_func, win=win) + # GH 32865. Move max window size calculation to + # the median function implementation return self._apply(window_func, center=self.center, name="median", **kwargs) def std(self, ddof=1, *args, **kwargs): From 9172979ab8302bdf122d603a737c4e8aef95c27c Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 19 Apr 2020 10:48:05 +0300 Subject: [PATCH 06/10] restart tests From f54ccfc26645fd496773b1a440e73b2a845c3932 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 19 Apr 2020 18:07:57 +0300 Subject: [PATCH 07/10] CLN: fix typo --- pandas/tests/window/test_base_indexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 665da73e7fdb2..493c930fd75db 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -172,7 +172,7 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs) tm.assert_equal(result, expected) expected2 = constructor(rolling.apply(lambda x: np_func(x, **np_kwargs))) tm.assert_equal(result, expected2) - # Check that function works without min_periods supplied + # Check that the function works without min_periods supplied rolling3 = constructor(values).rolling(window=indexer) result3 = getattr(rolling3, func)() expected3 = constructor(rolling3.apply(lambda x: np_func(x, **np_kwargs))) From f582ce8262021a34c55a30da40780be7a830d4c5 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 20 Apr 2020 09:35:08 +0300 Subject: [PATCH 08/10] DOC: add comments to test cases --- pandas/tests/window/test_base_indexer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 493c930fd75db..35a7617b75e98 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -168,10 +168,16 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs) rolling = constructor(values).rolling(window=indexer, min_periods=2) result = getattr(rolling, func)() + + # Check that the function output matches the explicitly provided array expected = constructor(expected) tm.assert_equal(result, expected) + + # Check that the rolling function gives the same result + # as applying an alternative function to the rolling window object expected2 = constructor(rolling.apply(lambda x: np_func(x, **np_kwargs))) tm.assert_equal(result, expected2) + # Check that the function works without min_periods supplied rolling3 = constructor(values).rolling(window=indexer) result3 = getattr(rolling3, func)() From c179d5fa48e756e49d6e00aeb837dd040bb25697 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 20 Apr 2020 09:36:47 +0300 Subject: [PATCH 09/10] DOC: reword the comments for readability --- pandas/tests/window/test_base_indexer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 35a7617b75e98..c891969efb37c 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -173,8 +173,8 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs) expected = constructor(expected) tm.assert_equal(result, expected) - # Check that the rolling function gives the same result - # as applying an alternative function to the rolling window object + # Check that the rolling function output matches applying an alternative + # function to the rolling window object expected2 = constructor(rolling.apply(lambda x: np_func(x, **np_kwargs))) tm.assert_equal(result, expected2) From 1ad8d65e946109a6c476330a1ccaf22e20d89216 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 20 Apr 2020 09:39:59 +0300 Subject: [PATCH 10/10] DOC: reword the final comment to match --- pandas/tests/window/test_base_indexer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index c891969efb37c..aee47a085eb9c 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -178,7 +178,8 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs) expected2 = constructor(rolling.apply(lambda x: np_func(x, **np_kwargs))) tm.assert_equal(result, expected2) - # Check that the function works without min_periods supplied + # Check that the function output matches applying an alternative function + # if min_periods isn't specified rolling3 = constructor(values).rolling(window=indexer) result3 = getattr(rolling3, func)() expected3 = constructor(rolling3.apply(lambda x: np_func(x, **np_kwargs)))