From 477773de846b74d166f8c93fb6b1b07851e121f7 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 9 Aug 2020 23:59:26 -0700 Subject: [PATCH 1/3] BUG: Support custom BaseIndexers in groupby.rolling --- doc/source/whatsnew/v1.1.1.rst | 1 + pandas/core/window/indexers.py | 12 +++++++++--- pandas/core/window/rolling.py | 16 +++++++++++++--- pandas/tests/window/test_grouper.py | 23 +++++++++++++++++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst index f0ad9d1ca3b0f..35339dbccbd0c 100644 --- a/doc/source/whatsnew/v1.1.1.rst +++ b/doc/source/whatsnew/v1.1.1.rst @@ -20,6 +20,7 @@ Fixed regressions - Fixed regression in :class:`pandas.core.groupby.RollingGroupby` where column selection was ignored (:issue:`35486`) - Fixed regression in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`) - Fixed regression in ``.groupby(..).rolling(..)`` where a segfault would occur with ``center=True`` and an odd number of values (:issue:`35552`) +- Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index bc36bdca982e8..f4243a5ee862a 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -265,7 +265,8 @@ def __init__( index_array: Optional[np.ndarray], window_size: int, groupby_indicies: Dict, - rolling_indexer: Union[Type[FixedWindowIndexer], Type[VariableWindowIndexer]], + rolling_indexer: Type[BaseIndexer], + indexer_kwargs: Optional[Dict], **kwargs, ): """ @@ -276,7 +277,10 @@ def __init__( """ self.groupby_indicies = groupby_indicies self.rolling_indexer = rolling_indexer - super().__init__(index_array, window_size, **kwargs) + self.indexer_kwargs = indexer_kwargs or {} + super().__init__( + index_array, self.indexer_kwargs.pop("window_size", window_size), **kwargs + ) @Appender(get_window_bounds_doc) def get_window_bounds( @@ -298,7 +302,9 @@ def get_window_bounds( else: index_array = self.index_array indexer = self.rolling_indexer( - index_array=index_array, window_size=self.window_size, + index_array=index_array, + window_size=self.window_size, + **self.indexer_kwargs, ) start, end = indexer.get_window_bounds( len(indicies), min_periods, center, closed diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index a04d68a6d6745..777f0fee01034 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -145,7 +145,7 @@ class _Window(PandasObject, ShallowMixin, SelectionMixin): def __init__( self, - obj, + obj: FrameOrSeries, window=None, min_periods: Optional[int] = None, center: bool = False, @@ -2275,18 +2275,28 @@ def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer: ------- GroupbyRollingIndexer """ - rolling_indexer: Union[Type[FixedWindowIndexer], Type[VariableWindowIndexer]] - if self.is_freq_type: + rolling_indexer: Type[BaseIndexer] + indexer_kwargs: Optional[Dict] + if isinstance(self.window, BaseIndexer): + rolling_indexer = type(self.window) + indexer_kwargs = self.window.__dict__ + # We'll be using the index of each group later + indexer_kwargs.pop("index_array", None) + index_array = self._groupby._selected_obj.index.asi8 + elif self.is_freq_type: rolling_indexer = VariableWindowIndexer + indexer_kwargs = None index_array = self._groupby._selected_obj.index.asi8 else: rolling_indexer = FixedWindowIndexer + indexer_kwargs = None index_array = None window_indexer = GroupbyRollingIndexer( index_array=index_array, window_size=window, groupby_indicies=self._groupby.indices, rolling_indexer=rolling_indexer, + indexer_kwargs=indexer_kwargs, ) return window_indexer diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 5241b9548a442..917f78bff9794 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -304,3 +304,26 @@ def test_groupby_subselect_rolling(self): name="b", ) tm.assert_series_equal(result, expected) + + def test_groupby_rolling_custom_indexer(self): + # GH 35557 + class SimpleIndexer(pd.api.indexers.BaseIndexer): + def get_window_bounds( + self, num_values=0, min_periods=None, center=None, closed=None + ): + min_periods = self.window_size if min_periods is None else 0 + end = np.arange(num_values, dtype=np.int64) + 1 + start = end.copy() - self.window_size + start[start < 0] = min_periods + return start, end + + df = pd.DataFrame( + {"a": [1.0, 2.0, 3.0, 4.0, 5.0] * 3}, index=[0] * 5 + [1] * 5 + [2] * 5 + ) + result = ( + df.groupby(df.index) + .rolling(SimpleIndexer(window_size=3), min_periods=1) + .sum() + ) + expected = df.groupby(df.index).rolling(window=3, min_periods=1).sum() + tm.assert_frame_equal(result, expected) From 1ff439419ab13d647a1087d3860eec6a621e44bd Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 10 Aug 2020 08:31:36 -0700 Subject: [PATCH 2/3] Remove unused import --- pandas/core/window/indexers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index f4243a5ee862a..7cbe34cdebf9f 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -1,6 +1,6 @@ """Indexer objects for computing start/end window bounds for rolling operations""" from datetime import timedelta -from typing import Dict, Optional, Tuple, Type, Union +from typing import Dict, Optional, Tuple, Type import numpy as np From 8d1f65ff7075b7b4eb27f2c0096d7ee74f86180a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 10 Aug 2020 10:49:58 -0700 Subject: [PATCH 3/3] Simplify some logic --- pandas/core/window/rolling.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index de5bd5c227485..0306d4de2fc73 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2272,20 +2272,17 @@ def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer: GroupbyRollingIndexer """ rolling_indexer: Type[BaseIndexer] - indexer_kwargs: Optional[Dict] + indexer_kwargs: Optional[Dict] = None + index_array = self.obj.index.asi8 if isinstance(self.window, BaseIndexer): rolling_indexer = type(self.window) indexer_kwargs = self.window.__dict__ # We'll be using the index of each group later indexer_kwargs.pop("index_array", None) - index_array = self.obj.index.asi8 elif self.is_freq_type: rolling_indexer = VariableWindowIndexer - indexer_kwargs = None - index_array = self.obj.index.asi8 else: rolling_indexer = FixedWindowIndexer - indexer_kwargs = None index_array = None window_indexer = GroupbyRollingIndexer( index_array=index_array,