Skip to content

Backport PR #35647 on branch 1.1.x (BUG: Support custom BaseIndexers in groupby.rolling) #35699

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`)
- Fixed regression in ``.groupby(..).rolling(..)`` where a segfault would occur with ``center=True`` and an odd number of values (:issue:`35552`)
- Fixed regression in :meth:`DataFrame.apply` where functions that altered the input in-place only operated on a single row (:issue:`35462`)
- Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`)

.. ---------------------------------------------------------------------------

Expand Down
14 changes: 10 additions & 4 deletions pandas/core/window/indexers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Indexer objects for computing start/end window bounds for rolling operations"""
from datetime import timedelta
from typing import Dict, Optional, Tuple, Type, Union
from typing import Dict, Optional, Tuple, Type

import numpy as np

Expand Down Expand Up @@ -265,7 +265,8 @@ def __init__(
index_array: Optional[np.ndarray],
window_size: int,
groupby_indicies: Dict,
rolling_indexer: Union[Type[FixedWindowIndexer], Type[VariableWindowIndexer]],
rolling_indexer: Type[BaseIndexer],
indexer_kwargs: Optional[Dict],
**kwargs,
):
"""
Expand All @@ -276,7 +277,10 @@ def __init__(
"""
self.groupby_indicies = groupby_indicies
self.rolling_indexer = rolling_indexer
super().__init__(index_array, window_size, **kwargs)
self.indexer_kwargs = indexer_kwargs or {}
super().__init__(
index_array, self.indexer_kwargs.pop("window_size", window_size), **kwargs
)

@Appender(get_window_bounds_doc)
def get_window_bounds(
Expand All @@ -298,7 +302,9 @@ def get_window_bounds(
else:
index_array = self.index_array
indexer = self.rolling_indexer(
index_array=index_array, window_size=self.window_size,
index_array=index_array,
window_size=self.window_size,
**self.indexer_kwargs,
)
start, end = indexer.get_window_bounds(
len(indicies), min_periods, center, closed
Expand Down
15 changes: 11 additions & 4 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class _Window(PandasObject, ShallowMixin, SelectionMixin):

def __init__(
self,
obj,
obj: FrameOrSeries,
window=None,
min_periods: Optional[int] = None,
center: bool = False,
Expand Down Expand Up @@ -2255,10 +2255,16 @@ def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer:
-------
GroupbyRollingIndexer
"""
rolling_indexer: Union[Type[FixedWindowIndexer], Type[VariableWindowIndexer]]
if self.is_freq_type:
rolling_indexer: Type[BaseIndexer]
indexer_kwargs: Optional[Dict] = None
index_array = self.obj.index.asi8
if isinstance(self.window, BaseIndexer):
rolling_indexer = type(self.window)
indexer_kwargs = self.window.__dict__
# We'll be using the index of each group later
indexer_kwargs.pop("index_array", None)
elif self.is_freq_type:
rolling_indexer = VariableWindowIndexer
index_array = self.obj.index.asi8
else:
rolling_indexer = FixedWindowIndexer
index_array = None
Expand All @@ -2267,6 +2273,7 @@ def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer:
window_size=window,
groupby_indicies=self._groupby.indices,
rolling_indexer=rolling_indexer,
indexer_kwargs=indexer_kwargs,
)
return window_indexer

Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/window/test_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,29 @@ def test_groupby_subselect_rolling(self):
)
tm.assert_series_equal(result, expected)

def test_groupby_rolling_custom_indexer(self):
# GH 35557
class SimpleIndexer(pd.api.indexers.BaseIndexer):
def get_window_bounds(
self, num_values=0, min_periods=None, center=None, closed=None
):
min_periods = self.window_size if min_periods is None else 0
end = np.arange(num_values, dtype=np.int64) + 1
start = end.copy() - self.window_size
start[start < 0] = min_periods
return start, end

df = pd.DataFrame(
{"a": [1.0, 2.0, 3.0, 4.0, 5.0] * 3}, index=[0] * 5 + [1] * 5 + [2] * 5
)
result = (
df.groupby(df.index)
.rolling(SimpleIndexer(window_size=3), min_periods=1)
.sum()
)
expected = df.groupby(df.index).rolling(window=3, min_periods=1).sum()
tm.assert_frame_equal(result, expected)

def test_groupby_rolling_subset_with_closed(self):
# GH 35549
df = pd.DataFrame(
Expand Down