Skip to content

Commit 717d805

Browse files
authored
ENH: provide standard BaseIndexers in pandas.api.indexers (#33236)
1 parent d857cd1 commit 717d805

File tree

8 files changed

+114
-13
lines changed

8 files changed

+114
-13
lines changed

asv_bench/benchmarks/rolling.py

+22
Original file line numberDiff line numberDiff line change
@@ -165,4 +165,26 @@ def peakmem_fixed(self):
165165
self.roll.max()
166166

167167

168+
class ForwardWindowMethods:
    """
    Benchmarks for rolling aggregations over forward-looking windows.

    Each parameter combination builds a rolling object backed by
    ``pd.api.indexers.FixedForwardWindowIndexer`` and then times / measures
    the peak memory of a single aggregation method on it.
    """

    params = (
        ["DataFrame", "Series"],
        [10, 1000],
        ["int", "float"],
        ["median", "mean", "max", "min", "kurt", "sum"],
    )
    param_names = ["constructor", "window_size", "dtype", "method"]

    def setup(self, constructor, window_size, dtype, method):
        # ``method`` is unused here; it is accepted because every benchmark
        # hook receives the full parameter tuple.
        N = 10**5
        # Scale before casting: samples drawn from [0, 1) would all truncate
        # to 0 for the "int" dtype, leaving a degenerate all-zero input.
        arr = (100 * np.random.random(N)).astype(dtype)
        indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=window_size)
        self.roll = getattr(pd, constructor)(arr).rolling(window=indexer)

    def time_rolling(self, constructor, window_size, dtype, method):
        # Run the aggregation named by ``method`` on the prepared rolling object.
        getattr(self.roll, method)()

    def peakmem_rolling(self, constructor, window_size, dtype, method):
        # Same call as ``time_rolling``; the ``peakmem_`` prefix selects the
        # memory benchmark instead of the timing benchmark.
        getattr(self.roll, method)()
188+
189+
168190
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/reference/window.rst

+1
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,4 @@ Base class for defining custom window boundaries.
8585
:toctree: api/
8686

8787
api.indexers.BaseIndexer
88+
api.indexers.FixedForwardWindowIndexer

doc/source/user_guide/computation.rst

+14
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,20 @@ and we want to use an expanding window where ``use_expanding`` is ``True`` other
571571
3 3.0
572572
4 10.0
573573
574+
.. versionadded:: 1.1
575+
576+
For some problems, knowledge of the future is available for analysis. For example, this occurs when
577+
each data point is a full time series read from an experiment, and the task is to extract underlying
578+
conditions. In these cases it can be useful to perform forward-looking rolling window computations.
579+
The :class:`FixedForwardWindowIndexer <pandas.api.indexers.FixedForwardWindowIndexer>` class is available for this purpose.
580+
This :class:`BaseIndexer <pandas.api.indexers.BaseIndexer>` subclass implements a closed fixed-width
581+
forward-looking rolling window, and we can use it as follows:
582+
583+
.. ipython:: python
584+
585+
from pandas.api.indexers import FixedForwardWindowIndexer
586+
indexer = FixedForwardWindowIndexer(window_size=2)
587+
df.rolling(indexer, min_periods=1).sum()
574588

575589
.. _stats.rolling_window.endpoints:
576590

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ Other API changes
109109
- ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`)
110110
- Using a :func:`pandas.api.indexers.BaseIndexer` with ``std``, ``var``, ``count``, ``skew``, ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`)
111111
- Using a :func:`pandas.api.indexers.BaseIndexer` with ``min``, ``max`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`)
112+
- Added a :class:`pandas.api.indexers.FixedForwardWindowIndexer` class to support forward-looking windows during ``rolling`` operations.
112113
-
113114

114115
Backwards incompatible API changes

pandas/api/indexers/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
"""
44

55
from pandas.core.indexers import check_array_indexer
6-
from pandas.core.window.indexers import BaseIndexer
6+
from pandas.core.window.indexers import BaseIndexer, FixedForwardWindowIndexer
77

8-
__all__ = ["check_array_indexer", "BaseIndexer"]
8+
__all__ = ["check_array_indexer", "BaseIndexer", "FixedForwardWindowIndexer"]

pandas/core/window/indexers.py

+50
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,53 @@ def get_window_bounds(
120120
np.zeros(num_values, dtype=np.int64),
121121
np.arange(1, num_values + 1, dtype=np.int64),
122122
)
123+
124+
125+
class FixedForwardWindowIndexer(BaseIndexer):
    """
    Creates window boundaries for fixed-length forward-looking windows that
    include the current row.

    The window for row ``i`` starts at row ``i`` and covers the next
    ``window_size`` rows; windows that would run past the last row are
    truncated at the end of the data.

    Examples
    --------
    >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2)
    >>> df.rolling(window=indexer, min_periods=1).sum()
         B
    0  1.0
    1  3.0
    2  2.0
    3  4.0
    4  4.0
    """

    @Appender(get_window_bounds_doc)
    def get_window_bounds(
        self,
        num_values: int = 0,
        min_periods: Optional[int] = None,
        center: Optional[bool] = None,
        closed: Optional[str] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:

        # Reject options that have no meaning for a forward-looking window
        # instead of silently ignoring them.
        if center:
            raise ValueError("Forward-looking windows can't have center=True")
        if closed is not None:
            raise ValueError(
                "Forward-looking windows don't support setting the closed argument"
            )

        # Every window starts at its own row.
        start = np.arange(num_values, dtype="int64")
        # Each window ends ``window_size`` rows later, capped at the end of
        # the data. Clipping (rather than slicing and concatenating) keeps
        # ``end`` the same length as ``start`` even when ``window_size`` is 0
        # or larger than ``num_values``.
        end = np.clip(start + self.window_size, 0, num_values)

        return start, end

pandas/core/window/rolling.py

+11
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,17 @@ class Window(_Window):
900900
3 2.0
901901
4 4.0
902902
903+
Same as above, but with forward-looking windows
904+
905+
>>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2)
906+
>>> df.rolling(window=indexer, min_periods=1).sum()
907+
B
908+
0 1.0
909+
1 3.0
910+
2 2.0
911+
3 4.0
912+
4 4.0
913+
903914
A ragged (meaning not-a-regular frequency), time-indexed DataFrame
904915
905916
>>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},

pandas/tests/window/test_base_indexer.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from pandas import DataFrame, Series
55
import pandas._testing as tm
6-
from pandas.api.indexers import BaseIndexer
6+
from pandas.api.indexers import BaseIndexer, FixedForwardWindowIndexer
77
from pandas.core.window.indexers import ExpandingIndexer
88

99

@@ -105,19 +105,21 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
105105
)
106106
def test_rolling_forward_window(constructor, func, alt_func, expected):
107107
# GH 32865
108-
class ForwardIndexer(BaseIndexer):
109-
def get_window_bounds(self, num_values, min_periods, center, closed):
110-
start = np.arange(num_values, dtype="int64")
111-
end_s = start[: -self.window_size] + self.window_size
112-
end_e = np.full(self.window_size, num_values, dtype="int64")
113-
end = np.concatenate([end_s, end_e])
114-
115-
return start, end
116-
117108
values = np.arange(10)
118109
values[5] = 100.0
119110

120-
indexer = ForwardIndexer(window_size=3)
111+
indexer = FixedForwardWindowIndexer(window_size=3)
112+
113+
match = "Forward-looking windows can't have center=True"
114+
with pytest.raises(ValueError, match=match):
115+
rolling = constructor(values).rolling(window=indexer, center=True)
116+
result = getattr(rolling, func)()
117+
118+
match = "Forward-looking windows don't support setting the closed argument"
119+
with pytest.raises(ValueError, match=match):
120+
rolling = constructor(values).rolling(window=indexer, closed="right")
121+
result = getattr(rolling, func)()
122+
121123
rolling = constructor(values).rolling(window=indexer, min_periods=2)
122124
result = getattr(rolling, func)()
123125
expected = constructor(expected)

0 commit comments

Comments
 (0)