Skip to content

Commit e23bd26

Browse files
authored
DOC: Demonstrate custom rolling indexer with Businessday (#34947)
1 parent 4ffd1f1 commit e23bd26

File tree

3 files changed

+109
-3
lines changed

3 files changed

+109
-3
lines changed

doc/source/user_guide/computation.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ For example, if we have the following ``DataFrame``:
561561
df
562562
563563
and we want to use an expanding window where ``use_expanding`` is ``True`` otherwise a window of size
564-
1, we can create the following ``BaseIndexer``:
564+
1, we can create the following ``BaseIndexer`` subclass:
565565

566566
.. code-block:: ipython
567567
@@ -593,6 +593,8 @@ and we want to use an expanding window where ``use_expanding`` is ``True`` other
593593
3 3.0
594594
4 10.0
595595
596+
You can view other examples of ``BaseIndexer`` subclasses `here <https://github.com/pandas-dev/pandas/blob/master/pandas/core/window/indexers.py>`__.
597+
596598
.. versionadded:: 1.1
597599

598600
For some problems knowledge of the future is available for analysis. For example, this occurs when

pandas/core/window/indexers.py

+85
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
"""Indexer objects for computing start/end window bounds for rolling operations"""
2+
from datetime import timedelta
23
from typing import Dict, Optional, Tuple, Type, Union
34

45
import numpy as np
56

67
from pandas._libs.window.indexers import calculate_variable_window_bounds
78
from pandas.util._decorators import Appender
89

10+
from pandas.tseries.offsets import Nano
11+
912
get_window_bounds_doc = """
1013
Computes the bounds of a window.
1114
@@ -104,6 +107,88 @@ def get_window_bounds(
104107
)
105108

106109

110+
class NonFixedVariableWindowIndexer(BaseIndexer):
    """
    Calculate window boundaries based on a non-fixed offset such as a BusinessDay.

    Parameters
    ----------
    index_array : np.ndarray, optional
        Passed through to ``BaseIndexer.__init__``.
    window_size : int, default 0
        Passed through to ``BaseIndexer.__init__``.
    index : optional
        Positionally indexable labels of the data being windowed. Elements must
        support subtracting ``offset`` and ordering comparisons; presumably a
        ``DatetimeIndex`` -- confirm against callers. The growth direction is
        inferred from the first and last element only, so the labels are
        assumed to be monotonic.
    offset : optional
        Window length, subtracted from each index value to locate where that
        value's window starts (e.g. ``BusinessDay(1)``).
    **kwargs
        Passed through to ``BaseIndexer.__init__``.
    """

    def __init__(
        self,
        index_array: Optional[np.ndarray] = None,
        window_size: int = 0,
        index=None,
        offset=None,
        **kwargs,
    ):
        super().__init__(index_array, window_size, **kwargs)
        self.index = index
        self.offset = offset

    @Appender(get_window_bounds_doc)
    def get_window_bounds(
        self,
        num_values: int = 0,
        min_periods: Optional[int] = None,
        center: Optional[bool] = None,
        closed: Optional[str] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:
        # NOTE: documentation lives in ``get_window_bounds_doc`` (attached by the
        # decorator); ``min_periods`` and ``center`` are accepted for signature
        # compatibility but are not used by this indexer.

        # Nothing to window over. Without this guard the positional look-ups
        # and the ``start[0]`` assignment below raise on empty input.
        if num_values <= 0:
            return np.empty(0, dtype="int64"), np.empty(0, dtype="int64")

        # if the window is variable, default is 'right', otherwise default is 'both'
        if closed is None:
            closed = "right" if self.index is not None else "both"

        right_closed = closed in ["right", "both"]
        left_closed = closed in ["left", "both"]

        # +1 when the index increases from first to last element, -1 otherwise
        if self.index[num_values - 1] < self.index[0]:
            index_growth_sign = -1
        else:
            index_growth_sign = 1

        # -1 marks "not yet computed"; every slot is overwritten below
        start = np.full(num_values, -1, dtype="int64")
        end = np.full(num_values, -1, dtype="int64")

        # the first window always begins at position 0 and contains that
        # element only when the right endpoint is closed
        start[0] = 0
        end[0] = 1 if right_closed else 0

        # loop invariants
        one_nano = Nano(1)
        no_time = timedelta(0)

        # start is start of slice interval (including)
        # end is end of slice interval (not including)
        for i in range(1, num_values):
            end_bound = self.index[i]
            start_bound = self.index[i] - index_growth_sign * self.offset

            # left endpoint is closed: nudge the bound by one nanosecond so
            # the strict comparison below admits a label equal to the bound
            if left_closed:
                start_bound -= one_nano

            # advance the start bound until we are
            # within the constraint; fall back to the current position
            start[i] = i
            for j in range(start[i - 1], i):
                if (self.index[j] - start_bound) * index_growth_sign > no_time:
                    start[i] = j
                    break

            # end bound is previous end
            # or current index
            if (self.index[end[i - 1]] - end_bound) * index_growth_sign <= no_time:
                end[i] = i + 1
            else:
                end[i] = end[i - 1]

            # right endpoint is open
            if not right_closed:
                end[i] -= 1

        return start, end
190+
191+
107192
class ExpandingIndexer(BaseIndexer):
108193
"""Calculate expanding window bounds, mimicking df.expanding()"""
109194

pandas/tests/window/test_base_indexer.py

+21-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import numpy as np
22
import pytest
33

4-
from pandas import DataFrame, Series
4+
from pandas import DataFrame, Series, date_range
55
import pandas._testing as tm
66
from pandas.api.indexers import BaseIndexer, FixedForwardWindowIndexer
7-
from pandas.core.window.indexers import ExpandingIndexer
7+
from pandas.core.window.indexers import ExpandingIndexer, NonFixedVariableWindowIndexer
8+
9+
from pandas.tseries.offsets import BusinessDay
810

911

1012
def test_bad_get_window_bounds_signature():
@@ -234,3 +236,20 @@ def test_rolling_forward_cov_corr(func, expected):
234236
expected = Series(expected)
235237
expected.name = result.name
236238
tm.assert_equal(result, expected)
239+
240+
241+
@pytest.mark.parametrize(
    "closed,expected_data",
    [
        ("right", [0.0, 1.0, 2.0, 3.0, 7.0, 12.0, 6.0, 7.0, 8.0, 9.0]),
        ("left", [0.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 6.0, 7.0, 8.0]),
    ],
)
def test_non_fixed_variable_window_indexer(closed, expected_data):
    """Check rolling sums over a one-BusinessDay window for each ``closed`` side."""
    # Ten dates starting at 2020, labelling the values 0..9.
    dates = date_range("2020", periods=10)
    frame = DataFrame(range(10), index=dates)

    # Window bounds come from the custom indexer rather than a fixed size.
    business_day_window = NonFixedVariableWindowIndexer(
        index=dates, offset=BusinessDay(1)
    )
    summed = frame.rolling(business_day_window, closed=closed).sum()

    tm.assert_frame_equal(summed, DataFrame(expected_data, index=dates))

0 commit comments

Comments
 (0)