Skip to content

Commit ccc94a9

Browse files
committed
ENH: Rolling window with step size (pandas-dev/pandas GH-15354)
1 parent c64fbce commit ccc94a9

File tree

8 files changed

+294
-96
lines changed

8 files changed

+294
-96
lines changed

pandas/_libs/window/indexers.pyi

+2 -1
Original file line number | Diff line number | Diff line change
@@ -8,5 +8,6 @@ def calculate_variable_window_bounds(
88
min_periods,
99
center: bool,
1010
closed: str | None,
11+
step: int | None,
1112
index: np.ndarray, # const int64_t[:]
12-
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
13+
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...

pandas/_libs/window/indexers.pyx

+9 -3
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ def calculate_variable_window_bounds(
1616
object min_periods, # unused but here to match get_window_bounds signature
1717
bint center,
1818
str closed,
19+
int64_t step,
1920
const int64_t[:] index
2021
):
2122
"""
@@ -38,17 +39,20 @@ def calculate_variable_window_bounds(
3839
closed : str
3940
string of side of the window that should be closed
4041
42+
step : int64
43+
Spacing between windows
44+
4145
index : ndarray[int64]
4246
time series index to roll over
4347
4448
Returns
4549
-------
46-
(ndarray[int64], ndarray[int64])
50+
(ndarray[int64], ndarray[int64], ndarray[int64])
4751
"""
4852
cdef:
4953
bint left_closed = False
5054
bint right_closed = False
51-
ndarray[int64_t, ndim=1] start, end
55+
ndarray[int64_t, ndim=1] start, end, ref
5256
int64_t start_bound, end_bound, index_growth_sign = 1
5357
Py_ssize_t i, j
5458

@@ -143,4 +147,6 @@ def calculate_variable_window_bounds(
143147
# right endpoint is open
144148
if not right_closed and not center:
145149
end[i] -= 1
146-
return start, end
150+
ref = (None if step is None or step == 1
151+
else np.arange(0, num_values, step, dtype='int64'))
152+
return start[::step], end[::step], ref

pandas/core/generic.py

+3
Original file line number | Diff line number | Diff line change
@@ -11263,6 +11263,7 @@ def rolling(
1126311263
on: str | None = None,
1126411264
axis: Axis = 0,
1126511265
closed: str | None = None,
11266+
step: int | None = None,
1126611267
method: str = "single",
1126711268
):
1126811269
axis = self._get_axis_number(axis)
@@ -11277,6 +11278,7 @@ def rolling(
1127711278
on=on,
1127811279
axis=axis,
1127911280
closed=closed,
11281+
step=step,
1128011282
method=method,
1128111283
)
1128211284

@@ -11289,6 +11291,7 @@ def rolling(
1128911291
on=on,
1129011292
axis=axis,
1129111293
closed=closed,
11294+
step=step,
1129211295
method=method,
1129311296
)
1129411297

pandas/core/indexers/objects.py

+114 -32
Original file line number | Diff line number | Diff line change
@@ -27,13 +27,17 @@
2727
center passed from the top level rolling API
2828
closed : str, default None
2929
closed passed from the top level rolling API
30+
step : int, default None
31+
step passed from the top level rolling API
3032
win_type : str, default None
3133
win_type passed from the top level rolling API
3234
3335
Returns
3436
-------
35-
A tuple of ndarray[int64]s, indicating the boundaries of each
36-
window
37+
A tuple of ndarray[int64]s:
38+
start : array of start boundaries
39+
end : array of end boundaries
40+
ref : array of window reference locations, or None indicating all if step is None or 1
3741
"""
3842

3943

@@ -55,6 +59,16 @@ def __init__(
5559
for key, value in kwargs.items():
5660
setattr(self, key, value)
5761

62+
def _get_default_ref(self, num_values: int = 0, step: int | None = None):
63+
"""
64+
Returns the default window reference locations.
65+
"""
66+
return (
67+
None
68+
if step is None or step == 1
69+
else np.arange(0, num_values, step, dtype="int64")
70+
)
71+
5872
@Appender(get_window_bounds_doc)
5973
def get_window_bounds(
6074
self,
@@ -66,9 +80,23 @@ def get_window_bounds(
6680

6781
raise NotImplementedError
6882

83+
@Appender(get_window_bounds_doc)
84+
def get_window_bounds2(
85+
self,
86+
num_values: int = 0,
87+
min_periods: int | None = None,
88+
center: bool | None = None,
89+
closed: str | None = None,
90+
step: int | None = None,
91+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
92+
93+
start, end = self.get_window_bounds(num_values, min_periods, center, closed)
94+
ref = self._get_default_ref(num_values, step)
95+
return start[::step], end[::step], ref
6996

70-
class FixedWindowIndexer(BaseIndexer):
71-
"""Creates window boundaries that are of fixed length."""
97+
98+
class BaseIndexer2(BaseIndexer):
99+
"""Base class for window bounds calculations with step optimization."""
72100

73101
@Appender(get_window_bounds_doc)
74102
def get_window_bounds(
@@ -79,12 +107,43 @@ def get_window_bounds(
79107
closed: str | None = None,
80108
) -> tuple[np.ndarray, np.ndarray]:
81109

110+
start, end, ref = self.get_window_bounds2(
111+
num_values, min_periods, center, closed
112+
)
113+
return start, end
114+
115+
@Appender(get_window_bounds_doc)
116+
def get_window_bounds2(
117+
self,
118+
num_values: int = 0,
119+
min_periods: int | None = None,
120+
center: bool | None = None,
121+
closed: str | None = None,
122+
step: int | None = None,
123+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
124+
125+
raise NotImplementedError
126+
127+
128+
class FixedWindowIndexer(BaseIndexer2):
129+
"""Creates window boundaries that are of fixed length."""
130+
131+
@Appender(get_window_bounds_doc)
132+
def get_window_bounds2(
133+
self,
134+
num_values: int = 0,
135+
min_periods: int | None = None,
136+
center: bool | None = None,
137+
closed: str | None = None,
138+
step: int | None = None,
139+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
140+
82141
if center:
83142
offset = (self.window_size - 1) // 2
84143
else:
85144
offset = 0
86145

87-
end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64")
146+
end = np.arange(1 + offset, num_values + 1 + offset, step, dtype="int64")
88147
start = end - self.window_size
89148
if closed in ["left", "both"]:
90149
start -= 1
@@ -94,20 +153,22 @@ def get_window_bounds(
94153
end = np.clip(end, 0, num_values)
95154
start = np.clip(start, 0, num_values)
96155

97-
return start, end
156+
ref = self._get_default_ref(num_values, step)
157+
return start, end, ref
98158

99159

100-
class VariableWindowIndexer(BaseIndexer):
160+
class VariableWindowIndexer(BaseIndexer2):
101161
"""Creates window boundaries that are of variable length, namely for time series."""
102162

103163
@Appender(get_window_bounds_doc)
104-
def get_window_bounds(
164+
def get_window_bounds2(
105165
self,
106166
num_values: int = 0,
107167
min_periods: int | None = None,
108168
center: bool | None = None,
109169
closed: str | None = None,
110-
) -> tuple[np.ndarray, np.ndarray]:
170+
step: int | None = None,
171+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
111172

112173
# error: Argument 4 to "calculate_variable_window_bounds" has incompatible
113174
# type "Optional[bool]"; expected "bool"
@@ -119,6 +180,7 @@ def get_window_bounds(
119180
min_periods,
120181
center, # type: ignore[arg-type]
121182
closed,
183+
step if step is not None else 1,
122184
self.index_array, # type: ignore[arg-type]
123185
)
124186

@@ -205,25 +267,28 @@ def get_window_bounds(
205267
return start, end
206268

207269

208-
class ExpandingIndexer(BaseIndexer):
270+
class ExpandingIndexer(BaseIndexer2):
209271
"""Calculate expanding window bounds, mimicking df.expanding()"""
210272

211273
@Appender(get_window_bounds_doc)
212-
def get_window_bounds(
274+
def get_window_bounds2(
213275
self,
214276
num_values: int = 0,
215277
min_periods: int | None = None,
216278
center: bool | None = None,
217279
closed: str | None = None,
218-
) -> tuple[np.ndarray, np.ndarray]:
280+
step: int | None = None,
281+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
219282

220-
return (
221-
np.zeros(num_values, dtype=np.int64),
222-
np.arange(1, num_values + 1, dtype=np.int64),
223-
)
283+
if step is None:
284+
step = 1
285+
end = np.arange(1, num_values + 1, step, dtype=np.int64)
286+
start = np.zeros(len(end), dtype=np.int64)
287+
ref = self._get_default_ref(num_values, step)
288+
return start, end, ref
224289

225290

226-
class FixedForwardWindowIndexer(BaseIndexer):
291+
class FixedForwardWindowIndexer(BaseIndexer2):
227292
"""
228293
Creates window boundaries for fixed-length windows that include the
229294
current row.
@@ -250,30 +315,34 @@ class FixedForwardWindowIndexer(BaseIndexer):
250315
"""
251316

252317
@Appender(get_window_bounds_doc)
253-
def get_window_bounds(
318+
def get_window_bounds2(
254319
self,
255320
num_values: int = 0,
256321
min_periods: int | None = None,
257322
center: bool | None = None,
258323
closed: str | None = None,
259-
) -> tuple[np.ndarray, np.ndarray]:
324+
step: int | None = None,
325+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
260326

261327
if center:
262328
raise ValueError("Forward-looking windows can't have center=True")
263329
if closed is not None:
264330
raise ValueError(
265331
"Forward-looking windows don't support setting the closed argument"
266332
)
333+
if step is None:
334+
step = 1
267335

268-
start = np.arange(num_values, dtype="int64")
336+
start = np.arange(0, num_values, step, dtype="int64")
269337
end = start + self.window_size
270338
if self.window_size:
271-
end[-self.window_size :] = num_values
339+
end = np.clip(end, 0, num_values)
272340

273-
return start, end
341+
ref = self._get_default_ref(num_values, step)
342+
return start, end, ref
274343

275344

276-
class GroupbyIndexer(BaseIndexer):
345+
class GroupbyIndexer(BaseIndexer2):
277346
"""Calculate bounds to compute groupby rolling, mimicking df.groupby().rolling()"""
278347

279348
def __init__(
@@ -313,18 +382,21 @@ def __init__(
313382
)
314383

315384
@Appender(get_window_bounds_doc)
316-
def get_window_bounds(
385+
def get_window_bounds2(
317386
self,
318387
num_values: int = 0,
319388
min_periods: int | None = None,
320389
center: bool | None = None,
321390
closed: str | None = None,
322-
) -> tuple[np.ndarray, np.ndarray]:
391+
step: int | None = None,
392+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
323393
# 1) For each group, get the indices that belong to the group
324394
# 2) Use the indices to calculate the start & end bounds of the window
325395
# 3) Append the window bounds in group order
326396
start_arrays = []
327397
end_arrays = []
398+
ref_arrays = []
399+
empty = np.array([], dtype=np.int64)
328400
window_indices_start = 0
329401
for key, indices in self.groupby_indices.items():
330402
index_array: np.ndarray | None
@@ -338,11 +410,12 @@ def get_window_bounds(
338410
window_size=self.window_size,
339411
**self.indexer_kwargs,
340412
)
341-
start, end = indexer.get_window_bounds(
342-
len(indices), min_periods, center, closed
413+
start, end, ref = indexer.get_window_bounds2(
414+
len(indices), min_periods, center, closed, step
343415
)
344416
start = start.astype(np.int64)
345417
end = end.astype(np.int64)
418+
ref = None if ref is None else ref.astype(np.int64)
346419
assert len(start) == len(
347420
end
348421
), "these should be equal in length from get_window_bounds"
@@ -358,21 +431,30 @@ def get_window_bounds(
358431
)
359432
start_arrays.append(window_indices.take(ensure_platform_int(start)))
360433
end_arrays.append(window_indices.take(ensure_platform_int(end)))
434+
ref_arrays.append(
435+
empty if ref is None else window_indices.take(ensure_platform_int(ref))
436+
)
361437
start = np.concatenate(start_arrays)
362438
end = np.concatenate(end_arrays)
363-
return start, end
439+
ref = None if step is None or step == 1 else np.concatenate(ref_arrays)
440+
return start, end, ref
364441

365442

366-
class ExponentialMovingWindowIndexer(BaseIndexer):
443+
class ExponentialMovingWindowIndexer(BaseIndexer2):
367444
"""Calculate ewm window bounds (the entire window)"""
368445

369446
@Appender(get_window_bounds_doc)
370-
def get_window_bounds(
447+
def get_window_bounds2(
371448
self,
372449
num_values: int = 0,
373450
min_periods: int | None = None,
374451
center: bool | None = None,
375452
closed: str | None = None,
376-
) -> tuple[np.ndarray, np.ndarray]:
453+
step: int | None = None,
454+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
377455

378-
return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64)
456+
return (
457+
np.array([0], dtype=np.int64),
458+
np.array([num_values], dtype=np.int64),
459+
None,
460+
)

0 commit comments

Comments
 (0)