27
27
center passed from the top level rolling API
28
28
closed : str, default None
29
29
closed passed from the top level rolling API
30
+ step : int, default None
31
+ step passed from the top level rolling API
30
32
win_type : str, default None
31
33
win_type passed from the top level rolling API
32
34
33
35
Returns
34
36
-------
35
- A tuple of ndarray[int64]s, indicating the boundaries of each
36
- window
37
+ A tuple of ndarray[int64]s:
38
+ start : array of start boundaries
39
+ end : array of end boundaries
40
+ ref : array of window reference locations, or None indicating all if step is None or 1
37
41
"""
38
42
39
43
@@ -55,6 +59,16 @@ def __init__(
55
59
for key , value in kwargs .items ():
56
60
setattr (self , key , value )
57
61
62
+ def _get_default_ref (self , num_values : int = 0 , step : int | None = None ):
63
+ """
64
+ Returns the default window reference locations.
65
+ """
66
+ return (
67
+ None
68
+ if step is None or step == 1
69
+ else np .arange (0 , num_values , step , dtype = "int64" )
70
+ )
71
+
58
72
@Appender (get_window_bounds_doc )
59
73
def get_window_bounds (
60
74
self ,
@@ -66,9 +80,23 @@ def get_window_bounds(
66
80
67
81
raise NotImplementedError
68
82
83
+ @Appender (get_window_bounds_doc )
84
def get_window_bounds2(
    self,
    num_values: int = 0,
    min_periods: int | None = None,
    center: bool | None = None,
    closed: str | None = None,
    step: int | None = None,
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
    # NOTE: deliberately no inline docstring here; the Appender decorator on
    # this method presumably attaches get_window_bounds_doc, and an inline
    # docstring could change the resulting text.

    # Generic fallback: compute the un-stepped bounds through the classic
    # get_window_bounds() API, then keep only every ``step``-th window.
    start, end = self.get_window_bounds(num_values, min_periods, center, closed)
    ref = self._get_default_ref(num_values, step)
    # ``[::None]`` is the same as ``[:]``, so step=None keeps every window.
    return start[::step], end[::step], ref
69
96
70
- class FixedWindowIndexer (BaseIndexer ):
71
- """Creates window boundaries that are of fixed length."""
97
+
98
+ class BaseIndexer2 (BaseIndexer ):
99
+ """Base class for window bounds calculations with step optimization."""
72
100
73
101
@Appender (get_window_bounds_doc )
74
102
def get_window_bounds (
@@ -79,12 +107,43 @@ def get_window_bounds(
79
107
closed : str | None = None ,
80
108
) -> tuple [np .ndarray , np .ndarray ]:
81
109
110
+ start , end , ref = self .get_window_bounds2 (
111
+ num_values , min_periods , center , closed
112
+ )
113
+ return start , end
114
+
115
+ @Appender (get_window_bounds_doc )
116
+ def get_window_bounds2 (
117
+ self ,
118
+ num_values : int = 0 ,
119
+ min_periods : int | None = None ,
120
+ center : bool | None = None ,
121
+ closed : str | None = None ,
122
+ step : int | None = None ,
123
+ ) -> tuple [np .ndarray , np .ndarray , np .ndarray | None ]:
124
+
125
+ raise NotImplementedError
126
+
127
+
128
+ class FixedWindowIndexer (BaseIndexer2 ):
129
+ """Creates window boundaries that are of fixed length."""
130
+
131
+ @Appender (get_window_bounds_doc )
132
+ def get_window_bounds2 (
133
+ self ,
134
+ num_values : int = 0 ,
135
+ min_periods : int | None = None ,
136
+ center : bool | None = None ,
137
+ closed : str | None = None ,
138
+ step : int | None = None ,
139
+ ) -> tuple [np .ndarray , np .ndarray , np .ndarray | None ]:
140
+
82
141
if center :
83
142
offset = (self .window_size - 1 ) // 2
84
143
else :
85
144
offset = 0
86
145
87
- end = np .arange (1 + offset , num_values + 1 + offset , dtype = "int64" )
146
+ end = np .arange (1 + offset , num_values + 1 + offset , step , dtype = "int64" )
88
147
start = end - self .window_size
89
148
if closed in ["left" , "both" ]:
90
149
start -= 1
@@ -94,20 +153,22 @@ def get_window_bounds(
94
153
end = np .clip (end , 0 , num_values )
95
154
start = np .clip (start , 0 , num_values )
96
155
97
- return start , end
156
+ ref = self ._get_default_ref (num_values , step )
157
+ return start , end , ref
98
158
99
159
100
- class VariableWindowIndexer (BaseIndexer ):
160
+ class VariableWindowIndexer (BaseIndexer2 ):
101
161
"""Creates window boundaries that are of variable length, namely for time series."""
102
162
103
163
@Appender (get_window_bounds_doc )
104
- def get_window_bounds (
164
+ def get_window_bounds2 (
105
165
self ,
106
166
num_values : int = 0 ,
107
167
min_periods : int | None = None ,
108
168
center : bool | None = None ,
109
169
closed : str | None = None ,
110
- ) -> tuple [np .ndarray , np .ndarray ]:
170
+ step : int | None = None ,
171
+ ) -> tuple [np .ndarray , np .ndarray , np .ndarray | None ]:
111
172
112
173
# error: Argument 4 to "calculate_variable_window_bounds" has incompatible
113
174
# type "Optional[bool]"; expected "bool"
@@ -119,6 +180,7 @@ def get_window_bounds(
119
180
min_periods ,
120
181
center , # type: ignore[arg-type]
121
182
closed ,
183
+ step if step is not None else 1 ,
122
184
self .index_array , # type: ignore[arg-type]
123
185
)
124
186
@@ -205,25 +267,28 @@ def get_window_bounds(
205
267
return start , end
206
268
207
269
208
class ExpandingIndexer(BaseIndexer2):
    """Calculate expanding window bounds, mimicking df.expanding()"""

    @Appender(get_window_bounds_doc)
    def get_window_bounds2(
        self,
        num_values: int = 0,
        min_periods: int | None = None,
        center: bool | None = None,
        closed: str | None = None,
        step: int | None = None,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
        # min_periods, center and closed are accepted for signature
        # compatibility but are not used by this computation.

        # A missing step means "every row".
        if step is None:
            step = 1
        # Window i ends (exclusively) at 1, 1 + step, 1 + 2 * step, ...
        end = np.arange(1, num_values + 1, step, dtype=np.int64)
        # Every expanding window starts at the first row.
        start = np.zeros(len(end), dtype=np.int64)
        ref = self._get_default_ref(num_values, step)
        return start, end, ref
224
289
225
290
226
- class FixedForwardWindowIndexer (BaseIndexer ):
291
+ class FixedForwardWindowIndexer (BaseIndexer2 ):
227
292
"""
228
293
Creates window boundaries for fixed-length windows that include the
229
294
current row.
@@ -250,30 +315,34 @@ class FixedForwardWindowIndexer(BaseIndexer):
250
315
"""
251
316
252
317
@Appender (get_window_bounds_doc )
253
def get_window_bounds2(
    self,
    num_values: int = 0,
    min_periods: int | None = None,
    center: bool | None = None,
    closed: str | None = None,
    step: int | None = None,
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
    # Raises ValueError when ``center`` is truthy or ``closed`` is not None:
    # neither option is supported for forward-looking windows.
    if center:
        raise ValueError("Forward-looking windows can't have center=True")
    if closed is not None:
        raise ValueError(
            "Forward-looking windows don't support setting the closed argument"
        )
    # A missing step means "every row".
    if step is None:
        step = 1

    # Each window begins at its own (stepped) row and extends forward by
    # ``window_size`` rows.
    start = np.arange(0, num_values, step, dtype="int64")
    end = start + self.window_size
    if self.window_size:
        # Windows that would run past the data are truncated at num_values.
        end = np.clip(end, 0, num_values)

    ref = self._get_default_ref(num_values, step)
    return start, end, ref
274
343
275
344
276
- class GroupbyIndexer (BaseIndexer ):
345
+ class GroupbyIndexer (BaseIndexer2 ):
277
346
"""Calculate bounds to compute groupby rolling, mimicking df.groupby().rolling()"""
278
347
279
348
def __init__ (
@@ -313,18 +382,21 @@ def __init__(
313
382
)
314
383
315
384
@Appender (get_window_bounds_doc )
316
- def get_window_bounds (
385
+ def get_window_bounds2 (
317
386
self ,
318
387
num_values : int = 0 ,
319
388
min_periods : int | None = None ,
320
389
center : bool | None = None ,
321
390
closed : str | None = None ,
322
- ) -> tuple [np .ndarray , np .ndarray ]:
391
+ step : int | None = None ,
392
+ ) -> tuple [np .ndarray , np .ndarray , np .ndarray | None ]:
323
393
# 1) For each group, get the indices that belong to the group
324
394
# 2) Use the indices to calculate the start & end bounds of the window
325
395
# 3) Append the window bounds in group order
326
396
start_arrays = []
327
397
end_arrays = []
398
+ ref_arrays = []
399
+ empty = np .array ([], dtype = np .int64 )
328
400
window_indices_start = 0
329
401
for key , indices in self .groupby_indices .items ():
330
402
index_array : np .ndarray | None
@@ -338,11 +410,12 @@ def get_window_bounds(
338
410
window_size = self .window_size ,
339
411
** self .indexer_kwargs ,
340
412
)
341
- start , end = indexer .get_window_bounds (
342
- len (indices ), min_periods , center , closed
413
+ start , end , ref = indexer .get_window_bounds2 (
414
+ len (indices ), min_periods , center , closed , step
343
415
)
344
416
start = start .astype (np .int64 )
345
417
end = end .astype (np .int64 )
418
+ ref = None if ref is None else ref .astype (np .int64 )
346
419
assert len (start ) == len (
347
420
end
348
421
), "these should be equal in length from get_window_bounds"
@@ -358,21 +431,30 @@ def get_window_bounds(
358
431
)
359
432
start_arrays .append (window_indices .take (ensure_platform_int (start )))
360
433
end_arrays .append (window_indices .take (ensure_platform_int (end )))
434
+ ref_arrays .append (
435
+ empty if ref is None else window_indices .take (ensure_platform_int (ref ))
436
+ )
361
437
start = np .concatenate (start_arrays )
362
438
end = np .concatenate (end_arrays )
363
- return start , end
439
+ ref = None if step is None or step == 1 else np .concatenate (ref_arrays )
440
+ return start , end , ref
364
441
365
442
366
class ExponentialMovingWindowIndexer(BaseIndexer2):
    """Calculate ewm window bounds (the entire window)"""

    @Appender(get_window_bounds_doc)
    def get_window_bounds2(
        self,
        num_values: int = 0,
        min_periods: int | None = None,
        center: bool | None = None,
        closed: str | None = None,
        step: int | None = None,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
        # A single window spanning [0, num_values) is returned; min_periods,
        # center, closed and step are accepted but do not affect the result,
        # and no reference array is produced.
        start = np.array([0], dtype=np.int64)
        end = np.array([num_values], dtype=np.int64)
        return start, end, None
0 commit comments