Skip to content

Commit 6caefb1

Browse files
authored
ENH: Rolling window with step size (GH-15354) (#45765)
1 parent 21a3b2f commit 6caefb1

20 files changed

+562
-262
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Other enhancements
3838
- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`)
3939
- :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` now support the argument ``allow_duplicates`` (:issue:`44410`)
4040
- :meth:`.GroupBy.min` and :meth:`.GroupBy.max` now support `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`45428`)
41+
- :meth:`DataFrame.rolling` and :meth:`Series.rolling` now support a ``step`` parameter with fixed-length windows (:issue:`15354`)
4142
- Implemented a ``bool``-dtype :class:`Index`; passing a bool-dtype array-like to ``pd.Index`` will now retain ``bool`` dtype instead of casting to ``object`` (:issue:`45061`)
4243
- Implemented a complex-dtype :class:`Index`; passing a complex-dtype array-like to ``pd.Index`` will now retain complex dtype instead of casting to ``object`` (:issue:`45845`)
4344
- Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`)

pandas/_libs/window/indexers.pyi

+1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@ def calculate_variable_window_bounds(
88
min_periods,
99
center: bool,
1010
closed: str | None,
11+
step: int | None,
1112
index: np.ndarray, # const int64_t[:]
1213
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...

pandas/_libs/window/indexers.pyx

+8-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def calculate_variable_window_bounds(
1616
object min_periods, # unused but here to match get_window_bounds signature
1717
bint center,
1818
str closed,
19+
int64_t step,
1920
const int64_t[:] index
2021
):
2122
"""
@@ -38,6 +39,9 @@ def calculate_variable_window_bounds(
3839
closed : str
3940
string of side of the window that should be closed
4041
42+
step : int64
43+
Spacing between windows
44+
4145
index : ndarray[int64]
4246
time series index to roll over
4347
@@ -52,6 +56,9 @@ def calculate_variable_window_bounds(
5256
int64_t start_bound, end_bound, index_growth_sign = 1
5357
Py_ssize_t i, j
5458

59+
if num_values <= 0:
60+
return np.empty(0, dtype='int64'), np.empty(0, dtype='int64')
61+
5562
# default is 'right'
5663
if closed is None:
5764
closed = 'right'
@@ -143,4 +150,4 @@ def calculate_variable_window_bounds(
143150
# right endpoint is open
144151
if not right_closed and not center:
145152
end[i] -= 1
146-
return start, end
153+
return start[::step], end[::step]

pandas/core/generic.py

+3
Original file line numberDiff line numberDiff line change
@@ -11261,6 +11261,7 @@ def rolling(
1126111261
on: str | None = None,
1126211262
axis: Axis = 0,
1126311263
closed: str | None = None,
11264+
step: int | None = None,
1126411265
method: str = "single",
1126511266
):
1126611267
axis = self._get_axis_number(axis)
@@ -11275,6 +11276,7 @@ def rolling(
1127511276
on=on,
1127611277
axis=axis,
1127711278
closed=closed,
11279+
step=step,
1127811280
method=method,
1127911281
)
1128011282

@@ -11287,6 +11289,7 @@ def rolling(
1128711289
on=on,
1128811290
axis=axis,
1128911291
closed=closed,
11292+
step=step,
1129011293
method=method,
1129111294
)
1129211295

pandas/core/indexers/objects.py

+45-9
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
center passed from the top level rolling API
2828
closed : str, default None
2929
closed passed from the top level rolling API
30+
step : int, default None
31+
step passed from the top level rolling API
32+
.. versionadded:: 1.5
3033
win_type : str, default None
3134
win_type passed from the top level rolling API
3235
@@ -62,6 +65,7 @@ def get_window_bounds(
6265
min_periods: int | None = None,
6366
center: bool | None = None,
6467
closed: str | None = None,
68+
step: int | None = None,
6569
) -> tuple[np.ndarray, np.ndarray]:
6670

6771
raise NotImplementedError
@@ -77,14 +81,15 @@ def get_window_bounds(
7781
min_periods: int | None = None,
7882
center: bool | None = None,
7983
closed: str | None = None,
84+
step: int | None = None,
8085
) -> tuple[np.ndarray, np.ndarray]:
8186

8287
if center:
8388
offset = (self.window_size - 1) // 2
8489
else:
8590
offset = 0
8691

87-
end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64")
92+
end = np.arange(1 + offset, num_values + 1 + offset, step, dtype="int64")
8893
start = end - self.window_size
8994
if closed in ["left", "both"]:
9095
start -= 1
@@ -107,8 +112,12 @@ def get_window_bounds(
107112
min_periods: int | None = None,
108113
center: bool | None = None,
109114
closed: str | None = None,
115+
step: int | None = None,
110116
) -> tuple[np.ndarray, np.ndarray]:
111117

118+
if step is not None:
119+
raise NotImplementedError("step not implemented for variable window")
120+
112121
# error: Argument 4 to "calculate_variable_window_bounds" has incompatible
113122
# type "Optional[bool]"; expected "bool"
114123
# error: Argument 6 to "calculate_variable_window_bounds" has incompatible
@@ -119,6 +128,7 @@ def get_window_bounds(
119128
min_periods,
120129
center, # type: ignore[arg-type]
121130
closed,
131+
1,
122132
self.index_array, # type: ignore[arg-type]
123133
)
124134

@@ -145,8 +155,14 @@ def get_window_bounds(
145155
min_periods: int | None = None,
146156
center: bool | None = None,
147157
closed: str | None = None,
158+
step: int | None = None,
148159
) -> tuple[np.ndarray, np.ndarray]:
149160

161+
if step is not None:
162+
raise NotImplementedError("step not implemented for variable offset window")
163+
if num_values <= 0:
164+
return np.empty(0, dtype="int64"), np.empty(0, dtype="int64")
165+
150166
# if windows is variable, default is 'right', otherwise default is 'both'
151167
if closed is None:
152168
closed = "right" if self.index is not None else "both"
@@ -215,12 +231,15 @@ def get_window_bounds(
215231
min_periods: int | None = None,
216232
center: bool | None = None,
217233
closed: str | None = None,
234+
step: int | None = None,
218235
) -> tuple[np.ndarray, np.ndarray]:
219236

220-
return (
221-
np.zeros(num_values, dtype=np.int64),
222-
np.arange(1, num_values + 1, dtype=np.int64),
223-
)
237+
if step is not None:
238+
raise NotImplementedError("step not implemented for expanding window")
239+
240+
end = np.arange(1, num_values + 1, dtype=np.int64)
241+
start = np.zeros(len(end), dtype=np.int64)
242+
return start, end
224243

225244

226245
class FixedForwardWindowIndexer(BaseIndexer):
@@ -256,6 +275,7 @@ def get_window_bounds(
256275
min_periods: int | None = None,
257276
center: bool | None = None,
258277
closed: str | None = None,
278+
step: int | None = None,
259279
) -> tuple[np.ndarray, np.ndarray]:
260280

261281
if center:
@@ -264,11 +284,13 @@ def get_window_bounds(
264284
raise ValueError(
265285
"Forward-looking windows don't support setting the closed argument"
266286
)
287+
if step is None:
288+
step = 1
267289

268-
start = np.arange(num_values, dtype="int64")
290+
start = np.arange(0, num_values, step, dtype="int64")
269291
end = start + self.window_size
270292
if self.window_size:
271-
end[-self.window_size :] = num_values
293+
end = np.clip(end, 0, num_values)
272294

273295
return start, end
274296

@@ -319,7 +341,11 @@ def get_window_bounds(
319341
min_periods: int | None = None,
320342
center: bool | None = None,
321343
closed: str | None = None,
344+
step: int | None = None,
322345
) -> tuple[np.ndarray, np.ndarray]:
346+
if step is not None:
347+
raise NotImplementedError("step not implemented for groupby window")
348+
323349
# 1) For each group, get the indices that belong to the group
324350
# 2) Use the indices to calculate the start & end bounds of the window
325351
# 3) Append the window bounds in group order
@@ -339,7 +365,7 @@ def get_window_bounds(
339365
**self.indexer_kwargs,
340366
)
341367
start, end = indexer.get_window_bounds(
342-
len(indices), min_periods, center, closed
368+
len(indices), min_periods, center, closed, step
343369
)
344370
start = start.astype(np.int64)
345371
end = end.astype(np.int64)
@@ -358,6 +384,8 @@ def get_window_bounds(
358384
)
359385
start_arrays.append(window_indices.take(ensure_platform_int(start)))
360386
end_arrays.append(window_indices.take(ensure_platform_int(end)))
387+
if len(start_arrays) == 0:
388+
return np.array([], dtype=np.int64), np.array([], dtype=np.int64)
361389
start = np.concatenate(start_arrays)
362390
end = np.concatenate(end_arrays)
363391
return start, end
@@ -373,6 +401,14 @@ def get_window_bounds(
373401
min_periods: int | None = None,
374402
center: bool | None = None,
375403
closed: str | None = None,
404+
step: int | None = None,
376405
) -> tuple[np.ndarray, np.ndarray]:
377406

378-
return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64)
407+
if step is not None:
408+
raise NotImplementedError(
409+
"step not implemented for exponentail moving window"
410+
)
411+
return (
412+
np.array([0], dtype=np.int64),
413+
np.array([num_values], dtype=np.int64),
414+
)

pandas/core/window/ewm.py

+2
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,7 @@ def cov_func(x, y):
732732
min_periods=min_periods,
733733
center=self.center,
734734
closed=self.closed,
735+
step=self.step,
735736
)
736737
result = window_aggregations.ewmcov(
737738
x_array,
@@ -798,6 +799,7 @@ def cov_func(x, y):
798799
min_periods=min_periods,
799800
center=self.center,
800801
closed=self.closed,
802+
step=self.step,
801803
)
802804

803805
def _cov(X, Y):

pandas/core/window/numba_.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,8 @@ def roll_table(
220220
minimum_periods: int,
221221
*args: Any,
222222
):
223-
result = np.empty(values.shape)
224-
min_periods_mask = np.empty(values.shape)
223+
result = np.empty((len(begin), values.shape[1]))
224+
min_periods_mask = np.empty(result.shape)
225225
for i in numba.prange(len(result)):
226226
start = begin[i]
227227
stop = end[i]

0 commit comments

Comments
 (0)