Skip to content

Commit ff6a898

Browse files
committed
ENH: Rolling window with step size (pandas-dev GH-15354)
1 parent c64fbce commit ff6a898

22 files changed

+913
-493
lines changed

pandas/_libs/window/indexers.pyi

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@ def calculate_variable_window_bounds(
88
min_periods,
99
center: bool,
1010
closed: str | None,
11+
step: int | None,
1112
index: np.ndarray, # const int64_t[:]
12-
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
13+
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...

pandas/_libs/window/indexers.pyx

+9-3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def calculate_variable_window_bounds(
1616
object min_periods, # unused but here to match get_window_bounds signature
1717
bint center,
1818
str closed,
19+
int64_t step,
1920
const int64_t[:] index
2021
):
2122
"""
@@ -38,17 +39,20 @@ def calculate_variable_window_bounds(
3839
closed : str
3940
string naming the side(s) of the window that should be closed
4041
42+
step : int64
43+
Spacing between windows
44+
4145
index : ndarray[int64]
4246
time series index to roll over
4347
4448
Returns
4549
-------
46-
(ndarray[int64], ndarray[int64])
50+
(ndarray[int64], ndarray[int64], ndarray[int64])
4751
"""
4852
cdef:
4953
bint left_closed = False
5054
bint right_closed = False
51-
ndarray[int64_t, ndim=1] start, end
55+
ndarray[int64_t, ndim=1] start, end, ref
5256
int64_t start_bound, end_bound, index_growth_sign = 1
5357
Py_ssize_t i, j
5458

@@ -143,4 +147,6 @@ def calculate_variable_window_bounds(
143147
# right endpoint is open
144148
if not right_closed and not center:
145149
end[i] -= 1
146-
return start, end
150+
ref = (None if step is None or step == 1
151+
else np.arange(0, num_values, step, dtype='int64'))
152+
return start[::step], end[::step], ref

pandas/core/generic.py

+3
Original file line numberDiff line numberDiff line change
@@ -11263,6 +11263,7 @@ def rolling(
1126311263
on: str | None = None,
1126411264
axis: Axis = 0,
1126511265
closed: str | None = None,
11266+
step: int | None = None,
1126611267
method: str = "single",
1126711268
):
1126811269
axis = self._get_axis_number(axis)
@@ -11277,6 +11278,7 @@ def rolling(
1127711278
on=on,
1127811279
axis=axis,
1127911280
closed=closed,
11281+
step=step,
1128011282
method=method,
1128111283
)
1128211284

@@ -11289,6 +11291,7 @@ def rolling(
1128911291
on=on,
1129011292
axis=axis,
1129111293
closed=closed,
11294+
step=step,
1129211295
method=method,
1129311296
)
1129411297

pandas/core/indexers/objects.py

+67-24
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,18 @@
2727
center passed from the top level rolling API
2828
closed : str, default None
2929
closed passed from the top level rolling API
30+
step : int, default None
31+
step passed from the top level rolling API
3032
win_type : str, default None
3133
win_type passed from the top level rolling API
3234
3335
Returns
3436
-------
35-
A tuple of ndarray[int64]s, indicating the boundaries of each
36-
window
37+
A tuple of ndarray[int64]s:
38+
start : array of start boundaries
39+
end : array of end boundaries
40+
ref : array of window reference locations, or None to indicate all
40+
locations; must be None if step is None or 1
3742
"""
3843

3944

@@ -55,14 +60,25 @@ def __init__(
5560
for key, value in kwargs.items():
5661
setattr(self, key, value)
5762

63+
def _get_default_ref(self, num_values: int = 0, step: int | None = None):
64+
"""
65+
Returns the default window reference locations.
66+
"""
67+
return (
68+
None
69+
if step is None or step == 1
70+
else np.arange(0, num_values, step, dtype="int64")
71+
)
72+
5873
@Appender(get_window_bounds_doc)
5974
def get_window_bounds(
6075
self,
6176
num_values: int = 0,
6277
min_periods: int | None = None,
6378
center: bool | None = None,
6479
closed: str | None = None,
65-
) -> tuple[np.ndarray, np.ndarray]:
80+
step: int | None = None,
81+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
6682

6783
raise NotImplementedError
6884

@@ -77,14 +93,15 @@ def get_window_bounds(
7793
min_periods: int | None = None,
7894
center: bool | None = None,
7995
closed: str | None = None,
80-
) -> tuple[np.ndarray, np.ndarray]:
96+
step: int | None = None,
97+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
8198

8299
if center:
83100
offset = (self.window_size - 1) // 2
84101
else:
85102
offset = 0
86103

87-
end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64")
104+
end = np.arange(1 + offset, num_values + 1 + offset, step, dtype="int64")
88105
start = end - self.window_size
89106
if closed in ["left", "both"]:
90107
start -= 1
@@ -94,7 +111,8 @@ def get_window_bounds(
94111
end = np.clip(end, 0, num_values)
95112
start = np.clip(start, 0, num_values)
96113

97-
return start, end
114+
ref = self._get_default_ref(num_values, step)
115+
return start, end, ref
98116

99117

100118
class VariableWindowIndexer(BaseIndexer):
@@ -107,7 +125,8 @@ def get_window_bounds(
107125
min_periods: int | None = None,
108126
center: bool | None = None,
109127
closed: str | None = None,
110-
) -> tuple[np.ndarray, np.ndarray]:
128+
step: int | None = None,
129+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
111130

112131
# error: Argument 4 to "calculate_variable_window_bounds" has incompatible
113132
# type "Optional[bool]"; expected "bool"
@@ -119,6 +138,7 @@ def get_window_bounds(
119138
min_periods,
120139
center, # type: ignore[arg-type]
121140
closed,
141+
step if step is not None else 1,
122142
self.index_array, # type: ignore[arg-type]
123143
)
124144

@@ -145,11 +165,14 @@ def get_window_bounds(
145165
min_periods: int | None = None,
146166
center: bool | None = None,
147167
closed: str | None = None,
148-
) -> tuple[np.ndarray, np.ndarray]:
168+
step: int | None = None,
169+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
149170

150171
# if the window is variable, default is 'right', otherwise default is 'both'
151172
if closed is None:
152173
closed = "right" if self.index is not None else "both"
174+
if step is None:
175+
step = 1
153176

154177
right_closed = closed in ["right", "both"]
155178
left_closed = closed in ["left", "both"]
@@ -202,7 +225,8 @@ def get_window_bounds(
202225
if not right_closed:
203226
end[i] -= 1
204227

205-
return start, end
228+
ref = self._get_default_ref(num_values, step)
229+
return start[::step], end[::step], ref
206230

207231

208232
class ExpandingIndexer(BaseIndexer):
@@ -215,12 +239,15 @@ def get_window_bounds(
215239
min_periods: int | None = None,
216240
center: bool | None = None,
217241
closed: str | None = None,
218-
) -> tuple[np.ndarray, np.ndarray]:
242+
step: int | None = None,
243+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
219244

220-
return (
221-
np.zeros(num_values, dtype=np.int64),
222-
np.arange(1, num_values + 1, dtype=np.int64),
223-
)
245+
if step is None:
246+
step = 1
247+
end = np.arange(1, num_values + 1, step, dtype=np.int64)
248+
start = np.zeros(len(end), dtype=np.int64)
249+
ref = self._get_default_ref(num_values, step)
250+
return start, end, ref
224251

225252

226253
class FixedForwardWindowIndexer(BaseIndexer):
@@ -256,21 +283,25 @@ def get_window_bounds(
256283
min_periods: int | None = None,
257284
center: bool | None = None,
258285
closed: str | None = None,
259-
) -> tuple[np.ndarray, np.ndarray]:
286+
step: int | None = None,
287+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
260288

261289
if center:
262290
raise ValueError("Forward-looking windows can't have center=True")
263291
if closed is not None:
264292
raise ValueError(
265293
"Forward-looking windows don't support setting the closed argument"
266294
)
295+
if step is None:
296+
step = 1
267297

268-
start = np.arange(num_values, dtype="int64")
298+
start = np.arange(num_values, step=step, dtype="int64")
269299
end = start + self.window_size
270300
if self.window_size:
271-
end[-self.window_size :] = num_values
301+
end = np.clip(end, 0, num_values)
272302

273-
return start, end
303+
ref = self._get_default_ref(num_values, step)
304+
return start, end, ref
274305

275306

276307
class GroupbyIndexer(BaseIndexer):
@@ -319,12 +350,14 @@ def get_window_bounds(
319350
min_periods: int | None = None,
320351
center: bool | None = None,
321352
closed: str | None = None,
322-
) -> tuple[np.ndarray, np.ndarray]:
353+
step: int | None = None,
354+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
323355
# 1) For each group, get the indices that belong to the group
324356
# 2) Use the indices to calculate the start & end bounds of the window
325357
# 3) Append the window bounds in group order
326358
start_arrays = []
327359
end_arrays = []
360+
ref_arrays = []
328361
window_indices_start = 0
329362
for key, indices in self.groupby_indices.items():
330363
index_array: np.ndarray | None
@@ -338,11 +371,12 @@ def get_window_bounds(
338371
window_size=self.window_size,
339372
**self.indexer_kwargs,
340373
)
341-
start, end = indexer.get_window_bounds(
342-
len(indices), min_periods, center, closed
374+
start, end, ref = indexer.get_window_bounds(
375+
len(indices), min_periods, center, closed, step
343376
)
344377
start = start.astype(np.int64)
345378
end = end.astype(np.int64)
379+
ref = None if ref is None else ref.astype(np.int64)
346380
assert len(start) == len(
347381
end
348382
), "these should be equal in length from get_window_bounds"
@@ -358,9 +392,13 @@ def get_window_bounds(
358392
)
359393
start_arrays.append(window_indices.take(ensure_platform_int(start)))
360394
end_arrays.append(window_indices.take(ensure_platform_int(end)))
395+
ref_arrays.append(
396+
None if ref is None else window_indices.take(ensure_platform_int(ref))
397+
)
361398
start = np.concatenate(start_arrays)
362399
end = np.concatenate(end_arrays)
363-
return start, end
400+
ref = None if step is None or step == 1 else np.concatenate(ref_arrays)
401+
return start, end, ref
364402

365403

366404
class ExponentialMovingWindowIndexer(BaseIndexer):
@@ -373,6 +411,11 @@ def get_window_bounds(
373411
min_periods: int | None = None,
374412
center: bool | None = None,
375413
closed: str | None = None,
376-
) -> tuple[np.ndarray, np.ndarray]:
414+
step: int | None = None,
415+
) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
377416

378-
return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64)
417+
return (
418+
np.array([0], dtype=np.int64),
419+
np.array([num_values], dtype=np.int64),
420+
None,
421+
)

pandas/core/window/common.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ def flex_binary_moment(arg1, arg2, f, pairwise=False):
2222
from pandas import DataFrame
2323

2424
def dataframe_from_int_dict(data, frame_template):
25-
result = DataFrame(data, index=frame_template.index)
25+
result = DataFrame(
26+
data, index=None if len(data) > 0 else frame_template.index
27+
)
2628
if len(result.columns) > 0:
2729
result.columns = frame_template.columns[result.columns]
2830
return result
@@ -42,13 +44,16 @@ def dataframe_from_int_dict(data, frame_template):
4244
raise ValueError("'arg2' columns are not unique")
4345
X, Y = arg1.align(arg2, join="outer")
4446
X, Y = prep_binary(X, Y)
47+
result_index = X.index
4548
res_columns = arg1.columns.union(arg2.columns)
4649
for col in res_columns:
4750
if col in X and col in Y:
4851
results[col] = f(X[col], Y[col])
49-
return DataFrame(results, index=X.index, columns=res_columns)
52+
result_index = results[col].index
53+
return DataFrame(results, index=result_index, columns=res_columns)
5054
elif pairwise is True:
5155
results = defaultdict(dict)
56+
result_index = arg1.index.union(arg2.index)
5257
for i in range(len(arg1.columns)):
5358
for j in range(len(arg2.columns)):
5459
if j < i and arg2 is arg1:
@@ -58,10 +63,10 @@ def dataframe_from_int_dict(data, frame_template):
5863
results[i][j] = f(
5964
*prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])
6065
)
66+
result_index = results[i][j].index
6167

6268
from pandas import concat
6369

64-
result_index = arg1.index.union(arg2.index)
6570
if len(result_index):
6671

6772
# construct result frame

pandas/core/window/ewm.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ def __init__(
418418
)
419419

420420
def _check_window_bounds(
421-
self, start: np.ndarray, end: np.ndarray, num_vals: int
421+
self, start: np.ndarray, end: np.ndarray, ref: np.ndarray, num_vals: int
422422
) -> None:
423423
# ewm algorithms are iterative with each point
424424
# ExponentialMovingWindowIndexer "bounds" are the entire window
@@ -732,11 +732,12 @@ def cov_func(x, y):
732732
if self.min_periods is not None
733733
else window_indexer.window_size
734734
)
735-
start, end = window_indexer.get_window_bounds(
735+
start, end, ref = window_indexer.get_window_bounds(
736736
num_values=len(x_array),
737737
min_periods=min_periods,
738738
center=self.center,
739739
closed=self.closed,
740+
step=self.step,
740741
)
741742
result = window_aggregations.ewmcov(
742743
x_array,
@@ -798,11 +799,12 @@ def cov_func(x, y):
798799
if self.min_periods is not None
799800
else window_indexer.window_size
800801
)
801-
start, end = window_indexer.get_window_bounds(
802+
start, end, ref = window_indexer.get_window_bounds(
802803
num_values=len(x_array),
803804
min_periods=min_periods,
804805
center=self.center,
805806
closed=self.closed,
807+
step=self.step,
806808
)
807809

808810
def _cov(X, Y):

pandas/core/window/numba_.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -234,8 +234,8 @@ def roll_table(
234234
minimum_periods: int,
235235
*args: Any,
236236
):
237-
result = np.empty(values.shape)
238-
min_periods_mask = np.empty(values.shape)
237+
result = np.empty((len(begin), values.shape[1]))
238+
min_periods_mask = np.empty(result.shape)
239239
for i in numba.prange(len(result)):
240240
start = begin[i]
241241
stop = end[i]

0 commit comments

Comments
 (0)