Skip to content

Commit e38aab9

Browse files
mroeschkeyehoshuadimarsky
authored andcommitted
CLN: rolling step followups (pandas-dev#46191)
1 parent 9d43a89 commit e38aab9

File tree

7 files changed

+95
-185
lines changed

7 files changed

+95
-185
lines changed

pandas/_libs/window/indexers.pyi

-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,5 @@ def calculate_variable_window_bounds(
88
min_periods,
99
center: bool,
1010
closed: str | None,
11-
step: int | None,
1211
index: np.ndarray, # const int64_t[:]
1312
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...

pandas/_libs/window/indexers.pyx

+1-5
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ def calculate_variable_window_bounds(
1616
object min_periods, # unused but here to match get_window_bounds signature
1717
bint center,
1818
str closed,
19-
int64_t step,
2019
const int64_t[:] index
2120
):
2221
"""
@@ -39,9 +38,6 @@ def calculate_variable_window_bounds(
3938
closed : str
4039
string of side of the window that should be closed
4140
42-
step : int64
43-
Spacing between windows
44-
4541
index : ndarray[int64]
4642
time series index to roll over
4743
@@ -150,4 +146,4 @@ def calculate_variable_window_bounds(
150146
# right endpoint is open
151147
if not right_closed and not center:
152148
end[i] -= 1
153-
return start[::step], end[::step]
149+
return start, end

pandas/core/indexers/objects.py

+5-20
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,6 @@ def get_window_bounds(
115115
step: int | None = None,
116116
) -> tuple[np.ndarray, np.ndarray]:
117117

118-
if step is not None:
119-
raise NotImplementedError("step not implemented for variable window")
120-
121118
# error: Argument 4 to "calculate_variable_window_bounds" has incompatible
122119
# type "Optional[bool]"; expected "bool"
123120
# error: Argument 6 to "calculate_variable_window_bounds" has incompatible
@@ -128,7 +125,6 @@ def get_window_bounds(
128125
min_periods,
129126
center, # type: ignore[arg-type]
130127
closed,
131-
1,
132128
self.index_array, # type: ignore[arg-type]
133129
)
134130

@@ -234,12 +230,10 @@ def get_window_bounds(
234230
step: int | None = None,
235231
) -> tuple[np.ndarray, np.ndarray]:
236232

237-
if step is not None:
238-
raise NotImplementedError("step not implemented for expanding window")
239-
240-
end = np.arange(1, num_values + 1, dtype=np.int64)
241-
start = np.zeros(len(end), dtype=np.int64)
242-
return start, end
233+
return (
234+
np.zeros(num_values, dtype=np.int64),
235+
np.arange(1, num_values + 1, dtype=np.int64),
236+
)
243237

244238

245239
class FixedForwardWindowIndexer(BaseIndexer):
@@ -343,8 +337,6 @@ def get_window_bounds(
343337
closed: str | None = None,
344338
step: int | None = None,
345339
) -> tuple[np.ndarray, np.ndarray]:
346-
if step is not None:
347-
raise NotImplementedError("step not implemented for groupby window")
348340

349341
# 1) For each group, get the indices that belong to the group
350342
# 2) Use the indices to calculate the start & end bounds of the window
@@ -404,11 +396,4 @@ def get_window_bounds(
404396
step: int | None = None,
405397
) -> tuple[np.ndarray, np.ndarray]:
406398

407-
if step is not None:
408-
raise NotImplementedError(
409-
"step not implemented for exponentail moving window"
410-
)
411-
return (
412-
np.array([0], dtype=np.int64),
413-
np.array([num_values], dtype=np.int64),
414-
)
399+
return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64)

pandas/core/window/rolling.py

+24-36
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,11 @@ def _validate(self) -> None:
229229
)
230230
if self.method not in ["table", "single"]:
231231
raise ValueError("method must be 'table' or 'single")
232+
if self.step is not None:
233+
if not is_integer(self.step):
234+
raise ValueError("step must be an integer")
235+
elif self.step < 0:
236+
raise ValueError("step must be >= 0")
232237

233238
def _check_window_bounds(
234239
self, start: np.ndarray, end: np.ndarray, num_vals: int
@@ -238,16 +243,14 @@ def _check_window_bounds(
238243
f"start ({len(start)}) and end ({len(end)}) bounds must be the "
239244
f"same length"
240245
)
241-
elif not isinstance(self._get_window_indexer(), GroupbyIndexer) and len(
242-
start
243-
) != (num_vals + (self.step or 1) - 1) // (self.step or 1):
246+
elif len(start) != (num_vals + (self.step or 1) - 1) // (self.step or 1):
244247
raise ValueError(
245248
f"start and end bounds ({len(start)}) must be the same length "
246249
f"as the object ({num_vals}) divided by the step ({self.step}) "
247250
f"if given and rounded up"
248251
)
249252

250-
def _slice_index(self, index: Index, result: Sized | None = None) -> Index:
253+
def _slice_axis_for_step(self, index: Index, result: Sized | None = None) -> Index:
251254
"""
252255
Slices the index for a given result and the preset step.
253256
"""
@@ -446,7 +449,7 @@ def _apply_series(
446449
raise DataError("No numeric types to aggregate") from err
447450

448451
result = homogeneous_func(values)
449-
index = self._slice_index(obj.index, result)
452+
index = self._slice_axis_for_step(obj.index, result)
450453
return obj._constructor(result, index=index, name=obj.name)
451454

452455
def _apply_blockwise(
@@ -484,7 +487,7 @@ def hfunc(values: ArrayLike) -> ArrayLike:
484487
res_values.append(res)
485488
taker.append(i)
486489

487-
index = self._slice_index(
490+
index = self._slice_axis_for_step(
488491
obj.index, res_values[0] if len(res_values) > 0 else None
489492
)
490493
df = type(obj)._from_arrays(
@@ -524,7 +527,7 @@ def _apply_tablewise(
524527
values = values.T if self.axis == 1 else values
525528
result = homogeneous_func(values)
526529
result = result.T if self.axis == 1 else result
527-
index = self._slice_index(obj.index, result)
530+
index = self._slice_axis_for_step(obj.index, result)
528531
columns = (
529532
obj.columns
530533
if result.shape[1] == len(obj.columns)
@@ -644,13 +647,13 @@ def _numba_apply(
644647
)
645648
result = aggregator(values, start, end, min_periods, *func_args)
646649
result = result.T if self.axis == 1 else result
647-
index = self._slice_index(obj.index, result)
650+
index = self._slice_axis_for_step(obj.index, result)
648651
if obj.ndim == 1:
649652
result = result.squeeze()
650653
out = obj._constructor(result, index=index, name=obj.name)
651654
return out
652655
else:
653-
columns = self._slice_index(obj.columns, result.T)
656+
columns = self._slice_axis_for_step(obj.columns, result.T)
654657
out = obj._constructor(result, index=index, columns=columns)
655658
return self._resolve_output(out, obj)
656659

@@ -692,7 +695,7 @@ def __init__(
692695
obj = obj.drop(columns=self._grouper.names, errors="ignore")
693696
# GH 15354
694697
if kwargs.get("step") is not None:
695-
raise NotImplementedError("step not implemented for rolling groupby")
698+
raise NotImplementedError("step not implemented for groupby")
696699
super().__init__(obj, *args, **kwargs)
697700

698701
def _apply(
@@ -938,14 +941,12 @@ class Window(BaseWindow):
938941
The closed parameter with fixed windows is now supported.
939942
940943
step : int, default None
941-
When supported, applies ``[::step]`` to the resulting sequence of windows, in a
942-
computationally efficient manner. Currently supported only with fixed-length
943-
window indexers. Note that using a step argument other than None or 1 will
944-
produce a result with a different shape than the input.
945944
946-
..versionadded:: 1.5
945+
..versionadded:: 1.5.0
947946
948-
The step parameter is only supported with fixed windows.
947+
Evaluate the window at every ``step`` result, equivalent to slicing as
948+
``[::step]``. ``window`` must be an integer. Using a step argument other
949+
than None or 1 will produce a result with a different shape than the input.
949950
950951
method : str {'single', 'table'}, default 'single'
951952
@@ -1605,9 +1606,7 @@ def cov(
16051606
**kwargs,
16061607
):
16071608
if self.step is not None:
1608-
raise NotImplementedError(
1609-
"step not implemented for rolling and expanding cov"
1610-
)
1609+
raise NotImplementedError("step not implemented for cov")
16111610

16121611
from pandas import Series
16131612

@@ -1650,11 +1649,8 @@ def corr(
16501649
ddof: int = 1,
16511650
**kwargs,
16521651
):
1653-
16541652
if self.step is not None:
1655-
raise NotImplementedError(
1656-
"step not implemented for rolling and expanding corr"
1657-
)
1653+
raise NotImplementedError("step not implemented for corr")
16581654

16591655
from pandas import Series
16601656

@@ -1749,24 +1745,16 @@ def _validate(self):
17491745
if self.min_periods is None:
17501746
self.min_periods = 1
17511747

1748+
if self.step is not None:
1749+
raise NotImplementedError(
1750+
"step is not supported with frequency windows"
1751+
)
1752+
17521753
elif isinstance(self.window, BaseIndexer):
17531754
# Passed BaseIndexer subclass should handle all other rolling kwargs
17541755
pass
17551756
elif not is_integer(self.window) or self.window < 0:
17561757
raise ValueError("window must be an integer 0 or greater")
1757-
# GH 15354:
1758-
# validate window indexer parameters do not raise in get_window_bounds
1759-
# this cannot be done in BaseWindow._validate because there _get_window_indexer
1760-
# would erroneously create a fixed window given a window argument like "1s" due
1761-
# to _win_freq_i8 not being set
1762-
indexer = self._get_window_indexer()
1763-
indexer.get_window_bounds(
1764-
num_values=0,
1765-
min_periods=self.min_periods,
1766-
center=self.center,
1767-
closed=self.closed,
1768-
step=self.step,
1769-
)
17701758

17711759
def _validate_datetimelike_monotonic(self):
17721760
"""

pandas/tests/window/test_apply.py

+13-22
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,7 @@ def test_rolling_apply_out_of_bounds(engine_and_raw):
5252

5353

5454
@pytest.mark.parametrize("window", [2, "2s"])
55-
@pytest.mark.parametrize("step", [None])
56-
def test_rolling_apply_with_pandas_objects(window, step):
55+
def test_rolling_apply_with_pandas_objects(window):
5756
# 5071
5857
df = DataFrame(
5958
{"A": np.random.randn(5), "B": np.random.randint(0, 10, size=5)},
@@ -67,8 +66,8 @@ def f(x):
6766
return np.nan
6867
return x.iloc[-1]
6968

70-
result = df.rolling(window, step=step).apply(f, raw=False)
71-
expected = df.iloc[2:].reindex_like(df)[::step]
69+
result = df.rolling(window).apply(f, raw=False)
70+
expected = df.iloc[2:].reindex_like(df)
7271
tm.assert_frame_equal(result, expected)
7372

7473
with tm.external_error_raised(AttributeError):
@@ -96,8 +95,7 @@ def test_rolling_apply(engine_and_raw, step):
9695
tm.assert_series_equal(result, expected)
9796

9897

99-
@pytest.mark.parametrize("step", [None])
100-
def test_all_apply(engine_and_raw, step):
98+
def test_all_apply(engine_and_raw):
10199
engine, raw = engine_and_raw
102100

103101
df = (
@@ -106,16 +104,15 @@ def test_all_apply(engine_and_raw, step):
106104
).set_index("A")
107105
* 2
108106
)
109-
er = df.rolling(window=1, step=step)
110-
r = df.rolling(window="1s", step=step)
107+
er = df.rolling(window=1)
108+
r = df.rolling(window="1s")
111109

112110
result = r.apply(lambda x: 1, engine=engine, raw=raw)
113111
expected = er.apply(lambda x: 1, engine=engine, raw=raw)
114112
tm.assert_frame_equal(result, expected)
115113

116114

117-
@pytest.mark.parametrize("step", [None])
118-
def test_ragged_apply(engine_and_raw, step):
115+
def test_ragged_apply(engine_and_raw):
119116
engine, raw = engine_and_raw
120117

121118
df = DataFrame({"B": range(5)})
@@ -128,24 +125,18 @@ def test_ragged_apply(engine_and_raw, step):
128125
]
129126

130127
f = lambda x: 1
131-
result = df.rolling(window="1s", min_periods=1, step=step).apply(
132-
f, engine=engine, raw=raw
133-
)
134-
expected = df.copy()[::step]
128+
result = df.rolling(window="1s", min_periods=1).apply(f, engine=engine, raw=raw)
129+
expected = df.copy()
135130
expected["B"] = 1.0
136131
tm.assert_frame_equal(result, expected)
137132

138-
result = df.rolling(window="2s", min_periods=1, step=step).apply(
139-
f, engine=engine, raw=raw
140-
)
141-
expected = df.copy()[::step]
133+
result = df.rolling(window="2s", min_periods=1).apply(f, engine=engine, raw=raw)
134+
expected = df.copy()
142135
expected["B"] = 1.0
143136
tm.assert_frame_equal(result, expected)
144137

145-
result = df.rolling(window="5s", min_periods=1, step=step).apply(
146-
f, engine=engine, raw=raw
147-
)
148-
expected = df.copy()[::step]
138+
result = df.rolling(window="5s", min_periods=1).apply(f, engine=engine, raw=raw)
139+
expected = df.copy()
149140
expected["B"] = 1.0
150141
tm.assert_frame_equal(result, expected)
151142

pandas/tests/window/test_base_indexer.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -259,14 +259,13 @@ def test_rolling_forward_cov_corr(func, expected):
259259
["left", [0.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 6.0, 7.0, 8.0]],
260260
],
261261
)
262-
@pytest.mark.parametrize("step", [None])
263-
def test_non_fixed_variable_window_indexer(closed, expected_data, step):
262+
def test_non_fixed_variable_window_indexer(closed, expected_data):
264263
index = date_range("2020", periods=10)
265264
df = DataFrame(range(10), index=index)
266265
offset = BusinessDay(1)
267266
indexer = VariableOffsetWindowIndexer(index=index, offset=offset)
268-
result = df.rolling(indexer, closed=closed, step=step).sum()
269-
expected = DataFrame(expected_data, index=index)[::step]
267+
result = df.rolling(indexer, closed=closed).sum()
268+
expected = DataFrame(expected_data, index=index)
270269
tm.assert_frame_equal(result, expected)
271270

272271

0 commit comments

Comments
 (0)