Skip to content

ENH: Rolling window with step size (GH-15354) #45765

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 28, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_libs/window/indexers.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ def calculate_variable_window_bounds(
min_periods,
center: bool,
closed: str | None,
step: int | None,
index: np.ndarray, # const int64_t[:]
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
6 changes: 5 additions & 1 deletion pandas/_libs/window/indexers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def calculate_variable_window_bounds(
object min_periods, # unused but here to match get_window_bounds signature
bint center,
str closed,
int64_t step,
const int64_t[:] index
):
"""
Expand All @@ -38,6 +39,9 @@ def calculate_variable_window_bounds(
closed : str
string of side of the window that should be closed

step : int64
Spacing between windows

index : ndarray[int64]
time series index to roll over

Expand Down Expand Up @@ -143,4 +147,4 @@ def calculate_variable_window_bounds(
# right endpoint is open
if not right_closed and not center:
end[i] -= 1
return start, end
return start[::step], end[::step]
3 changes: 3 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11263,6 +11263,7 @@ def rolling(
on: str | None = None,
axis: Axis = 0,
closed: str | None = None,
step: int | None = None,
method: str = "single",
):
axis = self._get_axis_number(axis)
Expand All @@ -11277,6 +11278,7 @@ def rolling(
on=on,
axis=axis,
closed=closed,
step=step,
method=method,
)

Expand All @@ -11289,6 +11291,7 @@ def rolling(
on=on,
axis=axis,
closed=closed,
step=step,
method=method,
)

Expand Down
49 changes: 40 additions & 9 deletions pandas/core/indexers/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
center passed from the top level rolling API
closed : str, default None
closed passed from the top level rolling API
step : int, default None
step passed from the top level rolling API
win_type : str, default None
win_type passed from the top level rolling API

Expand Down Expand Up @@ -62,6 +64,7 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

raise NotImplementedError
Expand All @@ -77,14 +80,15 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

if center:
offset = (self.window_size - 1) // 2
else:
offset = 0

end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64")
end = np.arange(1 + offset, num_values + 1 + offset, step, dtype="int64")
start = end - self.window_size
if closed in ["left", "both"]:
start -= 1
Expand All @@ -107,8 +111,12 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

if step is not None:
raise NotImplementedError("step not implemented for variable window")

# error: Argument 4 to "calculate_variable_window_bounds" has incompatible
# type "Optional[bool]"; expected "bool"
# error: Argument 6 to "calculate_variable_window_bounds" has incompatible
Expand All @@ -119,6 +127,7 @@ def get_window_bounds(
min_periods,
center, # type: ignore[arg-type]
closed,
1,
self.index_array, # type: ignore[arg-type]
)

Expand All @@ -145,8 +154,12 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

if step is not None:
raise NotImplementedError("step not implemented for variable offset window")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tests hit this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and there are multiple instances of this error in this file that get tested. This is done using @step_not_implemented (defined in pandas/util/_test_decorators.py) on test cases.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't these check be performed in BaseWindow._validate? Ideally e.g. df.rolling("2s" , step=5)/df.ewm(step=5) would raise at the constructor and not necessarily when an aggregation call is made

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah i agree should move this check to be more consistent


# if windows is variable, default is 'right', otherwise default is 'both'
if closed is None:
closed = "right" if self.index is not None else "both"
Expand Down Expand Up @@ -215,12 +228,15 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

return (
np.zeros(num_values, dtype=np.int64),
np.arange(1, num_values + 1, dtype=np.int64),
)
if step is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

raise NotImplementedError("step not implemented for expanding window")

end = np.arange(1, num_values + 1, dtype=np.int64)
start = np.zeros(len(end), dtype=np.int64)
return start, end


class FixedForwardWindowIndexer(BaseIndexer):
Expand Down Expand Up @@ -256,6 +272,7 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

if center:
Expand All @@ -264,11 +281,13 @@ def get_window_bounds(
raise ValueError(
"Forward-looking windows don't support setting the closed argument"
)
if step is None:
step = 1

start = np.arange(num_values, dtype="int64")
start = np.arange(0, num_values, step, dtype="int64")
end = start + self.window_size
if self.window_size:
end[-self.window_size :] = num_values
end = np.clip(end, 0, num_values)

return start, end

Expand Down Expand Up @@ -319,7 +338,11 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
if step is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

raise NotImplementedError("step not implemented for groupby window")

# 1) For each group, get the indices that belong to the group
# 2) Use the indices to calculate the start & end bounds of the window
# 3) Append the window bounds in group order
Expand All @@ -339,7 +362,7 @@ def get_window_bounds(
**self.indexer_kwargs,
)
start, end = indexer.get_window_bounds(
len(indices), min_periods, center, closed
len(indices), min_periods, center, closed, step
)
start = start.astype(np.int64)
end = end.astype(np.int64)
Expand Down Expand Up @@ -373,6 +396,14 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64)
if step is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

raise NotImplementedError(
"step not implemented for exponentail moving window"
)
return (
np.array([0], dtype=np.int64),
np.array([num_values], dtype=np.int64),
)
11 changes: 8 additions & 3 deletions pandas/core/window/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ def flex_binary_moment(arg1, arg2, f, pairwise=False):
from pandas import DataFrame

def dataframe_from_int_dict(data, frame_template):
result = DataFrame(data, index=frame_template.index)
result = DataFrame(
data, index=None if len(data) > 0 else frame_template.index
)
if len(result.columns) > 0:
result.columns = frame_template.columns[result.columns]
return result
Expand All @@ -42,13 +44,16 @@ def dataframe_from_int_dict(data, frame_template):
raise ValueError("'arg2' columns are not unique")
X, Y = arg1.align(arg2, join="outer")
X, Y = prep_binary(X, Y)
result_index = X.index
res_columns = arg1.columns.union(arg2.columns)
for col in res_columns:
if col in X and col in Y:
results[col] = f(X[col], Y[col])
return DataFrame(results, index=X.index, columns=res_columns)
result_index = results[col].index
return DataFrame(results, index=result_index, columns=res_columns)
elif pairwise is True:
results = defaultdict(dict)
result_index = arg1.index.union(arg2.index)
for i in range(len(arg1.columns)):
for j in range(len(arg2.columns)):
if j < i and arg2 is arg1:
Expand All @@ -58,10 +63,10 @@ def dataframe_from_int_dict(data, frame_template):
results[i][j] = f(
*prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])
)
result_index = results[i][j].index

from pandas import concat

result_index = arg1.index.union(arg2.index)
if len(result_index):

# construct result frame
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,7 @@ def cov_func(x, y):
min_periods=min_periods,
center=self.center,
closed=self.closed,
step=self.step,
)
result = window_aggregations.ewmcov(
x_array,
Expand Down Expand Up @@ -803,6 +804,7 @@ def cov_func(x, y):
min_periods=min_periods,
center=self.center,
closed=self.closed,
step=self.step,
)

def _cov(X, Y):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/window/numba_.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,8 @@ def roll_table(
minimum_periods: int,
*args: Any,
):
result = np.empty(values.shape)
min_periods_mask = np.empty(values.shape)
result = np.empty((len(begin), values.shape[1]))
min_periods_mask = np.empty(result.shape)
for i in numba.prange(len(result)):
start = begin[i]
stop = end[i]
Expand Down
Loading