Skip to content

Commit 4521308

Browse files
authored
BUG: support min/max functions for rolling windows with custom BaseIndexer (#33180)
1 parent b712971 commit 4521308

File tree

4 files changed

+66
-51
lines changed

4 files changed

+66
-51
lines changed

doc/source/whatsnew/v1.1.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ Other API changes
102102
- Added :meth:`DataFrame.value_counts` (:issue:`5377`)
103103
- :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`)
104104
- ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`)
105-
- Using a :func:`pandas.api.indexers.BaseIndexer` with ``min``, ``max``, ``std``, ``var``, ``count``, ``skew``, ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`)
105+
- Using a :class:`pandas.api.indexers.BaseIndexer` with ``std``, ``var``, ``count``, ``skew``, ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`)
106+
- Using a :class:`pandas.api.indexers.BaseIndexer` with ``min`` or ``max`` will now return correct results for any monotonic :class:`pandas.api.indexers.BaseIndexer` subclass (:issue:`32865`)
106107
-
107108

108109
Backwards incompatible API changes

pandas/_libs/window/aggregations.pyx

+31-46
Original file line numberDiff line numberDiff line change
@@ -1051,7 +1051,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values,
10511051
bint is_max):
10521052
cdef:
10531053
numeric ai
1054-
int64_t i, close_offset, curr_win_size
1054+
int64_t i, k, curr_win_size, start
10551055
Py_ssize_t nobs = 0, N = len(values)
10561056
deque Q[int64_t] # min/max always the front
10571057
deque W[int64_t] # track the whole window for nobs compute
@@ -1068,60 +1068,45 @@ cdef _roll_min_max_variable(ndarray[numeric] values,
10681068
# The original impl didn't deal with variable window sizes
10691069
# So the code was optimized for that
10701070

1071-
for i in range(starti[0], endi[0]):
1072-
ai = init_mm(values[i], &nobs, is_max)
1073-
1074-
# Discard previous entries if we find new min or max
1075-
if is_max:
1076-
while not Q.empty() and ((ai >= values[Q.back()]) or
1077-
values[Q.back()] != values[Q.back()]):
1078-
Q.pop_back()
1079-
else:
1080-
while not Q.empty() and ((ai <= values[Q.back()]) or
1081-
values[Q.back()] != values[Q.back()]):
1082-
Q.pop_back()
1083-
Q.push_back(i)
1084-
W.push_back(i)
1085-
1086-
# if right is open then the first window is empty
1087-
close_offset = 0 if endi[0] > starti[0] else 1
10881071
# first window's size
10891072
curr_win_size = endi[0] - starti[0]
1073+
# GH 32865
1074+
# Anchor output index to values index to provide custom
1075+
# BaseIndexer support
1076+
for i in range(N):
10901077

1091-
for i in range(endi[0], endi[N-1]):
1092-
if not Q.empty() and curr_win_size > 0:
1093-
output[i-1+close_offset] = calc_mm(
1094-
minp, nobs, values[Q.front()])
1095-
else:
1096-
output[i-1+close_offset] = NaN
1097-
1098-
ai = init_mm(values[i], &nobs, is_max)
1099-
1100-
# Discard previous entries if we find new min or max
1101-
if is_max:
1102-
while not Q.empty() and ((ai >= values[Q.back()]) or
1103-
values[Q.back()] != values[Q.back()]):
1104-
Q.pop_back()
1078+
curr_win_size = endi[i] - starti[i]
1079+
if i == 0:
1080+
start = starti[i]
11051081
else:
1106-
while not Q.empty() and ((ai <= values[Q.back()]) or
1107-
values[Q.back()] != values[Q.back()]):
1108-
Q.pop_back()
1082+
start = endi[i - 1]
11091083

1110-
# Maintain window/nobs retention
1111-
curr_win_size = endi[i + close_offset] - starti[i + close_offset]
1112-
while not Q.empty() and Q.front() <= i - curr_win_size:
1084+
for k in range(start, endi[i]):
1085+
ai = init_mm(values[k], &nobs, is_max)
1086+
# Discard previous entries if we find new min or max
1087+
if is_max:
1088+
while not Q.empty() and ((ai >= values[Q.back()]) or
1089+
values[Q.back()] != values[Q.back()]):
1090+
Q.pop_back()
1091+
else:
1092+
while not Q.empty() and ((ai <= values[Q.back()]) or
1093+
values[Q.back()] != values[Q.back()]):
1094+
Q.pop_back()
1095+
Q.push_back(k)
1096+
W.push_back(k)
1097+
1098+
# Discard entries outside and left of current window
1099+
while not Q.empty() and Q.front() <= starti[i] - 1:
11131100
Q.pop_front()
1114-
while not W.empty() and W.front() <= i - curr_win_size:
1101+
while not W.empty() and W.front() <= starti[i] - 1:
11151102
remove_mm(values[W.front()], &nobs)
11161103
W.pop_front()
11171104

1118-
Q.push_back(i)
1119-
W.push_back(i)
1120-
1121-
if not Q.empty() and curr_win_size > 0:
1122-
output[N-1] = calc_mm(minp, nobs, values[Q.front()])
1123-
else:
1124-
output[N-1] = NaN
1105+
# Save output based on index in input value array
1106+
if not Q.empty() and curr_win_size > 0:
1107+
output[i] = calc_mm(minp, nobs, values[Q.front()])
1108+
else:
1109+
output[i] = NaN
11251110

11261111
return output
11271112

pandas/core/window/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ def func(arg, window, min_periods=None):
327327

328328
def validate_baseindexer_support(func_name: Optional[str]) -> None:
329329
# GH 32865: These functions work correctly with a BaseIndexer subclass
330-
BASEINDEXER_WHITELIST = {"mean", "sum", "median", "kurt", "quantile"}
330+
BASEINDEXER_WHITELIST = {"min", "max", "mean", "sum", "median", "kurt", "quantile"}
331331
if isinstance(func_name, str) and func_name not in BASEINDEXER_WHITELIST:
332332
raise NotImplementedError(
333333
f"{func_name} is not supported with using a BaseIndexer "

pandas/tests/window/test_base_indexer.py

+32-3
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
8282
df.rolling(indexer, win_type="boxcar")
8383

8484

85-
@pytest.mark.parametrize(
86-
"func", ["min", "max", "std", "var", "count", "skew", "cov", "corr"]
87-
)
85+
@pytest.mark.parametrize("func", ["std", "var", "count", "skew", "cov", "corr"])
8886
def test_notimplemented_functions(func):
8987
# GH 32865
9088
class CustomIndexer(BaseIndexer):
@@ -95,3 +93,34 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
9593
indexer = CustomIndexer()
9694
with pytest.raises(NotImplementedError, match=f"{func} is not supported"):
9795
getattr(df.rolling(indexer), func)()
96+
97+
98+
@pytest.mark.parametrize("constructor", [Series, DataFrame])
99+
@pytest.mark.parametrize(
100+
"func,alt_func,expected",
101+
[
102+
("min", np.min, [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan]),
103+
("max", np.max, [2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan]),
104+
],
105+
)
106+
def test_rolling_forward_window(constructor, func, alt_func, expected):
107+
# GH 32865
108+
class ForwardIndexer(BaseIndexer):
109+
def get_window_bounds(self, num_values, min_periods, center, closed):
110+
start = np.arange(num_values, dtype="int64")
111+
end_s = start[: -self.window_size] + self.window_size
112+
end_e = np.full(self.window_size, num_values, dtype="int64")
113+
end = np.concatenate([end_s, end_e])
114+
115+
return start, end
116+
117+
values = np.arange(10)
118+
values[5] = 100.0
119+
120+
indexer = ForwardIndexer(window_size=3)
121+
rolling = constructor(values).rolling(window=indexer, min_periods=2)
122+
result = getattr(rolling, func)()
123+
expected = constructor(expected)
124+
tm.assert_equal(result, expected)
125+
expected2 = constructor(rolling.apply(lambda x: alt_func(x)))
126+
tm.assert_equal(result, expected2)

0 commit comments

Comments
 (0)