-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Fix #46726; wrong result with varying window size min/max rolling calc. #61288
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
3382797
7c41898
ba94a73
a1c68ee
4ee752a
c8761ec
ee74058
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,10 @@ | |
|
||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING | ||
from typing import ( | ||
TYPE_CHECKING, | ||
Any, | ||
) | ||
|
||
import numba | ||
import numpy as np | ||
|
@@ -18,6 +21,20 @@ | |
from pandas._typing import npt | ||
|
||
|
||
@numba.njit(nogil=True, parallel=False) | ||
def bisect_left(a: list[Any], x: Any, lo: int = 0, hi: int = -1) -> int: | ||
"""Same as https://docs.python.org/3/library/bisect.html; not in numba yet!""" | ||
if hi == -1: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a docstring here; I think just a single line is fine. Something like
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added docstrings and comments for all requests. Made a couple of more one-liner code comments in strategic spots. If you must, feel free to remove them. |
||
hi = len(a) | ||
while lo < hi: | ||
mid = (lo + hi) // 2 | ||
if a[mid] < x: | ||
lo = mid + 1 | ||
else: | ||
hi = mid | ||
return lo | ||
|
||
|
||
@numba.jit(nopython=True, nogil=True, parallel=False) | ||
def sliding_min_max( | ||
values: np.ndarray, | ||
|
@@ -27,55 +44,87 @@ def sliding_min_max( | |
min_periods: int, | ||
is_max: bool, | ||
) -> tuple[np.ndarray, list[int]]: | ||
# Basic idea of the algorithm: https://stackoverflow.com/a/12239580 | ||
# It was generalized to work with an arbitrary list of any window size and position | ||
# by adding the Dominators stack. | ||
|
||
N = len(start) | ||
nobs = 0 | ||
output = np.empty(N, dtype=result_dtype) | ||
na_pos = [] | ||
# Use deque once numba supports it | ||
# https://github.com/numba/numba/issues/7417 | ||
Q: list = [] | ||
W: list = [] | ||
for i in range(N): | ||
curr_win_size = end[i] - start[i] | ||
if i == 0: | ||
st = start[i] | ||
else: | ||
st = end[i - 1] | ||
|
||
for k in range(st, end[i]): | ||
ai = values[k] | ||
if not np.isnan(ai): | ||
nobs += 1 | ||
elif is_max: | ||
ai = -np.inf | ||
else: | ||
ai = np.inf | ||
# Discard previous entries if we find new min or max | ||
if is_max: | ||
while Q and ((ai >= values[Q[-1]]) or values[Q[-1]] != values[Q[-1]]): | ||
Q.pop() | ||
else: | ||
while Q and ((ai <= values[Q[-1]]) or values[Q[-1]] != values[Q[-1]]): | ||
Q.pop() | ||
Q.append(k) | ||
W.append(k) | ||
|
||
# Discard entries outside and left of current window | ||
while Q and Q[0] <= start[i] - 1: | ||
Q.pop(0) | ||
while W and W[0] <= start[i] - 1: | ||
if not np.isnan(values[W[0]]): | ||
nobs -= 1 | ||
W.pop(0) | ||
|
||
# Save output based on index in input value array | ||
if Q and curr_win_size > 0 and nobs >= min_periods: | ||
output[i] = values[Q[0]] | ||
output = np.empty(N, dtype=result_dtype) | ||
|
||
def cmp(a: Any, b: Any, is_max: bool) -> bool: | ||
if is_max: | ||
return a >= b | ||
else: | ||
return a <= b | ||
|
||
# Indices of bounded extrema in `values`. `candidates[i]` is always increasing. | ||
# `values[candidates[i]]` is decreasing for max and increasing for min. | ||
candidates: list[int] = [] # this is a queue | ||
# Indices of largest windows that "cover" preceding windows. | ||
dominators: list[int] = [] # this is a stack | ||
|
||
if min_periods < 1: | ||
min_periods = 1 | ||
|
||
if N > 2: | ||
i_next = N - 1 # equivalent to i_next = i+1 inside the loop | ||
for i in range(N - 2, -1, -1): | ||
next_dominates = start[i_next] < start[i] | ||
if next_dominates and ( | ||
not dominators or start[dominators[-1]] > start[i_next] | ||
): | ||
dominators.append(i_next) | ||
i_next = i | ||
|
||
# NaN tracking to guarantee min_periods | ||
valid_start = -min_periods | ||
|
||
last_end = 0 | ||
last_start = -1 | ||
|
||
for i in range(N): | ||
this_start = start[i].item() | ||
this_end = end[i].item() | ||
|
||
if dominators and dominators[-1] == i: | ||
dominators.pop() | ||
|
||
if not ( | ||
this_end > last_end or (this_end == last_end and this_start >= last_start) | ||
): | ||
raise ValueError( | ||
"Start/End ordering requirement is violated at index " + str(i) | ||
) | ||
|
||
stash_start = ( | ||
this_start if not dominators else min(this_start, start[dominators[-1]]) | ||
) | ||
while candidates and candidates[0] < stash_start: | ||
candidates.pop(0) | ||
|
||
for k in range(last_end, this_end): | ||
if not np.isnan(values[k]): | ||
valid_start += 1 | ||
while valid_start >= 0 and np.isnan(values[valid_start]): | ||
valid_start += 1 | ||
while candidates and cmp(values[k], values[candidates[-1]], is_max): | ||
candidates.pop() # Q.pop_back() | ||
candidates.append(k) # Q.push_back(k) | ||
|
||
if not candidates or (this_start > valid_start): | ||
if values.dtype.kind != "i": | ||
output[i] = np.nan | ||
else: | ||
na_pos.append(i) | ||
elif candidates[0] >= this_start: | ||
# ^^ This is here to avoid costly bisection for fixed window sizes. | ||
output[i] = values[candidates[0]] | ||
else: | ||
q_idx = bisect_left(candidates, this_start, lo=1) | ||
output[i] = values[candidates[q_idx]] | ||
last_end = this_end | ||
last_start = this_start | ||
|
||
return output, na_pos | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same here, can you add a similar docstring.