-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN/TYPE: window aggregation cleanups and typing #30137
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
07c00ed
ba95bd6
a73d7b3
32015b2
c79d5c1
e776c67
636166b
259b8b4
674894a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,6 @@ cdef extern from "src/headers/cmath" namespace "std": | |
int signbit(float64_t) nogil | ||
float64_t sqrt(float64_t x) nogil | ||
|
||
cimport pandas._libs.util as util | ||
from pandas._libs.util cimport numeric | ||
|
||
from pandas._libs.skiplist cimport ( | ||
|
@@ -48,39 +47,6 @@ cdef inline int int_min(int a, int b): return a if a <= b else b | |
# periodically revisited to see if it's still true. | ||
# | ||
|
||
|
||
def _check_minp(win, minp, N, floor=None) -> int: | ||
""" | ||
Parameters | ||
---------- | ||
win: int | ||
minp: int or None | ||
N: len of window | ||
floor: int, optional | ||
default 1 | ||
|
||
Returns | ||
------- | ||
minimum period | ||
""" | ||
|
||
if minp is None: | ||
minp = 1 | ||
if not util.is_integer_object(minp): | ||
raise ValueError("min_periods must be an integer") | ||
if minp > win: | ||
raise ValueError(f"min_periods (minp) must be <= " | ||
f"window (win)") | ||
elif minp > N: | ||
minp = N + 1 | ||
elif minp < 0: | ||
raise ValueError('min_periods must be >= 0') | ||
if floor is None: | ||
floor = 1 | ||
|
||
return max(minp, floor) | ||
|
||
|
||
# original C implementation by N. Devillard. | ||
# This code in public domain. | ||
# Function : kth_smallest() | ||
|
@@ -183,14 +149,15 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogi | |
|
||
|
||
def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start, | ||
ndarray[int64_t] end, int64_t minp, | ||
bint is_monotonic_bounds=True): | ||
ndarray[int64_t] end, int64_t minp): | ||
cdef: | ||
float64_t sum_x = 0 | ||
int64_t s, e | ||
int64_t nobs = 0, i, j, N = len(values) | ||
ndarray[float64_t] output | ||
bint is_monotonic_bounds | ||
|
||
is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) | ||
output = np.empty(N, dtype=float) | ||
|
||
with nogil: | ||
|
@@ -331,14 +298,15 @@ def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start, | |
|
||
|
||
def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start, | ||
ndarray[int64_t] end, int64_t minp, | ||
bint is_monotonic_bounds=True): | ||
ndarray[int64_t] end, int64_t minp): | ||
cdef: | ||
float64_t val, sum_x = 0 | ||
int64_t s, e | ||
Py_ssize_t nobs = 0, i, j, neg_ct = 0, N = len(values) | ||
ndarray[float64_t] output | ||
bint is_monotonic_bounds | ||
|
||
is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) | ||
output = np.empty(N, dtype=float) | ||
|
||
with nogil: | ||
|
@@ -493,8 +461,7 @@ def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start, | |
|
||
|
||
def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, | ||
ndarray[int64_t] end, int64_t minp, int ddof=1, | ||
bint is_monotonic_bounds=True): | ||
ndarray[int64_t] end, int64_t minp, int ddof=1): | ||
""" | ||
Numerically stable implementation using Welford's method. | ||
""" | ||
|
@@ -504,7 +471,9 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, | |
int64_t s, e | ||
Py_ssize_t i, j, N = len(values) | ||
ndarray[float64_t] output | ||
bint is_monotonic_bounds | ||
|
||
is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. make this a function |
||
output = np.empty(N, dtype=float) | ||
|
||
with nogil: | ||
|
@@ -641,15 +610,16 @@ def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start, | |
|
||
|
||
def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start, | ||
ndarray[int64_t] end, int64_t minp, | ||
bint is_monotonic_bounds=True): | ||
ndarray[int64_t] end, int64_t minp): | ||
cdef: | ||
float64_t val, prev | ||
float64_t x = 0, xx = 0, xxx = 0 | ||
int64_t nobs = 0, i, j, N = len(values) | ||
int64_t s, e | ||
ndarray[float64_t] output | ||
bint is_monotonic_bounds | ||
|
||
is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) | ||
output = np.empty(N, dtype=float) | ||
|
||
with nogil: | ||
|
@@ -794,14 +764,15 @@ def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start, | |
|
||
|
||
def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, | ||
ndarray[int64_t] end, int64_t minp, | ||
bint is_monotonic_bounds=True): | ||
ndarray[int64_t] end, int64_t minp): | ||
cdef: | ||
float64_t val, prev | ||
float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 | ||
int64_t nobs = 0, i, j, s, e, N = len(values) | ||
ndarray[float64_t] output | ||
bint is_monotonic_bounds | ||
|
||
is_monotonic_bounds = np.all(np.diff(start) > 0) and np.all(np.diff(end) > 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you make this a |
||
output = np.empty(N, dtype=float) | ||
|
||
with nogil: | ||
|
@@ -1030,8 +1001,7 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, | |
|
||
|
||
def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start, | ||
ndarray[int64_t] end, int64_t minp, | ||
bint is_monotonic_bounds=True): | ||
ndarray[int64_t] end, int64_t minp): | ||
""" | ||
Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. | ||
|
||
|
@@ -1424,10 +1394,7 @@ def roll_generic_variable(object obj, | |
ndarray[int64_t] start, ndarray[int64_t] end, | ||
int64_t minp, | ||
int offset, object func, bint raw, | ||
object args, object kwargs, | ||
bint is_monotonic_bounds=True): | ||
# is_monotonic_bounds unused since variable algorithm doesn't calculate | ||
# adds/subtracts across windows, but matches other *_variable functions | ||
object args, object kwargs): | ||
cdef: | ||
ndarray[float64_t] output, counts, bufarr | ||
ndarray[float64_t, cast=True] arr | ||
|
@@ -1501,7 +1468,15 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values, | |
if avg: | ||
tot_wgt = np.zeros(in_n, dtype=np.float64) | ||
|
||
minp = _check_minp(len(weights), minp, in_n) | ||
if minp > win_n: | ||
raise ValueError(f"min_periods (minp) must be <= " | ||
f"window (win)") | ||
elif minp > in_n: | ||
minp = in_n + 1 | ||
elif minp < 0: | ||
raise ValueError('min_periods must be >= 0') | ||
|
||
minp = max(minp, 1) | ||
|
||
with nogil: | ||
if avg: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,7 +109,7 @@ def _on(self): | |
def is_freq_type(self) -> bool: | ||
return self.win_type == "freq" | ||
|
||
def validate(self): | ||
def validate(self) -> None: | ||
if self.center is not None and not is_bool(self.center): | ||
raise ValueError("center must be a boolean") | ||
if self.min_periods is not None and not is_integer(self.min_periods): | ||
|
@@ -412,7 +412,7 @@ def _get_roll_func(self, func_name: str) -> Callable: | |
) | ||
return window_func | ||
|
||
def _get_cython_func_type(self, func): | ||
def _get_cython_func_type(self, func: str) -> Callable: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If possible to add subtypes to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately the arguments to the cython functions for rolling are still not entirely uniform so the argument list can't be typed consistently. |
||
""" | ||
Return a variable or fixed cython function type. | ||
|
||
|
@@ -517,13 +517,6 @@ def calc(x): | |
center=self.center, | ||
closed=self.closed, | ||
) | ||
if np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0): | ||
# Our "variable" algorithms assume start/end are | ||
# monotonically increasing. A custom window indexer | ||
# can produce a non monotonic start/end. | ||
return func( | ||
x, start, end, min_periods, is_monotonic_bounds=False | ||
) | ||
return func(x, start, end, min_periods) | ||
|
||
else: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this call expensive? we might have something in _libs.algos that short-circuits this check
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good call. Was able to use
is_monotonic
in_libs.algos