Skip to content

Commit 4b449f7

Browse files
mroeschke authored and proost committed
CLN/TYPE: window aggregation cleanups and typing (pandas-dev#30137)
* Move is_monotonic_bounds to the aggregation functions
* Remove single usage of _check_min in aggregations.pyx
* remove _check_min
* Add some typing
* fix condition
* Use is_monotonic from _lib.algos
* Add inline helper function
1 parent c73c44c commit 4b449f7

File tree

2 files changed

+33
-61
lines changed

2 files changed

+33
-61
lines changed

pandas/_libs/window/aggregations.pyx

+31-52
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ cdef extern from "src/headers/cmath" namespace "std":
1818
int signbit(float64_t) nogil
1919
float64_t sqrt(float64_t x) nogil
2020

21-
cimport pandas._libs.util as util
21+
from pandas._libs.algos import is_monotonic
22+
2223
from pandas._libs.util cimport numeric
2324

2425
from pandas._libs.skiplist cimport (
@@ -37,6 +38,9 @@ cdef:
3738
cdef inline int int_max(int a, int b): return a if a >= b else b
3839
cdef inline int int_min(int a, int b): return a if a <= b else b
3940

41+
cdef inline bint is_monotonic_start_end_bounds(ndarray[int64_t, ndim=1] start,
42+
ndarray[int64_t, ndim=1] end):
43+
return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]
4044

4145
# Cython implementations of rolling sum, mean, variance, skewness,
4246
# other statistical moment functions
@@ -48,39 +52,6 @@ cdef inline int int_min(int a, int b): return a if a <= b else b
4852
# periodically revisited to see if it's still true.
4953
#
5054

51-
52-
def _check_minp(win, minp, N, floor=None) -> int:
53-
"""
54-
Parameters
55-
----------
56-
win: int
57-
minp: int or None
58-
N: len of window
59-
floor: int, optional
60-
default 1
61-
62-
Returns
63-
-------
64-
minimum period
65-
"""
66-
67-
if minp is None:
68-
minp = 1
69-
if not util.is_integer_object(minp):
70-
raise ValueError("min_periods must be an integer")
71-
if minp > win:
72-
raise ValueError(f"min_periods (minp) must be <= "
73-
f"window (win)")
74-
elif minp > N:
75-
minp = N + 1
76-
elif minp < 0:
77-
raise ValueError('min_periods must be >= 0')
78-
if floor is None:
79-
floor = 1
80-
81-
return max(minp, floor)
82-
83-
8455
# original C implementation by N. Devillard.
8556
# This code in public domain.
8657
# Function : kth_smallest()
@@ -96,7 +67,6 @@ def _check_minp(win, minp, N, floor=None) -> int:
9667
# Physical description: 366 p.
9768
# Series: Prentice-Hall Series in Automatic Computation
9869

99-
10070
# ----------------------------------------------------------------------
10171
# Rolling count
10272
# this is only an impl for index not None, IOW, freq aware
@@ -183,14 +153,15 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogi
183153

184154

185155
def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start,
186-
ndarray[int64_t] end, int64_t minp,
187-
bint is_monotonic_bounds=True):
156+
ndarray[int64_t] end, int64_t minp):
188157
cdef:
189158
float64_t sum_x = 0
190159
int64_t s, e
191160
int64_t nobs = 0, i, j, N = len(values)
192161
ndarray[float64_t] output
162+
bint is_monotonic_bounds
193163

164+
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
194165
output = np.empty(N, dtype=float)
195166

196167
with nogil:
@@ -331,14 +302,15 @@ def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
331302

332303

333304
def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start,
334-
ndarray[int64_t] end, int64_t minp,
335-
bint is_monotonic_bounds=True):
305+
ndarray[int64_t] end, int64_t minp):
336306
cdef:
337307
float64_t val, sum_x = 0
338308
int64_t s, e
339309
Py_ssize_t nobs = 0, i, j, neg_ct = 0, N = len(values)
340310
ndarray[float64_t] output
311+
bint is_monotonic_bounds
341312

313+
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
342314
output = np.empty(N, dtype=float)
343315

344316
with nogil:
@@ -493,8 +465,7 @@ def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
493465

494466

495467
def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start,
496-
ndarray[int64_t] end, int64_t minp, int ddof=1,
497-
bint is_monotonic_bounds=True):
468+
ndarray[int64_t] end, int64_t minp, int ddof=1):
498469
"""
499470
Numerically stable implementation using Welford's method.
500471
"""
@@ -504,7 +475,9 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start,
504475
int64_t s, e
505476
Py_ssize_t i, j, N = len(values)
506477
ndarray[float64_t] output
478+
bint is_monotonic_bounds
507479

480+
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
508481
output = np.empty(N, dtype=float)
509482

510483
with nogil:
@@ -641,15 +614,16 @@ def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
641614

642615

643616
def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start,
644-
ndarray[int64_t] end, int64_t minp,
645-
bint is_monotonic_bounds=True):
617+
ndarray[int64_t] end, int64_t minp):
646618
cdef:
647619
float64_t val, prev
648620
float64_t x = 0, xx = 0, xxx = 0
649621
int64_t nobs = 0, i, j, N = len(values)
650622
int64_t s, e
651623
ndarray[float64_t] output
624+
bint is_monotonic_bounds
652625

626+
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
653627
output = np.empty(N, dtype=float)
654628

655629
with nogil:
@@ -794,14 +768,15 @@ def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
794768

795769

796770
def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start,
797-
ndarray[int64_t] end, int64_t minp,
798-
bint is_monotonic_bounds=True):
771+
ndarray[int64_t] end, int64_t minp):
799772
cdef:
800773
float64_t val, prev
801774
float64_t x = 0, xx = 0, xxx = 0, xxxx = 0
802775
int64_t nobs = 0, i, j, s, e, N = len(values)
803776
ndarray[float64_t] output
777+
bint is_monotonic_bounds
804778

779+
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
805780
output = np.empty(N, dtype=float)
806781

807782
with nogil:
@@ -1030,8 +1005,7 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
10301005

10311006

10321007
def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start,
1033-
ndarray[int64_t] end, int64_t minp,
1034-
bint is_monotonic_bounds=True):
1008+
ndarray[int64_t] end, int64_t minp):
10351009
"""
10361010
Moving min of 1d array of any numeric type along axis=0 ignoring NaNs.
10371011
@@ -1424,10 +1398,7 @@ def roll_generic_variable(object obj,
14241398
ndarray[int64_t] start, ndarray[int64_t] end,
14251399
int64_t minp,
14261400
int offset, object func, bint raw,
1427-
object args, object kwargs,
1428-
bint is_monotonic_bounds=True):
1429-
# is_monotonic_bounds unused since variable algorithm doesn't calculate
1430-
# adds/subtracts across windows, but matches other *_variable functions
1401+
object args, object kwargs):
14311402
cdef:
14321403
ndarray[float64_t] output, counts, bufarr
14331404
ndarray[float64_t, cast=True] arr
@@ -1501,7 +1472,15 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values,
15011472
if avg:
15021473
tot_wgt = np.zeros(in_n, dtype=np.float64)
15031474

1504-
minp = _check_minp(len(weights), minp, in_n)
1475+
if minp > win_n:
1476+
raise ValueError(f"min_periods (minp) must be <= "
1477+
f"window (win)")
1478+
elif minp > in_n:
1479+
minp = in_n + 1
1480+
elif minp < 0:
1481+
raise ValueError('min_periods must be >= 0')
1482+
1483+
minp = max(minp, 1)
15051484

15061485
with nogil:
15071486
if avg:

pandas/core/window/rolling.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def _on(self):
109109
def is_freq_type(self) -> bool:
110110
return self.win_type == "freq"
111111

112-
def validate(self):
112+
def validate(self) -> None:
113113
if self.center is not None and not is_bool(self.center):
114114
raise ValueError("center must be a boolean")
115115
if self.min_periods is not None and not is_integer(self.min_periods):
@@ -412,7 +412,7 @@ def _get_roll_func(self, func_name: str) -> Callable:
412412
)
413413
return window_func
414414

415-
def _get_cython_func_type(self, func):
415+
def _get_cython_func_type(self, func: str) -> Callable:
416416
"""
417417
Return a variable or fixed cython function type.
418418
@@ -517,13 +517,6 @@ def calc(x):
517517
center=self.center,
518518
closed=self.closed,
519519
)
520-
if np.any(np.diff(start) < 0) or np.any(np.diff(end) < 0):
521-
# Our "variable" algorithms assume start/end are
522-
# monotonically increasing. A custom window indexer
523-
# can produce a non monotonic start/end.
524-
return func(
525-
x, start, end, min_periods, is_monotonic_bounds=False
526-
)
527520
return func(x, start, end, min_periods)
528521

529522
else:

0 commit comments

Comments
 (0)