Skip to content

Commit be4f156

Browse files
committed
BUG: fix rolling_max/min for small inputs and large windows. Add a check that the min_period <= window size. Fixes pandas-dev#1897.
1 parent c32cc6e commit be4f156

File tree

2 files changed

+54
-27
lines changed

2 files changed

+54
-27
lines changed

pandas/src/moments.pyx

+42-27
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,18 @@
99
#
1010
# -
1111

12+
def _check_minp(win, minp, N):
13+
if minp > win:
14+
raise ValueError('min_periods %d must be <= window %d'
15+
% (minp, win))
16+
elif minp > N:
17+
minp = N + 1
18+
elif minp == 0:
19+
minp = 1
20+
elif minp < 0:
21+
raise ValueError('min_periods must be >= 0')
22+
return minp
23+
1224
# original C implementation by N. Devillard.
1325
# This code in public domain.
1426
# Function : kth_smallest()
@@ -24,7 +36,6 @@
2436
# Physical description: 366 p.
2537
# Series: Prentice-Hall Series in Automatic Computation
2638

27-
2839
def kth_smallest(ndarray[double_t] a, Py_ssize_t k):
2940
cdef:
3041
Py_ssize_t i,j,l,m,n
@@ -149,7 +160,7 @@ def roll_sum(ndarray[double_t] input, int win, int minp):
149160

150161
cdef ndarray[double_t] output = np.empty(N, dtype=float)
151162

152-
minp = _check_minp(minp, N)
163+
minp = _check_minp(win, minp, N)
153164

154165
for i from 0 <= i < minp - 1:
155166
val = input[i]
@@ -192,7 +203,7 @@ def roll_mean(ndarray[double_t] input,
192203

193204
cdef ndarray[double_t] output = np.empty(N, dtype=float)
194205

195-
minp = _check_minp(minp, N)
206+
minp = _check_minp(win, minp, N)
196207

197208
for i from 0 <= i < minp - 1:
198209
val = input[i]
@@ -335,23 +346,14 @@ def nancorr(ndarray[float64_t, ndim=2] mat, cov=False):
335346
#----------------------------------------------------------------------
336347
# Rolling variance
337348

338-
def _check_minp(minp, N):
339-
if minp > N:
340-
minp = N + 1
341-
elif minp == 0:
342-
minp = 1
343-
elif minp < 0:
344-
raise ValueError('min_periods must be >= 0')
345-
return minp
346-
347349
def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
348350
cdef double val, prev, sum_x = 0, sum_xx = 0, nobs = 0
349351
cdef Py_ssize_t i
350352
cdef Py_ssize_t N = len(input)
351353

352354
cdef ndarray[double_t] output = np.empty(N, dtype=float)
353355

354-
minp = _check_minp(minp, N)
356+
minp = _check_minp(win, minp, N)
355357

356358
for i from 0 <= i < minp - 1:
357359
val = input[i]
@@ -400,7 +402,7 @@ def roll_skew(ndarray[double_t] input, int win, int minp):
400402
# 3 components of the skewness equation
401403
cdef double A, B, C, R
402404

403-
minp = _check_minp(minp, N)
405+
minp = _check_minp(win, minp, N)
404406

405407
for i from 0 <= i < minp - 1:
406408
val = input[i]
@@ -462,7 +464,7 @@ def roll_kurt(ndarray[double_t] input,
462464
# 5 components of the kurtosis equation
463465
cdef double A, B, C, D, R, K
464466

465-
minp = _check_minp(minp, N)
467+
minp = _check_minp(win, minp, N)
466468

467469
for i from 0 <= i < minp - 1:
468470
val = input[i]
@@ -533,7 +535,7 @@ cdef _roll_skiplist_op(ndarray arg, int win, int minp, skiplist_f op):
533535

534536
skiplist = IndexableSkiplist(win)
535537

536-
minp = _check_minp(minp, N)
538+
minp = _check_minp(win, minp, N)
537539

538540
for i from 0 <= i < minp - 1:
539541
val = input[i]
@@ -578,7 +580,7 @@ def roll_median_c(ndarray[float64_t] arg, int win, int minp):
578580

579581
sl = skiplist_init(win)
580582

581-
minp = _check_minp(minp, N)
583+
minp = _check_minp(win, minp, N)
582584

583585
for i from 0 <= i < minp - 1:
584586
val = arg[i]
@@ -672,11 +674,17 @@ def roll_max2(ndarray[float64_t] a, int window, int minp):
672674
cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims,
673675
NPY_float64, 0)
674676

675-
minp = _check_minp(minp, n0)
677+
if window < 1:
678+
raise ValueError('Invalid window size %d'
679+
% (window))
676680

677-
if (window < 1) or (window > n0):
678-
raise ValueError('Invalid window size %d for len %d array'
679-
% (window, n0))
681+
if minp > window:
682+
raise ValueError('Invalid min_periods size %d greater than window %d'
683+
% (minp, window))
684+
685+
minp = _check_minp(window, minp, n0)
686+
687+
window = min(window, n0)
680688

681689
ring = <pairs*>stdlib.malloc(window * sizeof(pairs))
682690
end = ring + window
@@ -766,11 +774,18 @@ def roll_min2(np.ndarray[np.float64_t, ndim=1] a, int window, int minp):
766774
cdef np.npy_intp *dims = [n0]
767775
cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims,
768776
NPY_float64, 0)
769-
if (window < 1) or (window > n0):
770-
raise ValueError('Invalid window size %d for len %d array'
771-
% (window, n0))
772777

773-
minp = _check_minp(minp, n0)
778+
if window < 1:
779+
raise ValueError('Invalid window size %d'
780+
% (window))
781+
782+
if minp > window:
783+
raise ValueError('Invalid min_periods size %d greater than window %d'
784+
% (minp, window))
785+
786+
window = min(window, n0)
787+
788+
minp = _check_minp(window, minp, n0)
774789

775790
ring = <pairs*>stdlib.malloc(window * sizeof(pairs))
776791
end = ring + window
@@ -843,7 +858,7 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int win,
843858

844859
skiplist = IndexableSkiplist(win)
845860

846-
minp = _check_minp(minp, N)
861+
minp = _check_minp(win, minp, N)
847862

848863
for i from 0 <= i < minp - 1:
849864
val = input[i]
@@ -892,7 +907,7 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win,
892907
if n == 0:
893908
return input
894909

895-
minp = _check_minp(minp, n)
910+
minp = _check_minp(win, minp, n)
896911
output = np.empty(n, dtype=float)
897912
counts = roll_sum(np.isfinite(input).astype(float), win, minp)
898913

pandas/stats/tests/test_moments.py

+12
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,21 @@ def test_rolling_median(self):
5050
def test_rolling_min(self):
5151
self._check_moment_func(mom.rolling_min, np.min)
5252

53+
a = np.array([1,2,3,4,5])
54+
b = mom.rolling_min(a, window=100, min_periods=1)
55+
assert_almost_equal(b, np.ones(len(a)))
56+
57+
self.assertRaises(ValueError, mom.rolling_min, np.array([1,2,3]), window=3, min_periods=5)
58+
5359
def test_rolling_max(self):
5460
self._check_moment_func(mom.rolling_max, np.max)
5561

62+
a = np.array([1,2,3,4,5])
63+
b = mom.rolling_max(a, window=100, min_periods=1)
64+
assert_almost_equal(a, b)
65+
66+
self.assertRaises(ValueError, mom.rolling_max, np.array([1,2,3]), window=3, min_periods=5)
67+
5668
def test_rolling_quantile(self):
5769
qs = [.1, .5, .9]
5870

0 commit comments

Comments
 (0)