Skip to content

Commit b1fb3bc

Browse files
committed
BUG: treat min_periods=0 as 1 in moving window functions, GH #365
1 parent ff877b4 commit b1fb3bc

File tree

4 files changed: +68 / -56 lines changed

4 files changed: +68 / -56 lines changed

RELEASE.rst

+2
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,7 @@ pandas 0.5.1
127127
- setupegg.py will invoke Cython (GH #192)
128128
- Fix block consolidation bug after inserting column into MultiIndex (GH #366)
129129
- Fix bug in join operations between Index and Int64Index (GH #367)
130+
- Handle min_periods=0 case in moving window functions (GH #365)
130131

131132
Thanks
132133
------
@@ -135,6 +136,7 @@ Thanks
135136
- Joel Cross
136137
- Jeff Hammerbacher
137138
- Adam Klein
139+
- Thomas Kluyver
138140
- Jev Kuznetsov
139141
- Kieran O'Mahony
140142
- Wouter Overmeire

pandas/src/moments.pyx

+50 -47
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
# Series: Prentice-Hall Series in Automatic Computation
2626

2727

28-
def kth_smallest(ndarray[double_t, ndim=1] a, Py_ssize_t k):
28+
def kth_smallest(ndarray[double_t] a, Py_ssize_t k):
2929
cdef:
3030
Py_ssize_t i,j,l,m,n
3131
double_t x, t
@@ -82,8 +82,7 @@ def roll_sum(ndarray[double_t] input, int win, int minp):
8282

8383
cdef ndarray[double_t] output = np.empty(N, dtype=float)
8484

85-
if minp > N:
86-
minp = N + 1
85+
minp = _check_minp(minp, N)
8786

8887
for i from 0 <= i < minp - 1:
8988
val = input[i]
@@ -126,8 +125,7 @@ def roll_mean(ndarray[double_t] input,
126125

127126
cdef ndarray[double_t] output = np.empty(N, dtype=float)
128127

129-
if minp > N:
130-
minp = N + 1
128+
minp = _check_minp(minp, N)
131129

132130
for i from 0 <= i < minp - 1:
133131
val = input[i]
@@ -213,15 +211,23 @@ def ewma(ndarray[double_t] input, double_t com):
213211
#-------------------------------------------------------------------------------
214212
# Rolling variance
215213

214+
def _check_minp(minp, N):
215+
# Normalize the min_periods value `minp` against the input length `N`
# and return the value the rolling kernels should actually use.
# More observations required than data points exist: return N + 1. In
# roll_quantile the warm-up loop `0 <= i < minp - 1` writes NaN, so this
# makes that loop span the whole input.
if minp > N:
216+
minp = N + 1
217+
# min_periods=0 is treated exactly like min_periods=1 (GH #365).
elif minp == 0:
218+
minp = 1
219+
# Negative values are rejected with a ValueError.
elif minp < 0:
220+
raise ValueError('min_periods must be >= 0')
221+
return minp
222+
216223
def roll_var(ndarray[double_t] input, int win, int minp):
217224
cdef double val, prev, sum_x = 0, sum_xx = 0, nobs = 0
218225
cdef Py_ssize_t i
219226
cdef Py_ssize_t N = len(input)
220227

221228
cdef ndarray[double_t] output = np.empty(N, dtype=float)
222229

223-
if minp > N:
224-
minp = N + 1
230+
minp = _check_minp(minp, N)
225231

226232
for i from 0 <= i < minp - 1:
227233
val = input[i]
@@ -270,8 +276,7 @@ def roll_skew(ndarray[double_t] input, int win, int minp):
270276
# 3 components of the skewness equation
271277
cdef double A, B, C, R
272278

273-
if minp > N:
274-
minp = N + 1
279+
minp = _check_minp(minp, N)
275280

276281
for i from 0 <= i < minp - 1:
277282
val = input[i]
@@ -333,8 +338,7 @@ def roll_kurt(ndarray[double_t] input,
333338
# 5 components of the kurtosis equation
334339
cdef double A, B, C, D, R, K
335340

336-
if minp > N:
337-
minp = N + 1
341+
minp = _check_minp(minp, N)
338342

339343
for i from 0 <= i < minp - 1:
340344
val = input[i]
@@ -405,8 +409,7 @@ cdef _roll_skiplist_op(ndarray arg, int win, int minp, skiplist_f op):
405409

406410
skiplist = IndexableSkiplist(win)
407411

408-
if minp > N:
409-
minp = N + 1
412+
minp = _check_minp(minp, N)
410413

411414
for i from 0 <= i < minp - 1:
412415
val = input[i]
@@ -484,51 +487,50 @@ cdef double_t _get_min(object skiplist, int nobs, int minp):
484487

485488
def roll_quantile(ndarray[float64_t, cast=True] input, int win,
486489
int minp, double quantile):
487-
'''
488-
O(N log(window)) implementation using skip list
489-
'''
490-
cdef double val, prev, midpoint
491-
cdef IndexableSkiplist skiplist
492-
cdef Py_ssize_t nobs = 0, i
493-
cdef Py_ssize_t N = len(input)
494-
cdef ndarray[double_t] output = np.empty(N, dtype=float)
490+
'''
491+
O(N log(window)) implementation using skip list
492+
'''
493+
cdef double val, prev, midpoint
494+
cdef IndexableSkiplist skiplist
495+
cdef Py_ssize_t nobs = 0, i
496+
cdef Py_ssize_t N = len(input)
497+
cdef ndarray[double_t] output = np.empty(N, dtype=float)
495498

496-
skiplist = IndexableSkiplist(win)
499+
skiplist = IndexableSkiplist(win)
497500

498-
if minp > N:
499-
minp = N + 1
501+
minp = _check_minp(minp, N)
500502

501-
for i from 0 <= i < minp - 1:
502-
val = input[i]
503+
for i from 0 <= i < minp - 1:
504+
val = input[i]
503505

504-
# Not NaN
505-
if val == val:
506-
nobs += 1
507-
skiplist.insert(val)
506+
# Not NaN
507+
if val == val:
508+
nobs += 1
509+
skiplist.insert(val)
508510

509-
output[i] = NaN
511+
output[i] = NaN
510512

511-
for i from minp - 1 <= i < N:
512-
val = input[i]
513+
for i from minp - 1 <= i < N:
514+
val = input[i]
513515

514-
if i > win - 1:
515-
prev = input[i - win]
516+
if i > win - 1:
517+
prev = input[i - win]
516518

517-
if prev == prev:
518-
skiplist.remove(prev)
519-
nobs -= 1
519+
if prev == prev:
520+
skiplist.remove(prev)
521+
nobs -= 1
520522

521-
if val == val:
522-
nobs += 1
523-
skiplist.insert(val)
523+
if val == val:
524+
nobs += 1
525+
skiplist.insert(val)
524526

525-
if nobs >= minp:
526-
idx = int((quantile / 1.) * (nobs - 1))
527-
output[i] = skiplist.get(idx)
528-
else:
529-
output[i] = NaN
527+
if nobs >= minp:
528+
idx = int((quantile / 1.) * (nobs - 1))
529+
output[i] = skiplist.get(idx)
530+
else:
531+
output[i] = NaN
530532

531-
return output
533+
return output
532534

533535
def roll_generic(ndarray[float64_t, cast=True] input, int win,
534536
int minp, object func):
@@ -542,6 +544,7 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win,
542544
buf = <float64_t*> input.data
543545

544546
n = len(input)
547+
minp = _check_minp(minp, n)
545548
output = np.empty(n, dtype=float)
546549
counts = roll_sum(np.isfinite(input).astype(float), win, minp)
547550

pandas/stats/moments.py

+11 -9
Original file line number | Diff line number | Diff line change
@@ -291,11 +291,13 @@ def _conv_timerule(arg, time_rule):
291291

292292
return arg
293293

294-
def _two_periods(minp, window):
295-
if minp is None:
296-
return window
297-
else:
298-
return max(2, minp)
294+
def _require_min_periods(p):
295+
# Factory: returns a (minp, window) -> int callback that enforces a floor
# of `p` on min_periods. It is passed as `check_minp=` to _rolling_func.
def _check_func(minp, window):
296+
# No explicit min_periods given: use the full window length.
if minp is None:
297+
return window
298+
else:
299+
# Explicit min_periods: raise it to at least `p`.
return max(p, minp)
300+
return _check_func
299301

300302
def _use_window(minp, window):
301303
if minp is None:
@@ -324,13 +326,13 @@ def call_cython(arg, window, minp):
324326

325327
_ts_std = lambda *a, **kw: np.sqrt(_tseries.roll_var(*a, **kw))
326328
rolling_std = _rolling_func(_ts_std, 'Unbiased moving standard deviation',
327-
check_minp=_two_periods)
329+
check_minp=_require_min_periods(2))
328330
rolling_var = _rolling_func(_tseries.roll_var, 'Unbiased moving variance',
329-
check_minp=_two_periods)
331+
check_minp=_require_min_periods(2))
330332
rolling_skew = _rolling_func(_tseries.roll_skew, 'Unbiased moving skewness',
331-
check_minp=_two_periods)
333+
check_minp=_require_min_periods(3))
332334
rolling_kurt = _rolling_func(_tseries.roll_kurt, 'Unbiased moving kurtosis',
333-
check_minp=_two_periods)
335+
check_minp=_require_min_periods(4))
334336

335337
def rolling_quantile(arg, window, quantile, min_periods=None, time_rule=None):
336338
"""Moving quantile

pandas/stats/tests/test_moments.py

+5
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,11 @@ def _check_ndarray(self, func, static_comp, window=50,
142142

143143
self.assert_(not np.isnan(result[-6]))
144144
self.assert_(np.isnan(result[-5]))
145+
146+
# min_periods=0
147+
result0 = func(arr, 20, min_periods=0)
148+
result1 = func(arr, 20, min_periods=1)
149+
assert_almost_equal(result0, result1)
145150
else:
146151
result = func(arr, 50)
147152
assert_almost_equal(result[-1], static_comp(arr[10:-10]))

0 commit comments

Comments
 (0)