Skip to content

Commit 2dcae6e

Browse files
committed
ENH: use bottleneck moving max/min impl for perf improvement close #1504, #50
1 parent f95a5d7 commit 2dcae6e

File tree

3 files changed

+166
-2
lines changed

3 files changed

+166
-2
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ pandas 0.8.1
3838

3939
**Improvements to existing features**
4040

41+
- Use moving min/max algorithms from Bottleneck in rolling_min/rolling_max
42+
for > 100x speedup. (#1504, #50)
4143
- Drastically improve ``to_datetime`` performance on ISO8601 datetime strings
4244
(with no time zones) (#1571)
4345
- Add ability to append hierarchical index levels with ``set_index`` and to

pandas/src/moments.pyx

+162
Original file line numberDiff line numberDiff line change
@@ -613,12 +613,100 @@ cdef double_t _get_median(object sl, int nobs, int minp):
613613
else:
614614
return NaN
615615

616+
#----------------------------------------------------------------------
617+
618+
# Moving maximum / minimum code taken from Bottleneck under the terms
619+
# of its Simplified BSD license
620+
# https://github.com/kwgoodman/bottleneck
621+
622+
cdef struct pairs:
623+
double value
624+
int death
625+
626+
from libc cimport stdlib
627+
628+
@cython.boundscheck(False)
629+
@cython.wraparound(False)
630+
def roll_max2(ndarray[float64_t] a, int window, int minp):
631+
"Moving max of 1d array of dtype=float64 along axis=0 ignoring NaNs."
632+
cdef np.float64_t ai, aold
633+
cdef Py_ssize_t count
634+
cdef pairs* ring
635+
cdef pairs* minpair
636+
cdef pairs* end
637+
cdef pairs* last
638+
cdef Py_ssize_t i0
639+
cdef np.npy_intp *dim
640+
dim = PyArray_DIMS(a)
641+
cdef Py_ssize_t n0 = dim[0]
642+
cdef np.npy_intp *dims = [n0]
643+
cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims,
644+
NPY_float64, 0)
645+
646+
minp = _check_minp(minp, n0)
647+
648+
if (window < 1) or (window > n0):
649+
raise ValueError('Invalid window size %d for len %d array'
650+
% (window, n0))
651+
652+
ring = <pairs*>stdlib.malloc(window * sizeof(pairs))
653+
end = ring + window
654+
last = ring
655+
656+
minpair = ring
657+
ai = a[0]
658+
if ai == ai:
659+
minpair.value = ai
660+
else:
661+
minpair.value = MINfloat64
662+
minpair.death = window
663+
664+
count = 0
665+
for i0 in range(n0):
666+
ai = a[i0]
667+
if ai == ai:
668+
count += 1
669+
else:
670+
ai = MINfloat64
671+
if i0 >= window:
672+
aold = a[i0 - window]
673+
if aold == aold:
674+
count -= 1
675+
if minpair.death == i0:
676+
minpair += 1
677+
if minpair >= end:
678+
minpair = ring
679+
if ai >= minpair.value:
680+
minpair.value = ai
681+
minpair.death = i0 + window
682+
last = minpair
683+
else:
684+
while last.value <= ai:
685+
if last == ring:
686+
last = end
687+
last -= 1
688+
last += 1
689+
if last == end:
690+
last = ring
691+
last.value = ai
692+
last.death = i0 + window
693+
if count >= minp:
694+
y[i0] = minpair.value
695+
else:
696+
y[i0] = NaN
697+
for i0 in range(window - 1):
698+
y[i0] = NaN
699+
700+
stdlib.free(ring)
701+
return y
702+
616703
def roll_max(ndarray input, int win, int minp):
617704
'''
618705
O(N log(window)) implementation using skip list
619706
'''
620707
return _roll_skiplist_op(input, win, minp, _get_max)
621708

709+
622710
cdef double_t _get_max(object skiplist, int nobs, int minp):
623711
if nobs >= minp:
624712
return <IndexableSkiplist> skiplist.get(nobs - 1)
@@ -631,6 +719,80 @@ def roll_min(ndarray input, int win, int minp):
631719
'''
632720
return _roll_skiplist_op(input, win, minp, _get_min)
633721

722+
@cython.boundscheck(False)
723+
@cython.wraparound(False)
724+
def roll_min2(np.ndarray[np.float64_t, ndim=1] a, int window, int minp):
725+
"Moving min of 1d array of dtype=float64 along axis=0 ignoring NaNs."
726+
cdef np.float64_t ai, aold
727+
cdef Py_ssize_t count
728+
cdef pairs* ring
729+
cdef pairs* minpair
730+
cdef pairs* end
731+
cdef pairs* last
732+
cdef Py_ssize_t i0
733+
cdef np.npy_intp *dim
734+
dim = PyArray_DIMS(a)
735+
cdef Py_ssize_t n0 = dim[0]
736+
cdef np.npy_intp *dims = [n0]
737+
cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims,
738+
NPY_float64, 0)
739+
if (window < 1) or (window > n0):
740+
raise ValueError('Invalid window size %d for len %d array'
741+
% (window, n0))
742+
743+
minp = _check_minp(minp, n0)
744+
745+
ring = <pairs*>stdlib.malloc(window * sizeof(pairs))
746+
end = ring + window
747+
last = ring
748+
749+
minpair = ring
750+
ai = a[0]
751+
if ai == ai:
752+
minpair.value = ai
753+
else:
754+
minpair.value = MAXfloat64
755+
minpair.death = window
756+
757+
count = 0
758+
for i0 in range(n0):
759+
ai = a[i0]
760+
if ai == ai:
761+
count += 1
762+
else:
763+
ai = MAXfloat64
764+
if i0 >= window:
765+
aold = a[i0 - window]
766+
if aold == aold:
767+
count -= 1
768+
if minpair.death == i0:
769+
minpair += 1
770+
if minpair >= end:
771+
minpair = ring
772+
if ai <= minpair.value:
773+
minpair.value = ai
774+
minpair.death = i0 + window
775+
last = minpair
776+
else:
777+
while last.value >= ai:
778+
if last == ring:
779+
last = end
780+
last -= 1
781+
last += 1
782+
if last == end:
783+
last = ring
784+
last.value = ai
785+
last.death = i0 + window
786+
if count >= minp:
787+
y[i0] = minpair.value
788+
else:
789+
y[i0] = NaN
790+
for i0 in range(window - 1):
791+
y[i0] = NaN
792+
793+
stdlib.free(ring)
794+
return y
795+
634796
cdef double_t _get_min(object skiplist, int nobs, int minp):
635797
if nobs >= minp:
636798
return <IndexableSkiplist> skiplist.get(0)

pandas/stats/moments.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -395,8 +395,8 @@ def call_cython(arg, window, minp, **kwds):
395395

396396
return f
397397

398-
rolling_max = _rolling_func(_tseries.roll_max, 'Moving maximum')
399-
rolling_min = _rolling_func(_tseries.roll_min, 'Moving minimum')
398+
rolling_max = _rolling_func(_tseries.roll_max2, 'Moving maximum')
399+
rolling_min = _rolling_func(_tseries.roll_min2, 'Moving minimum')
400400
rolling_sum = _rolling_func(_tseries.roll_sum, 'Moving sum')
401401
rolling_mean = _rolling_func(_tseries.roll_mean, 'Moving mean')
402402
rolling_median = _rolling_func(_tseries.roll_median_cython, 'Moving median')

0 commit comments

Comments
 (0)