Skip to content

Implementing rolling min/max functions that can retain the original type #12595

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,4 @@ Bug Fixes


- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`)
- Bug in ``.rolling.min`` and ``.rolling.max`` where passing columns of type float32 raised a Value error (:issue:`12373`)
204 changes: 85 additions & 119 deletions pandas/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1625,117 +1625,56 @@ def roll_median_c(ndarray[float64_t] arg, int win, int minp):
# of its Simplified BSD license
# https://github.com/kwgoodman/bottleneck

cdef struct pairs:
double value
int death

from libc cimport stdlib

@cython.boundscheck(False)
@cython.wraparound(False)
def roll_max(ndarray[float64_t] a, int window, int minp):
"Moving max of 1d array of dtype=float64 along axis=0 ignoring NaNs."
cdef np.float64_t ai, aold
cdef Py_ssize_t count
cdef pairs* ring
cdef pairs* minpair
cdef pairs* end
cdef pairs* last
cdef Py_ssize_t i0
cdef np.npy_intp *dim
dim = PyArray_DIMS(a)
cdef Py_ssize_t n0 = dim[0]
cdef np.npy_intp *dims = [n0]
cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims,
NPY_float64, 0)

if window < 1:
raise ValueError('Invalid window size %d'
% (window))

if minp > window:
raise ValueError('Invalid min_periods size %d greater than window %d'
% (minp, window))

minp = _check_minp(window, minp, n0)
with nogil:
ring = <pairs*>stdlib.malloc(window * sizeof(pairs))
end = ring + window
last = ring

minpair = ring
ai = a[0]
if ai == ai:
minpair.value = ai
else:
minpair.value = MINfloat64
minpair.death = window

count = 0
for i0 in range(n0):
ai = a[i0]
if ai == ai:
count += 1
else:
ai = MINfloat64
if i0 >= window:
aold = a[i0 - window]
if aold == aold:
count -= 1
if minpair.death == i0:
minpair += 1
if minpair >= end:
minpair = ring
if ai >= minpair.value:
minpair.value = ai
minpair.death = i0 + window
last = minpair
else:
while last.value <= ai:
if last == ring:
last = end
last -= 1
last += 1
if last == end:
last = ring
last.value = ai
last.death = i0 + window
if count >= minp:
y[i0] = minpair.value
else:
y[i0] = NaN

for i0 in range(minp - 1):
y[i0] = NaN

stdlib.free(ring)
return y
def roll_max(ndarray[numeric] a, int window, int minp):
"""
Moving max of 1d array of any numeric type along axis=0 ignoring NaNs.

Parameters
----------
a: numpy array
window: int, size of rolling window
minp: if number of observations in window
is below this, output a NaN
"""
return _roll_min_max(a, window, minp, 1)

cdef double_t _get_max(object skiplist, int nobs, int minp):
if nobs >= minp:
return <IndexableSkiplist> skiplist.get(nobs - 1)
else:
return NaN
@cython.boundscheck(False)
@cython.wraparound(False)
def roll_min(ndarray[numeric] a, int window, int minp):
"""
Moving max of 1d array of any numeric type along axis=0 ignoring NaNs.

Parameters
----------
a: numpy array
window: int, size of rolling window
minp: if number of observations in window
is below this, output a NaN
"""
return _roll_min_max(a, window, minp, 0)

@cython.boundscheck(False)
@cython.wraparound(False)
def roll_min(np.ndarray[np.float64_t, ndim=1] a, int window, int minp):
"Moving min of 1d array of dtype=float64 along axis=0 ignoring NaNs."
cdef np.float64_t ai, aold
cdef _roll_min_max(ndarray[numeric] a, int window, int minp, bint is_max):
"Moving min/max of 1d array of any numeric type along axis=0 ignoring NaNs."
cdef numeric ai, aold
cdef Py_ssize_t count
cdef pairs* ring
cdef pairs* minpair
cdef pairs* end
cdef pairs* last
cdef Py_ssize_t* death
cdef numeric* ring
cdef numeric* minvalue
cdef numeric* end
cdef numeric* last
cdef Py_ssize_t i0
cdef np.npy_intp *dim
dim = PyArray_DIMS(a)
cdef Py_ssize_t n0 = dim[0]
cdef np.npy_intp *dims = [n0]
cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims,
NPY_float64, 0)
cdef bint should_replace
cdef np.ndarray[numeric, ndim=1] y = PyArray_EMPTY(1, dims, PyArray_TYPE(a), 0)

if window < 1:
raise ValueError('Invalid window size %d'
Expand All @@ -1747,58 +1686,85 @@ def roll_min(np.ndarray[np.float64_t, ndim=1] a, int window, int minp):

minp = _check_minp(window, minp, n0)
with nogil:
ring = <pairs*>stdlib.malloc(window * sizeof(pairs))
ring = <numeric*>stdlib.malloc(window * sizeof(numeric))
death = <Py_ssize_t*>stdlib.malloc(window * sizeof(Py_ssize_t))
end = ring + window
last = ring

minpair = ring
minvalue = ring
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minvalue does not seem to be declared

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cython declares it automatically, which can be seen in the generated C code.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, but you declare everything else, any problem with declaring it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nope, I'll put it in and add the docs.

ai = a[0]
if ai == ai:
minpair.value = ai
if numeric in cython.floating:
if ai == ai:
minvalue[0] = ai
elif is_max:
minvalue[0] = MINfloat64
else:
minvalue[0] = MAXfloat64
else:
minpair.value = MAXfloat64
minpair.death = window
minvalue[0] = ai
death[0] = window

count = 0
for i0 in range(n0):
ai = a[i0]
if ai == ai:
count += 1
if numeric in cython.floating:
if ai == ai:
count += 1
elif is_max:
ai = MINfloat64
else:
ai = MAXfloat64
else:
ai = MAXfloat64
count += 1
if i0 >= window:
aold = a[i0 - window]
if aold == aold:
count -= 1
if minpair.death == i0:
minpair += 1
if minpair >= end:
minpair = ring
if ai <= minpair.value:
minpair.value = ai
minpair.death = i0 + window
last = minpair
if death[minvalue-ring] == i0:
minvalue += 1
if minvalue >= end:
minvalue = ring
should_replace = ai >= minvalue[0] if is_max else ai <= minvalue[0]
if should_replace:
minvalue[0] = ai
death[minvalue-ring] = i0 + window
last = minvalue
else:
while last.value >= ai:
should_replace = last[0] <= ai if is_max else last[0] >= ai
while should_replace:
if last == ring:
last = end
last -= 1
should_replace = last[0] <= ai if is_max else last[0] >= ai
last += 1
if last == end:
last = ring
last.value = ai
last.death = i0 + window
if count >= minp:
y[i0] = minpair.value
last[0] = ai
death[last - ring] = i0 + window
if numeric in cython.floating:
if count >= minp:
y[i0] = minvalue[0]
else:
y[i0] = NaN
else:
y[i0] = NaN
y[i0] = minvalue[0]

for i0 in range(minp - 1):
y[i0] = NaN
if numeric in cython.floating:
y[i0] = NaN
else:
y[i0] = 0

stdlib.free(ring)
stdlib.free(death)
return y

cdef double_t _get_max(object skiplist, int nobs, int minp):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did you add this? I don't see this (or _get_min) used AT ALL in the codebase. any idea?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you don't need to fix, just lmk.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it was there before and was removed but I accidentally added it in a merge. I won’t have a chance to get to this today. Can you accept the pull request and I’ll make a separate PR to remove it?

From: Jeff Reback [mailto:[email protected]]
Sent: Thursday, March 17, 2016 9:36 AM
To: pydata/pandas
Cc: Joshua Storck
Subject: Re: [pandas] Implementing rolling min/max functions that can retain the original type (#12595)

In pandas/algos.pyxhttps://github.com//pull/12595#discussion_r56503823:

 return y

+cdef double_t _get_max(object skiplist, int nobs, int minp):

you don't need to fix, just lmk.


You are receiving this because you were mentioned.
Reply to this email directly or view it on GitHubhttps://github.com//pull/12595/files/b5a84cf309ffc8cbb20cb6b6534d2ad2083aa5bf#r56503823

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am going to merge this and take it out. thxnks

if nobs >= minp:
return <IndexableSkiplist> skiplist.get(nobs - 1)
else:
return NaN

cdef double_t _get_min(object skiplist, int nobs, int minp):
if nobs >= minp:
return <IndexableSkiplist> skiplist.get(0)
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -2697,3 +2697,19 @@ def test_rolling_median_memory_error(self):
n = 20000
Series(np.random.randn(n)).rolling(window=2, center=False).median()
Series(np.random.randn(n)).rolling(window=2, center=False).median()

def test_rolling_min_max_numeric_types(self):
# GH12373
types_test = [np.dtype("f{}".format(width)) for width in [4, 8]]
types_test.extend([np.dtype("{}{}".format(sign, width))
for width in [1, 2, 4, 8] for sign in "ui"])
for data_type in types_test:
# Just testing that these don't throw exceptions and that
# the return type is float64. Other tests will cover quantitative
# correctness
result = (DataFrame(np.arange(20, dtype=data_type))
.rolling(window=5).max())
self.assertEqual(result.dtypes[0], np.dtype("f8"))
result = (DataFrame(np.arange(20, dtype=data_type))
.rolling(window=5).min())
self.assertEqual(result.dtypes[0], np.dtype("f8"))