diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index b7a0cf888f1a2..d9e0061411843 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -132,3 +132,4 @@ Bug Fixes - Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`) +- Bug in ``.rolling.min`` and ``.rolling.max`` where passing columns of type float32 raised a Value error (:issue:`12373`) diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 0f9ceba48e608..fb42bb9292272 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -1625,117 +1625,56 @@ def roll_median_c(ndarray[float64_t] arg, int win, int minp): # of its Simplified BSD license # https://github.com/kwgoodman/bottleneck -cdef struct pairs: - double value - int death - from libc cimport stdlib @cython.boundscheck(False) @cython.wraparound(False) -def roll_max(ndarray[float64_t] a, int window, int minp): - "Moving max of 1d array of dtype=float64 along axis=0 ignoring NaNs." - cdef np.float64_t ai, aold - cdef Py_ssize_t count - cdef pairs* ring - cdef pairs* minpair - cdef pairs* end - cdef pairs* last - cdef Py_ssize_t i0 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - cdef np.npy_intp *dims = [n0] - cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims, - NPY_float64, 0) - - if window < 1: - raise ValueError('Invalid window size %d' - % (window)) - - if minp > window: - raise ValueError('Invalid min_periods size %d greater than window %d' - % (minp, window)) - - minp = _check_minp(window, minp, n0) - with nogil: - ring = stdlib.malloc(window * sizeof(pairs)) - end = ring + window - last = ring - - minpair = ring - ai = a[0] - if ai == ai: - minpair.value = ai - else: - minpair.value = MINfloat64 - minpair.death = window - - count = 0 - for i0 in range(n0): - ai = a[i0] - if ai == ai: - count += 1 - else: - ai = MINfloat64 - if i0 >= window: - aold = a[i0 - window] - if aold == aold: - count -= 1 - if minpair.death == i0: - minpair += 1 - if minpair >= end: - minpair = ring - if ai >= minpair.value: - minpair.value = ai - minpair.death = i0 + window - last = minpair - else: - while last.value <= ai: - if last == ring: - last = end - last -= 1 - last += 1 - if last == end: - last = ring - last.value = ai - last.death = i0 + window - if count >= minp: - y[i0] = minpair.value - else: - y[i0] = NaN - - for i0 in range(minp - 1): - y[i0] = NaN - - stdlib.free(ring) - return y +def roll_max(ndarray[numeric] a, int window, int minp): + """ + Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + Parameters + ---------- + a: numpy array + window: int, size of rolling window + minp: if number of observations in window + is below this, output a NaN + """ + return _roll_min_max(a, window, minp, 1) -cdef double_t _get_max(object skiplist, int nobs, int minp): - if nobs >= minp: - return skiplist.get(nobs - 1) - else: - return NaN +@cython.boundscheck(False) +@cython.wraparound(False) +def roll_min(ndarray[numeric] a, int window, int minp): + """ + Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + Parameters + ---------- + a: numpy array + window: int, size of rolling window + minp: if number of observations in window + is below this, output a NaN + """ + return _roll_min_max(a, window, minp, 0) @cython.boundscheck(False) @cython.wraparound(False) -def roll_min(np.ndarray[np.float64_t, ndim=1] a, int window, int minp): - "Moving min of 1d array of dtype=float64 along axis=0 ignoring NaNs." - cdef np.float64_t ai, aold +cdef _roll_min_max(ndarray[numeric] a, int window, int minp, bint is_max): + "Moving min/max of 1d array of any numeric type along axis=0 ignoring NaNs." + cdef numeric ai, aold cdef Py_ssize_t count - cdef pairs* ring - cdef pairs* minpair - cdef pairs* end - cdef pairs* last + cdef Py_ssize_t* death + cdef numeric* ring + cdef numeric* minvalue + cdef numeric* end + cdef numeric* last cdef Py_ssize_t i0 cdef np.npy_intp *dim dim = PyArray_DIMS(a) cdef Py_ssize_t n0 = dim[0] cdef np.npy_intp *dims = [n0] - cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims, - NPY_float64, 0) + cdef bint should_replace + cdef np.ndarray[numeric, ndim=1] y = PyArray_EMPTY(1, dims, PyArray_TYPE(a), 0) if window < 1: raise ValueError('Invalid window size %d' @@ -1747,58 +1686,85 @@ def roll_min(np.ndarray[np.float64_t, ndim=1] a, int window, int minp): minp = _check_minp(window, minp, n0) with nogil: - ring = stdlib.malloc(window * sizeof(pairs)) + ring = stdlib.malloc(window * sizeof(numeric)) + death = stdlib.malloc(window * sizeof(Py_ssize_t)) end = ring + window last = ring - minpair = ring + minvalue = ring ai = a[0] - if ai == ai: - minpair.value = ai + if numeric in cython.floating: + if ai == ai: + minvalue[0] = ai + elif is_max: + minvalue[0] = MINfloat64 + else: + minvalue[0] = MAXfloat64 else: - minpair.value = MAXfloat64 - minpair.death = window + minvalue[0] = ai + death[0] = window count = 0 for i0 in range(n0): ai = a[i0] - if ai == ai: - count += 1 + if numeric in cython.floating: + if ai == ai: + count += 1 + elif is_max: + ai = MINfloat64 + else: + ai = MAXfloat64 else: - ai = MAXfloat64 + count += 1 if i0 >= window: aold = a[i0 - window] if aold == aold: count -= 1 - if minpair.death == i0: - minpair += 1 - if minpair >= end: - minpair = ring - if ai <= minpair.value: - minpair.value = ai - minpair.death = i0 + window - last = minpair + if death[minvalue-ring] == i0: + minvalue += 1 + if minvalue >= end: + minvalue = ring + should_replace = ai >= minvalue[0] if is_max else ai <= minvalue[0] + if should_replace: + minvalue[0] = ai + death[minvalue-ring] = i0 + window + last = minvalue else: - while last.value >= ai: + should_replace = last[0] <= ai if is_max else last[0] >= ai + while should_replace: if last == ring: last = end last -= 1 + should_replace = last[0] <= ai if is_max else last[0] >= ai last += 1 if last == end: last = ring - last.value = ai - last.death = i0 + window - if count >= minp: - y[i0] = minpair.value + last[0] = ai + death[last - ring] = i0 + window + if numeric in cython.floating: + if count >= minp: + y[i0] = minvalue[0] + else: + y[i0] = NaN else: - y[i0] = NaN + y[i0] = minvalue[0] for i0 in range(minp - 1): - y[i0] = NaN + if numeric in cython.floating: + y[i0] = NaN + else: + y[i0] = 0 stdlib.free(ring) + stdlib.free(death) return y +cdef double_t _get_max(object skiplist, int nobs, int minp): + if nobs >= minp: + return skiplist.get(nobs - 1) + else: + return NaN + cdef double_t _get_min(object skiplist, int nobs, int minp): if nobs >= minp: return skiplist.get(0) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 0d3b7e967f3ce..fb0e2ad2ca34e 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2697,3 +2697,19 @@ def test_rolling_median_memory_error(self): n = 20000 Series(np.random.randn(n)).rolling(window=2, center=False).median() Series(np.random.randn(n)).rolling(window=2, center=False).median() + + def test_rolling_min_max_numeric_types(self): + # GH12373 + types_test = [np.dtype("f{}".format(width)) for width in [4, 8]] + types_test.extend([np.dtype("{}{}".format(sign, width)) + for width in [1, 2, 4, 8] for sign in "ui"]) + for data_type in types_test: + # Just testing that these don't throw exceptions and that + # the return type is float64. Other tests will cover quantitative + # correctness + result = (DataFrame(np.arange(20, dtype=data_type)) + .rolling(window=5).max()) + self.assertEqual(result.dtypes[0], np.dtype("f8")) + result = (DataFrame(np.arange(20, dtype=data_type)) + .rolling(window=5).min()) + self.assertEqual(result.dtypes[0], np.dtype("f8"))