pandas-dev · WillAyd · Jul 20, 2018 · Jul 9, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -398,6 +398,9 @@ Groupby/Resample/Rolling
 -
 -
 
+- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'` and a
+  datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)
+
 Sparse
 ^^^^^^
 

diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx
@@ -1218,141 +1218,188 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp,
     Moving min/max of 1d array of any numeric type along axis=0
     ignoring NaNs.
     """
-
     cdef:
-        numeric ai
-        bint is_variable, should_replace
-        int64_t N, i, removed, window_i
-        Py_ssize_t nobs = 0
-        deque Q[int64_t]
         ndarray[int64_t] starti, endi
-        ndarray[numeric, ndim=1] output
-    cdef:
-        int64_t* death
-        numeric* ring
-        numeric* minvalue
-        numeric* end
-        numeric* last
-
-    cdef:
-        cdef numeric r
+        int64_t N
+        bint is_variable
 
     starti, endi, N, win, minp, is_variable = get_window_indexer(
         input, win,
         minp, index, closed)
 
-    output = np.empty(N, dtype=input.dtype)
+    if is_variable:
+        return _roll_min_max_variable(input, starti, endi, N, win, minp,
+                                      is_max)
+    else:
+        return _roll_min_max_fixed(input, starti, endi, N, win, minp, is_max)
+
 
+cdef _roll_min_max_variable(ndarray[numeric] input,
+                            ndarray[int64_t] starti,
+                            ndarray[int64_t] endi,
+                            int64_t N,
+                            int64_t win,
+                            int64_t minp,
+                            bint is_max):
+    cdef:
+        numeric ai
+        int64_t i, close_offset, curr_win_size
+        Py_ssize_t nobs = 0
+        deque Q[int64_t]  # min/max always the front
+        deque W[int64_t]  # track the whole window for nobs compute
+        ndarray[double_t, ndim=1] output
+
+    output = np.empty(N, dtype=float)
     Q = deque[int64_t]()
+    W = deque[int64_t]()
 
-    if is_variable:
+    with nogil:
 
-        with nogil:
+        # This is using a modified version of the C++ code in this
+        # SO post: http://bit.ly/2nOoHlY
+        # The original impl didn't deal with variable window sizes
+        # So the code was optimized for that
 
-            # This is using a modified version of the C++ code in this
-            # SO post: http://bit.ly/2nOoHlY
-            # The original impl didn't deal with variable window sizes
-            # So the code was optimized for that
+        for i from starti[0] <= i < endi[0]:
+            ai = init_mm(input[i], &nobs, is_max)
 
-            for i from starti[0] <= i < endi[0]:
-                ai = init_mm(input[i], &nobs, is_max)
+            # Discard previous entries if we find new min or max
+            if is_max:
+                while not Q.empty() and ((ai >= input[Q.back()]) or
+                                         (input[Q.back()] != input[Q.back()])):
+                    Q.pop_back()
+            else:
+                while not Q.empty() and ((ai <= input[Q.back()]) or
+                                         (input[Q.back()] != input[Q.back()])):
+                    Q.pop_back()
+            Q.push_back(i)
+            W.push_back(i)
+
+        # if right is open then the first window is empty
+        close_offset = 0 if endi[0] > starti[0] else 1
+
+        for i in range(endi[0], endi[N-1]):
+            if not Q.empty():
+                output[i-1+close_offset] = calc_mm(
+                    minp, nobs, input[Q.front()])
+            else:
+                output[i-1+close_offset] = NaN
 
-                if is_max:
-                    while not Q.empty() and ai >= input[Q.back()]:
-                        Q.pop_back()
-                else:
-                    while not Q.empty() and ai <= input[Q.back()]:
-                        Q.pop_back()
-                Q.push_back(i)
+            ai = init_mm(input[i], &nobs, is_max)
+
+            # Discard previous entries if we find new min or max
+            if is_max:
+                while not Q.empty() and ((ai >= input[Q.back()]) or
+                                         (input[Q.back()] != input[Q.back()])):
+                    Q.pop_back()
+            else:
+                while not Q.empty() and ((ai <= input[Q.back()]) or
+                                         (input[Q.back()] != input[Q.back()])):
+                    Q.pop_back()
 
-            for i from endi[0] <= i < N:
-                output[i-1] = calc_mm(minp, nobs, input[Q.front()])
+            # Maintain window/nobs retention
+            curr_win_size = endi[i + close_offset] - starti[i + close_offset]
+            while not Q.empty() and Q.front() <= i - curr_win_size:
+                Q.pop_front()
+            while not W.empty() and W.front() <= i - curr_win_size:
+                remove_mm(input[W.front()], &nobs)
+                W.pop_front()
 
-                ai = init_mm(input[i], &nobs, is_max)
+            Q.push_back(i)
+            W.push_back(i)
 
-                if is_max:
-                    while not Q.empty() and ai >= input[Q.back()]:
-                        Q.pop_back()
-                else:
-                    while not Q.empty() and ai <= input[Q.back()]:
-                        Q.pop_back()
+        output[N-1] = calc_mm(minp, nobs, input[Q.front()])
 
-                while not Q.empty() and Q.front() <= i - (endi[i] - starti[i]):
-                    Q.pop_front()
+    return output
 
-                Q.push_back(i)
 
-            output[N-1] = calc_mm(minp, nobs, input[Q.front()])
+cdef _roll_min_max_fixed(ndarray[numeric] input,
+                         ndarray[int64_t] starti,
+                         ndarray[int64_t] endi,
+                         int64_t N,
+                         int64_t win,
+                         int64_t minp,
+                         bint is_max):
+    cdef:
+        numeric ai
+        bint should_replace
+        int64_t i, removed, window_i,
+        Py_ssize_t nobs = 0
+        int64_t* death
+        numeric* ring
+        numeric* minvalue
+        numeric* end
+        numeric* last
+        ndarray[double_t, ndim=1] output
 
-    else:
-        # setup the rings of death!
-        ring = <numeric *>malloc(win * sizeof(numeric))
-        death = <int64_t *>malloc(win * sizeof(int64_t))
-
-        end = ring + win
-        last = ring
-        minvalue = ring
-        ai = input[0]
-        minvalue[0] = init_mm(input[0], &nobs, is_max)
-        death[0] = win
-        nobs = 0
+    output = np.empty(N, dtype=float)
+    # setup the rings of death!
+    ring = <numeric *>malloc(win * sizeof(numeric))
+    death = <int64_t *>malloc(win * sizeof(int64_t))
+
+    end = ring + win
+    last = ring
+    minvalue = ring
+    ai = input[0]
+    minvalue[0] = init_mm(input[0], &nobs, is_max)
+    death[0] = win
+    nobs = 0
 
-        with nogil:
+    with nogil:
 
-            for i in range(N):
-                ai = init_mm(input[i], &nobs, is_max)
+        for i in range(N):
+            ai = init_mm(input[i], &nobs, is_max)
 
-                if i >= win:
-                    remove_mm(input[i - win], &nobs)
+            if i >= win:
+                remove_mm(input[i - win], &nobs)
 
-                if death[minvalue - ring] == i:
-                    minvalue = minvalue + 1
-                    if minvalue >= end:
-                        minvalue = ring
+            if death[minvalue - ring] == i:
+                minvalue = minvalue + 1
+                if minvalue >= end:
+                    minvalue = ring
 
-                if is_max:
-                    should_replace = ai >= minvalue[0]
-                else:
-                    should_replace = ai <= minvalue[0]
-                if should_replace:
+            if is_max:
+                should_replace = ai >= minvalue[0]
+            else:
+                should_replace = ai <= minvalue[0]
+            if should_replace:
 
-                    minvalue[0] = ai
-                    death[minvalue - ring] = i + win
-                    last = minvalue
+                minvalue[0] = ai
+                death[minvalue - ring] = i + win
+                last = minvalue
 
-                else:
+            else:
 
+                if is_max:
+                    should_replace = last[0] <= ai
+                else:
+                    should_replace = last[0] >= ai
+                while should_replace:
+                    if last == ring:
+                        last = end
+                    last -= 1
                     if is_max:
                         should_replace = last[0] <= ai
                     else:
                         should_replace = last[0] >= ai
-                    while should_replace:
-                        if last == ring:
-                            last = end
-                        last -= 1
-                        if is_max:
-                            should_replace = last[0] <= ai
-                        else:
-                            should_replace = last[0] >= ai
 
-                    last += 1
-                    if last == end:
-                        last = ring
-                    last[0] = ai
-                    death[last - ring] = i + win
+                last += 1
+                if last == end:
+                    last = ring
+                last[0] = ai
+                death[last - ring] = i + win
 
-                output[i] = calc_mm(minp, nobs, minvalue[0])
+            output[i] = calc_mm(minp, nobs, minvalue[0])
 
-            for i in range(minp - 1):
-                if numeric in cython.floating:
-                    output[i] = NaN
-                else:
-                    output[i] = 0
+        for i in range(minp - 1):
+            if numeric in cython.floating:
+                output[i] = NaN
+            else:
+                output[i] = 0
 
-            free(ring)
-            free(death)
+        free(ring)
+        free(death)
 
-    # print("output: {0}".format(output))
     return output
 
 

diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
@@ -464,6 +464,60 @@ def test_closed(self):
         with pytest.raises(ValueError):
             df.rolling(window=3, closed='neither')
 
+    @pytest.mark.parametrize("input_dtype", ['int', 'float'])
+    @pytest.mark.parametrize("func,closed,expected", [
+        ('min', 'right', [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
+        ('min', 'both', [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
+        ('min', 'neither', [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
+        ('min', 'left', [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
+        ('max', 'right', [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
+        ('max', 'both', [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
+        ('max', 'neither', [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
+        ('max', 'left', [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8])
+    ])
+    def test_closed_min_max_datetime(self, input_dtype,
+                                     func, closed,
+                                     expected):
+        # see gh-21704
+        ser = pd.Series(data=np.arange(10).astype(input_dtype),
+                        index=pd.date_range('2000', periods=10))
+
+        result = getattr(ser.rolling('3D', closed=closed), func)()
+        expected = pd.Series(expected, index=ser.index)
+        tm.assert_series_equal(result, expected)
+
+    def test_closed_uneven(self):
+        # see gh-21704
+        ser = pd.Series(data=np.arange(10),
+                        index=pd.date_range('2000', periods=10))
+
+        # uneven
+        ser = ser.drop(index=ser.index[[1, 5]])
+        result = ser.rolling('3D', closed='left').min()
+        expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6],
+                             index=ser.index)
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("func,closed,expected", [
+        ('min', 'right', [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
+        ('min', 'both', [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]),
+        ('min', 'neither', [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
+        ('min', 'left', [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]),
+        ('max', 'right', [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]),
+        ('max', 'both', [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]),
+        ('max', 'neither', [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]),
+        ('max', 'left', [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan])
+    ])
+    def test_closed_min_max_minp(self, func, closed, expected):
+        # see gh-21704
+        ser = pd.Series(data=np.arange(10),
+                        index=pd.date_range('2000', periods=10))
+        ser[ser.index[-3:]] = np.nan
+        result = getattr(ser.rolling('3D', min_periods=2, closed=closed),
+                         func)()
+        expected = pd.Series(expected, index=ser.index)
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize('roller', ['1s', 1])
     def tests_empty_df_rolling(self, roller):
         # GH 15819 Verifies that datetime and integer rolling windows can be
-Original file line number
+Diff line change
@@ Expand Up / @@ -398,6 +398,9 @@ Groupby/Resample/Rolling @@
     -
     -
+    - Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'` and a
+      datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)
     Sparse
     ^^^^^^
@@ Expand Down @@