Skip to content

BUG: datetime rolling min/max segfaults when closed=left (#21704) #21853

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,9 @@ Groupby/Resample/Rolling
-
-

- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'` and a
datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)

Sparse
^^^^^^

Expand Down
241 changes: 144 additions & 97 deletions pandas/_libs/window.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1218,141 +1218,188 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp,
Moving min/max of 1d array of any numeric type along axis=0
ignoring NaNs.
"""

cdef:
numeric ai
bint is_variable, should_replace
int64_t N, i, removed, window_i
Py_ssize_t nobs = 0
deque Q[int64_t]
ndarray[int64_t] starti, endi
ndarray[numeric, ndim=1] output
cdef:
int64_t* death
numeric* ring
numeric* minvalue
numeric* end
numeric* last

cdef:
cdef numeric r
int64_t N
bint is_variable

starti, endi, N, win, minp, is_variable = get_window_indexer(
input, win,
minp, index, closed)

output = np.empty(N, dtype=input.dtype)
if is_variable:
return _roll_min_max_variable(input, starti, endi, N, win, minp,
is_max)
else:
return _roll_min_max_fixed(input, starti, endi, N, win, minp, is_max)


cdef _roll_min_max_variable(ndarray[numeric] input,
ndarray[int64_t] starti,
ndarray[int64_t] endi,
int64_t N,
int64_t win,
int64_t minp,
bint is_max):
cdef:
numeric ai
int64_t i, close_offset, curr_win_size
Py_ssize_t nobs = 0
deque Q[int64_t] # min/max always the front
deque W[int64_t] # track the whole window for nobs compute
ndarray[double_t, ndim=1] output

output = np.empty(N, dtype=float)
Q = deque[int64_t]()
W = deque[int64_t]()

if is_variable:
with nogil:

with nogil:
# This is using a modified version of the C++ code in this
# SO post: http://bit.ly/2nOoHlY
# The original impl didn't deal with variable window sizes
# So the code was optimized for that

# This is using a modified version of the C++ code in this
# SO post: http://bit.ly/2nOoHlY
# The original impl didn't deal with variable window sizes
# So the code was optimized for that
for i from starti[0] <= i < endi[0]:
ai = init_mm(input[i], &nobs, is_max)

for i from starti[0] <= i < endi[0]:
ai = init_mm(input[i], &nobs, is_max)
# Discard previous entries if we find new min or max
if is_max:
while not Q.empty() and ((ai >= input[Q.back()]) or
(input[Q.back()] != input[Q.back()])):
Q.pop_back()
else:
while not Q.empty() and ((ai <= input[Q.back()]) or
(input[Q.back()] != input[Q.back()])):
Q.pop_back()
Q.push_back(i)
W.push_back(i)

# if right is open then the first window is empty
close_offset = 0 if endi[0] > starti[0] else 1

for i in range(endi[0], endi[N-1]):
if not Q.empty():
output[i-1+close_offset] = calc_mm(
minp, nobs, input[Q.front()])
else:
output[i-1+close_offset] = NaN

if is_max:
while not Q.empty() and ai >= input[Q.back()]:
Q.pop_back()
else:
while not Q.empty() and ai <= input[Q.back()]:
Q.pop_back()
Q.push_back(i)
ai = init_mm(input[i], &nobs, is_max)

# Discard previous entries if we find new min or max
if is_max:
while not Q.empty() and ((ai >= input[Q.back()]) or
(input[Q.back()] != input[Q.back()])):
Q.pop_back()
else:
while not Q.empty() and ((ai <= input[Q.back()]) or
(input[Q.back()] != input[Q.back()])):
Q.pop_back()

for i from endi[0] <= i < N:
output[i-1] = calc_mm(minp, nobs, input[Q.front()])
# Maintain window/nobs retention
curr_win_size = endi[i + close_offset] - starti[i + close_offset]
while not Q.empty() and Q.front() <= i - curr_win_size:
Q.pop_front()
while not W.empty() and W.front() <= i - curr_win_size:
remove_mm(input[W.front()], &nobs)
W.pop_front()

ai = init_mm(input[i], &nobs, is_max)
Q.push_back(i)
W.push_back(i)

if is_max:
while not Q.empty() and ai >= input[Q.back()]:
Q.pop_back()
else:
while not Q.empty() and ai <= input[Q.back()]:
Q.pop_back()
output[N-1] = calc_mm(minp, nobs, input[Q.front()])

while not Q.empty() and Q.front() <= i - (endi[i] - starti[i]):
Q.pop_front()
return output

Q.push_back(i)

output[N-1] = calc_mm(minp, nobs, input[Q.front()])
cdef _roll_min_max_fixed(ndarray[numeric] input,
ndarray[int64_t] starti,
ndarray[int64_t] endi,
int64_t N,
int64_t win,
int64_t minp,
bint is_max):
cdef:
numeric ai
bint should_replace
int64_t i, removed, window_i,
Py_ssize_t nobs = 0
int64_t* death
numeric* ring
numeric* minvalue
numeric* end
numeric* last
ndarray[double_t, ndim=1] output

else:
# setup the rings of death!
ring = <numeric *>malloc(win * sizeof(numeric))
death = <int64_t *>malloc(win * sizeof(int64_t))

end = ring + win
last = ring
minvalue = ring
ai = input[0]
minvalue[0] = init_mm(input[0], &nobs, is_max)
death[0] = win
nobs = 0
output = np.empty(N, dtype=float)
# setup the rings of death!
ring = <numeric *>malloc(win * sizeof(numeric))
death = <int64_t *>malloc(win * sizeof(int64_t))

end = ring + win
last = ring
minvalue = ring
ai = input[0]
minvalue[0] = init_mm(input[0], &nobs, is_max)
death[0] = win
nobs = 0

with nogil:
with nogil:

for i in range(N):
ai = init_mm(input[i], &nobs, is_max)
for i in range(N):
ai = init_mm(input[i], &nobs, is_max)

if i >= win:
remove_mm(input[i - win], &nobs)
if i >= win:
remove_mm(input[i - win], &nobs)

if death[minvalue - ring] == i:
minvalue = minvalue + 1
if minvalue >= end:
minvalue = ring
if death[minvalue - ring] == i:
minvalue = minvalue + 1
if minvalue >= end:
minvalue = ring

if is_max:
should_replace = ai >= minvalue[0]
else:
should_replace = ai <= minvalue[0]
if should_replace:
if is_max:
should_replace = ai >= minvalue[0]
else:
should_replace = ai <= minvalue[0]
if should_replace:

minvalue[0] = ai
death[minvalue - ring] = i + win
last = minvalue
minvalue[0] = ai
death[minvalue - ring] = i + win
last = minvalue

else:
else:

if is_max:
should_replace = last[0] <= ai
else:
should_replace = last[0] >= ai
while should_replace:
if last == ring:
last = end
last -= 1
if is_max:
should_replace = last[0] <= ai
else:
should_replace = last[0] >= ai
while should_replace:
if last == ring:
last = end
last -= 1
if is_max:
should_replace = last[0] <= ai
else:
should_replace = last[0] >= ai

last += 1
if last == end:
last = ring
last[0] = ai
death[last - ring] = i + win
last += 1
if last == end:
last = ring
last[0] = ai
death[last - ring] = i + win

output[i] = calc_mm(minp, nobs, minvalue[0])
output[i] = calc_mm(minp, nobs, minvalue[0])

for i in range(minp - 1):
if numeric in cython.floating:
output[i] = NaN
else:
output[i] = 0
for i in range(minp - 1):
if numeric in cython.floating:
output[i] = NaN
else:
output[i] = 0

free(ring)
free(death)
free(ring)
free(death)

# print("output: {0}".format(output))
return output


Expand Down
54 changes: 54 additions & 0 deletions pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,60 @@ def test_closed(self):
with pytest.raises(ValueError):
df.rolling(window=3, closed='neither')

@pytest.mark.parametrize("input_dtype", ['int', 'float'])
@pytest.mark.parametrize("func,closed,expected", [
('min', 'right', [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
('min', 'both', [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
('min', 'neither', [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
('min', 'left', [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
('max', 'right', [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
('max', 'both', [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
('max', 'neither', [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
('max', 'left', [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8])
])
def test_closed_min_max_datetime(self, input_dtype,
func, closed,
expected):
# see gh-21704
ser = pd.Series(data=np.arange(10).astype(input_dtype),
index=pd.date_range('2000', periods=10))

result = getattr(ser.rolling('3D', closed=closed), func)()
expected = pd.Series(expected, index=ser.index)
tm.assert_series_equal(result, expected)

def test_closed_uneven(self):
# see gh-21704
ser = pd.Series(data=np.arange(10),
index=pd.date_range('2000', periods=10))

# uneven
ser = ser.drop(index=ser.index[[1, 5]])
result = ser.rolling('3D', closed='left').min()
expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6],
index=ser.index)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("func,closed,expected", [
('min', 'right', [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
('min', 'both', [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]),
('min', 'neither', [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
('min', 'left', [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]),
('max', 'right', [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]),
('max', 'both', [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]),
('max', 'neither', [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]),
('max', 'left', [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan])
])
def test_closed_min_max_minp(self, func, closed, expected):
# see gh-21704
ser = pd.Series(data=np.arange(10),
index=pd.date_range('2000', periods=10))
ser[ser.index[-3:]] = np.nan
result = getattr(ser.rolling('3D', min_periods=2, closed=closed),
func)()
expected = pd.Series(expected, index=ser.index)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize('roller', ['1s', 1])
def tests_empty_df_rolling(self, roller):
# GH 15819 Verifies that datetime and integer rolling windows can be
Expand Down