Skip to content

Commit 0a0b2b9

Browse files
changhiskhanWillAyd
authored andcommitted
BUG: datetime rolling min/max segfaults when closed=left (#21704) (#21853)
1 parent 1f6ddc4 commit 0a0b2b9

File tree

3 files changed

+201
-97
lines changed

3 files changed

+201
-97
lines changed

doc/source/whatsnew/v0.24.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,9 @@ Groupby/Resample/Rolling
477477
-
478478
-
479479

480+
- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'` and a
481+
datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)
482+
480483
Sparse
481484
^^^^^^
482485

pandas/_libs/window.pyx

+144-97
Original file line numberDiff line numberDiff line change
@@ -1218,141 +1218,188 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp,
12181218
Moving min/max of 1d array of any numeric type along axis=0
12191219
ignoring NaNs.
12201220
"""
1221-
12221221
cdef:
1223-
numeric ai
1224-
bint is_variable, should_replace
1225-
int64_t N, i, removed, window_i
1226-
Py_ssize_t nobs = 0
1227-
deque Q[int64_t]
12281222
ndarray[int64_t] starti, endi
1229-
ndarray[numeric, ndim=1] output
1230-
cdef:
1231-
int64_t* death
1232-
numeric* ring
1233-
numeric* minvalue
1234-
numeric* end
1235-
numeric* last
1236-
1237-
cdef:
1238-
cdef numeric r
1223+
int64_t N
1224+
bint is_variable
12391225

12401226
starti, endi, N, win, minp, is_variable = get_window_indexer(
12411227
input, win,
12421228
minp, index, closed)
12431229

1244-
output = np.empty(N, dtype=input.dtype)
1230+
if is_variable:
1231+
return _roll_min_max_variable(input, starti, endi, N, win, minp,
1232+
is_max)
1233+
else:
1234+
return _roll_min_max_fixed(input, starti, endi, N, win, minp, is_max)
1235+
12451236

1237+
cdef _roll_min_max_variable(ndarray[numeric] input,
1238+
ndarray[int64_t] starti,
1239+
ndarray[int64_t] endi,
1240+
int64_t N,
1241+
int64_t win,
1242+
int64_t minp,
1243+
bint is_max):
1244+
cdef:
1245+
numeric ai
1246+
int64_t i, close_offset, curr_win_size
1247+
Py_ssize_t nobs = 0
1248+
deque Q[int64_t] # min/max always the front
1249+
deque W[int64_t] # track the whole window for nobs compute
1250+
ndarray[double_t, ndim=1] output
1251+
1252+
output = np.empty(N, dtype=float)
12461253
Q = deque[int64_t]()
1254+
W = deque[int64_t]()
12471255

1248-
if is_variable:
1256+
with nogil:
12491257

1250-
with nogil:
1258+
# This is using a modified version of the C++ code in this
1259+
# SO post: http://bit.ly/2nOoHlY
1260+
# The original impl didn't deal with variable window sizes
1261+
# So the code was optimized for that
12511262

1252-
# This is using a modified version of the C++ code in this
1253-
# SO post: http://bit.ly/2nOoHlY
1254-
# The original impl didn't deal with variable window sizes
1255-
# So the code was optimized for that
1263+
for i from starti[0] <= i < endi[0]:
1264+
ai = init_mm(input[i], &nobs, is_max)
12561265

1257-
for i from starti[0] <= i < endi[0]:
1258-
ai = init_mm(input[i], &nobs, is_max)
1266+
# Discard previous entries if we find new min or max
1267+
if is_max:
1268+
while not Q.empty() and ((ai >= input[Q.back()]) or
1269+
(input[Q.back()] != input[Q.back()])):
1270+
Q.pop_back()
1271+
else:
1272+
while not Q.empty() and ((ai <= input[Q.back()]) or
1273+
(input[Q.back()] != input[Q.back()])):
1274+
Q.pop_back()
1275+
Q.push_back(i)
1276+
W.push_back(i)
1277+
1278+
# if right is open then the first window is empty
1279+
close_offset = 0 if endi[0] > starti[0] else 1
1280+
1281+
for i in range(endi[0], endi[N-1]):
1282+
if not Q.empty():
1283+
output[i-1+close_offset] = calc_mm(
1284+
minp, nobs, input[Q.front()])
1285+
else:
1286+
output[i-1+close_offset] = NaN
12591287

1260-
if is_max:
1261-
while not Q.empty() and ai >= input[Q.back()]:
1262-
Q.pop_back()
1263-
else:
1264-
while not Q.empty() and ai <= input[Q.back()]:
1265-
Q.pop_back()
1266-
Q.push_back(i)
1288+
ai = init_mm(input[i], &nobs, is_max)
1289+
1290+
# Discard previous entries if we find new min or max
1291+
if is_max:
1292+
while not Q.empty() and ((ai >= input[Q.back()]) or
1293+
(input[Q.back()] != input[Q.back()])):
1294+
Q.pop_back()
1295+
else:
1296+
while not Q.empty() and ((ai <= input[Q.back()]) or
1297+
(input[Q.back()] != input[Q.back()])):
1298+
Q.pop_back()
12671299

1268-
for i from endi[0] <= i < N:
1269-
output[i-1] = calc_mm(minp, nobs, input[Q.front()])
1300+
# Maintain window/nobs retention
1301+
curr_win_size = endi[i + close_offset] - starti[i + close_offset]
1302+
while not Q.empty() and Q.front() <= i - curr_win_size:
1303+
Q.pop_front()
1304+
while not W.empty() and W.front() <= i - curr_win_size:
1305+
remove_mm(input[W.front()], &nobs)
1306+
W.pop_front()
12701307

1271-
ai = init_mm(input[i], &nobs, is_max)
1308+
Q.push_back(i)
1309+
W.push_back(i)
12721310

1273-
if is_max:
1274-
while not Q.empty() and ai >= input[Q.back()]:
1275-
Q.pop_back()
1276-
else:
1277-
while not Q.empty() and ai <= input[Q.back()]:
1278-
Q.pop_back()
1311+
output[N-1] = calc_mm(minp, nobs, input[Q.front()])
12791312

1280-
while not Q.empty() and Q.front() <= i - (endi[i] - starti[i]):
1281-
Q.pop_front()
1313+
return output
12821314

1283-
Q.push_back(i)
12841315

1285-
output[N-1] = calc_mm(minp, nobs, input[Q.front()])
1316+
cdef _roll_min_max_fixed(ndarray[numeric] input,
1317+
ndarray[int64_t] starti,
1318+
ndarray[int64_t] endi,
1319+
int64_t N,
1320+
int64_t win,
1321+
int64_t minp,
1322+
bint is_max):
1323+
cdef:
1324+
numeric ai
1325+
bint should_replace
1326+
int64_t i, removed, window_i,
1327+
Py_ssize_t nobs = 0
1328+
int64_t* death
1329+
numeric* ring
1330+
numeric* minvalue
1331+
numeric* end
1332+
numeric* last
1333+
ndarray[double_t, ndim=1] output
12861334

1287-
else:
1288-
# setup the rings of death!
1289-
ring = <numeric *>malloc(win * sizeof(numeric))
1290-
death = <int64_t *>malloc(win * sizeof(int64_t))
1291-
1292-
end = ring + win
1293-
last = ring
1294-
minvalue = ring
1295-
ai = input[0]
1296-
minvalue[0] = init_mm(input[0], &nobs, is_max)
1297-
death[0] = win
1298-
nobs = 0
1335+
output = np.empty(N, dtype=float)
1336+
# setup the rings of death!
1337+
ring = <numeric *>malloc(win * sizeof(numeric))
1338+
death = <int64_t *>malloc(win * sizeof(int64_t))
1339+
1340+
end = ring + win
1341+
last = ring
1342+
minvalue = ring
1343+
ai = input[0]
1344+
minvalue[0] = init_mm(input[0], &nobs, is_max)
1345+
death[0] = win
1346+
nobs = 0
12991347

1300-
with nogil:
1348+
with nogil:
13011349

1302-
for i in range(N):
1303-
ai = init_mm(input[i], &nobs, is_max)
1350+
for i in range(N):
1351+
ai = init_mm(input[i], &nobs, is_max)
13041352

1305-
if i >= win:
1306-
remove_mm(input[i - win], &nobs)
1353+
if i >= win:
1354+
remove_mm(input[i - win], &nobs)
13071355

1308-
if death[minvalue - ring] == i:
1309-
minvalue = minvalue + 1
1310-
if minvalue >= end:
1311-
minvalue = ring
1356+
if death[minvalue - ring] == i:
1357+
minvalue = minvalue + 1
1358+
if minvalue >= end:
1359+
minvalue = ring
13121360

1313-
if is_max:
1314-
should_replace = ai >= minvalue[0]
1315-
else:
1316-
should_replace = ai <= minvalue[0]
1317-
if should_replace:
1361+
if is_max:
1362+
should_replace = ai >= minvalue[0]
1363+
else:
1364+
should_replace = ai <= minvalue[0]
1365+
if should_replace:
13181366

1319-
minvalue[0] = ai
1320-
death[minvalue - ring] = i + win
1321-
last = minvalue
1367+
minvalue[0] = ai
1368+
death[minvalue - ring] = i + win
1369+
last = minvalue
13221370

1323-
else:
1371+
else:
13241372

1373+
if is_max:
1374+
should_replace = last[0] <= ai
1375+
else:
1376+
should_replace = last[0] >= ai
1377+
while should_replace:
1378+
if last == ring:
1379+
last = end
1380+
last -= 1
13251381
if is_max:
13261382
should_replace = last[0] <= ai
13271383
else:
13281384
should_replace = last[0] >= ai
1329-
while should_replace:
1330-
if last == ring:
1331-
last = end
1332-
last -= 1
1333-
if is_max:
1334-
should_replace = last[0] <= ai
1335-
else:
1336-
should_replace = last[0] >= ai
13371385

1338-
last += 1
1339-
if last == end:
1340-
last = ring
1341-
last[0] = ai
1342-
death[last - ring] = i + win
1386+
last += 1
1387+
if last == end:
1388+
last = ring
1389+
last[0] = ai
1390+
death[last - ring] = i + win
13431391

1344-
output[i] = calc_mm(minp, nobs, minvalue[0])
1392+
output[i] = calc_mm(minp, nobs, minvalue[0])
13451393

1346-
for i in range(minp - 1):
1347-
if numeric in cython.floating:
1348-
output[i] = NaN
1349-
else:
1350-
output[i] = 0
1394+
for i in range(minp - 1):
1395+
if numeric in cython.floating:
1396+
output[i] = NaN
1397+
else:
1398+
output[i] = 0
13511399

1352-
free(ring)
1353-
free(death)
1400+
free(ring)
1401+
free(death)
13541402

1355-
# print("output: {0}".format(output))
13561403
return output
13571404

13581405

pandas/tests/test_window.py

+54
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,60 @@ def test_closed(self):
464464
with pytest.raises(ValueError):
465465
df.rolling(window=3, closed='neither')
466466

467+
@pytest.mark.parametrize("input_dtype", ['int', 'float'])
468+
@pytest.mark.parametrize("func,closed,expected", [
469+
('min', 'right', [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
470+
('min', 'both', [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
471+
('min', 'neither', [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
472+
('min', 'left', [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
473+
('max', 'right', [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
474+
('max', 'both', [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
475+
('max', 'neither', [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
476+
('max', 'left', [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8])
477+
])
478+
def test_closed_min_max_datetime(self, input_dtype,
479+
func, closed,
480+
expected):
481+
# see gh-21704
482+
ser = pd.Series(data=np.arange(10).astype(input_dtype),
483+
index=pd.date_range('2000', periods=10))
484+
485+
result = getattr(ser.rolling('3D', closed=closed), func)()
486+
expected = pd.Series(expected, index=ser.index)
487+
tm.assert_series_equal(result, expected)
488+
489+
def test_closed_uneven(self):
490+
# see gh-21704
491+
ser = pd.Series(data=np.arange(10),
492+
index=pd.date_range('2000', periods=10))
493+
494+
# uneven
495+
ser = ser.drop(index=ser.index[[1, 5]])
496+
result = ser.rolling('3D', closed='left').min()
497+
expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6],
498+
index=ser.index)
499+
tm.assert_series_equal(result, expected)
500+
501+
@pytest.mark.parametrize("func,closed,expected", [
502+
('min', 'right', [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
503+
('min', 'both', [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]),
504+
('min', 'neither', [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
505+
('min', 'left', [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]),
506+
('max', 'right', [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]),
507+
('max', 'both', [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]),
508+
('max', 'neither', [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]),
509+
('max', 'left', [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan])
510+
])
511+
def test_closed_min_max_minp(self, func, closed, expected):
512+
# see gh-21704
513+
ser = pd.Series(data=np.arange(10),
514+
index=pd.date_range('2000', periods=10))
515+
ser[ser.index[-3:]] = np.nan
516+
result = getattr(ser.rolling('3D', min_periods=2, closed=closed),
517+
func)()
518+
expected = pd.Series(expected, index=ser.index)
519+
tm.assert_series_equal(result, expected)
520+
467521
@pytest.mark.parametrize('roller', ['1s', 1])
468522
def tests_empty_df_rolling(self, roller):
469523
# GH 15819 Verifies that datetime and integer rolling windows can be

0 commit comments

Comments
 (0)