Skip to content

BUG #10228: segfault due to out-of-bounds in binning #10337

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,5 @@ Performance Improvements
Bug Fixes
~~~~~~~~~
- Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`)
- Bug that caused a segfault when resampling an empty Series (:issue:`10228`)

4 changes: 4 additions & 0 deletions pandas/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2157,6 +2157,8 @@ def group_nth_bin_object(ndarray[object, ndim=2] out,
nobs = np.zeros((<object> out).shape, dtype=np.float64)
resx = np.empty((<object> out).shape, dtype=object)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -2247,6 +2249,8 @@ def group_last_bin_object(ndarray[object, ndim=2] out,
nobs = np.zeros((<object> out).shape, dtype=np.float64)
resx = np.empty((<object> out).shape, dtype=object)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down
20 changes: 20 additions & 0 deletions pandas/src/generate_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,8 @@ def group_last_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
nobs = np.zeros_like(out)
resx = np.empty_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -797,6 +799,8 @@ def group_nth_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
nobs = np.zeros_like(out)
resx = np.empty_like(out)

# No bin edges means nothing to aggregate; bail out before the
# bins[len(bins) - 1] lookup below can index past the end.
if len(bins) == 0:
    return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -948,6 +952,8 @@ def group_add_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
nobs = np.zeros_like(out)
sumx = np.zeros_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -1064,6 +1070,8 @@ def group_prod_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
nobs = np.zeros_like(out)
prodx = np.ones_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -1184,6 +1192,8 @@ def group_var_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
sumx = np.zeros_like(out)
sumxx = np.zeros_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -1285,6 +1295,8 @@ def group_count_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]),
dtype=np.int64)

if len(bins) == 0:
return
ngroups = len(bins) + (bins[len(bins) - 1] != N)

for i in range(N):
Expand Down Expand Up @@ -1329,6 +1341,8 @@ def group_min_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
minx = np.empty_like(out)
minx.fill(%(inf_val)s)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -1453,6 +1467,8 @@ def group_max_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
maxx = np.empty_like(out)
maxx.fill(-%(inf_val)s)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -1629,6 +1645,8 @@ def group_mean_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
sumx = np.zeros_like(out)

N, K = (<object> values).shape
if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -1685,6 +1703,8 @@ def group_ohlc_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
%(dest_type2)s vopen, vhigh, vlow, vclose, NA
bint got_first = 0

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down
54 changes: 54 additions & 0 deletions pandas/src/generated.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6725,6 +6725,8 @@ def group_add_bin_float64(ndarray[float64_t, ndim=2] out,
nobs = np.zeros_like(out)
sumx = np.zeros_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -6781,6 +6783,8 @@ def group_add_bin_float32(ndarray[float32_t, ndim=2] out,
nobs = np.zeros_like(out)
sumx = np.zeros_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -6951,6 +6955,8 @@ def group_prod_bin_float64(ndarray[float64_t, ndim=2] out,
nobs = np.zeros_like(out)
prodx = np.ones_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -7007,6 +7013,8 @@ def group_prod_bin_float32(ndarray[float32_t, ndim=2] out,
nobs = np.zeros_like(out)
prodx = np.ones_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -7186,6 +7194,8 @@ def group_var_bin_float64(ndarray[float64_t, ndim=2] out,
sumx = np.zeros_like(out)
sumxx = np.zeros_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -7247,6 +7257,8 @@ def group_var_bin_float32(ndarray[float32_t, ndim=2] out,
sumx = np.zeros_like(out)
sumxx = np.zeros_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -7412,6 +7424,8 @@ def group_mean_bin_float64(ndarray[float64_t, ndim=2] out,
sumx = np.zeros_like(out)

N, K = (<object> values).shape
if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -7465,6 +7479,8 @@ def group_mean_bin_float32(ndarray[float32_t, ndim=2] out,
sumx = np.zeros_like(out)

N, K = (<object> values).shape
if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -7520,6 +7536,8 @@ def group_ohlc_float64(ndarray[float64_t, ndim=2] out,
float64_t vopen, vhigh, vlow, vclose, NA
bint got_first = 0

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -7594,6 +7612,8 @@ def group_ohlc_float32(ndarray[float32_t, ndim=2] out,
float32_t vopen, vhigh, vlow, vclose, NA
bint got_first = 0

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -7801,6 +7821,8 @@ def group_last_bin_float64(ndarray[float64_t, ndim=2] out,
nobs = np.zeros_like(out)
resx = np.empty_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -7845,6 +7867,8 @@ def group_last_bin_float32(ndarray[float32_t, ndim=2] out,
nobs = np.zeros_like(out)
resx = np.empty_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -7889,6 +7913,8 @@ def group_last_bin_int64(ndarray[int64_t, ndim=2] out,
nobs = np.zeros_like(out)
resx = np.empty_like(out)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -8067,6 +8093,8 @@ def group_nth_bin_float64(ndarray[float64_t, ndim=2] out,
nobs = np.zeros_like(out)
resx = np.empty_like(out)

# No bin edges means nothing to aggregate; bail out before the
# bins[len(bins) - 1] lookup below can index past the end.
if len(bins) == 0:
    return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -8112,6 +8140,8 @@ def group_nth_bin_float32(ndarray[float32_t, ndim=2] out,
nobs = np.zeros_like(out)
resx = np.empty_like(out)

# No bin edges means nothing to aggregate; bail out before the
# bins[len(bins) - 1] lookup below can index past the end.
if len(bins) == 0:
    return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -8157,6 +8187,8 @@ def group_nth_bin_int64(ndarray[int64_t, ndim=2] out,
nobs = np.zeros_like(out)
resx = np.empty_like(out)

# No bin edges means nothing to aggregate; bail out before the
# bins[len(bins) - 1] lookup below can index past the end.
if len(bins) == 0:
    return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -8386,6 +8418,8 @@ def group_min_bin_float64(ndarray[float64_t, ndim=2] out,
minx = np.empty_like(out)
minx.fill(np.inf)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -8447,6 +8481,8 @@ def group_min_bin_float32(ndarray[float32_t, ndim=2] out,
minx = np.empty_like(out)
minx.fill(np.inf)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -8508,6 +8544,8 @@ def group_min_bin_int64(ndarray[int64_t, ndim=2] out,
minx = np.empty_like(out)
minx.fill(9223372036854775807)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -8750,6 +8788,8 @@ def group_max_bin_float64(ndarray[float64_t, ndim=2] out,
maxx = np.empty_like(out)
maxx.fill(-np.inf)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -8810,6 +8850,8 @@ def group_max_bin_float32(ndarray[float32_t, ndim=2] out,
maxx = np.empty_like(out)
maxx.fill(-np.inf)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -8870,6 +8912,8 @@ def group_max_bin_int64(ndarray[int64_t, ndim=2] out,
maxx = np.empty_like(out)
maxx.fill(-9223372036854775807)

if len(bins) == 0:
return
if bins[len(bins) - 1] == len(values):
ngroups = len(bins)
else:
Expand Down Expand Up @@ -9110,6 +9154,8 @@ def group_count_bin_float64(ndarray[float64_t, ndim=2] out,
ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]),
dtype=np.int64)

if len(bins) == 0:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is by definition generated code, so edits here will be lost. Instead, make the fix in src/generate_code.py, then run `python generate_code.py` to recreate generated.pyx.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh crap, my bad! I did see the name, but then I looked at git history and saw what appeared to be people editing it (but I get it now).

Do you think it's worth adding a warning message at the top for new devs like NumPy does? Or should it be obvious from the name?

return
ngroups = len(bins) + (bins[len(bins) - 1] != N)

for i in range(N):
Expand Down Expand Up @@ -9144,6 +9190,8 @@ def group_count_bin_float32(ndarray[float32_t, ndim=2] out,
ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]),
dtype=np.int64)

if len(bins) == 0:
return
ngroups = len(bins) + (bins[len(bins) - 1] != N)

for i in range(N):
Expand Down Expand Up @@ -9178,6 +9226,8 @@ def group_count_bin_int64(ndarray[int64_t, ndim=2] out,
ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]),
dtype=np.int64)

if len(bins) == 0:
return
ngroups = len(bins) + (bins[len(bins) - 1] != N)

for i in range(N):
Expand Down Expand Up @@ -9212,6 +9262,8 @@ def group_count_bin_object(ndarray[object, ndim=2] out,
ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]),
dtype=np.int64)

if len(bins) == 0:
return
ngroups = len(bins) + (bins[len(bins) - 1] != N)

for i in range(N):
Expand Down Expand Up @@ -9246,6 +9298,8 @@ def group_count_bin_int64(ndarray[int64_t, ndim=2] out,
ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]),
dtype=np.int64)

if len(bins) == 0:
return
ngroups = len(bins) + (bins[len(bins) - 1] != N)

for i in range(N):
Expand Down
15 changes: 15 additions & 0 deletions pandas/tseries/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pandas import (Series, TimeSeries, DataFrame, Panel, Index,
isnull, notnull, Timestamp)

from pandas.core.groupby import DataError
from pandas.tseries.index import date_range
from pandas.tseries.offsets import Minute, BDay
from pandas.tseries.period import period_range, PeriodIndex, Period
Expand Down Expand Up @@ -660,6 +661,20 @@ def test_resample_empty(self):
rs = xp.resample('A')
assert_frame_equal(xp, rs)

# Empty series were sometimes causing a segfault (for the functions
# with Cython bounds-checking disabled) or an IndexError. We just run
# them to ensure they no longer do. (GH #10228)
for index in tm.all_timeseries_index_generator(0):
for dtype in (np.float, np.int, np.object, 'datetime64[ns]'):
for how in ('count', 'mean', 'min', 'ohlc', 'last', 'prod'):
empty_series = pd.Series([], index, dtype)
try:
empty_series.resample('d', how)
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object)
pass

def test_weekly_resample_buglet(self):
# #1327
rng = date_range('1/1/2000', freq='B', periods=20)
Expand Down
Loading