diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 4a513f3122390..cc1e4b09291cc 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -59,3 +59,5 @@ Performance Improvements Bug Fixes ~~~~~~~~~ - Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`) +- Bug that caused segfault when resampling an empty Series (:issue:`10228`) + diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 5f68c1ee26e87..9b6bdf57d4509 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -2157,6 +2157,8 @@ def group_nth_bin_object(ndarray[object, ndim=2] out, nobs = np.zeros(( out).shape, dtype=np.float64) resx = np.empty(( out).shape, dtype=object) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -2247,6 +2249,8 @@ def group_last_bin_object(ndarray[object, ndim=2] out, nobs = np.zeros(( out).shape, dtype=np.float64) resx = np.empty(( out).shape, dtype=object) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index 598cdff30e4f7..5d4b18b36050f 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -751,6 +751,8 @@ def group_last_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, nobs = np.zeros_like(out) resx = np.empty_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -797,6 +799,8 @@ def group_nth_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, nobs = np.zeros_like(out) resx = np.empty_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -948,6 +952,8 @@ def group_add_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, nobs = np.zeros_like(out) sumx = np.zeros_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -1064,6 +1070,8 @@ def 
group_prod_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, nobs = np.zeros_like(out) prodx = np.ones_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -1184,6 +1192,8 @@ def group_var_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, sumx = np.zeros_like(out) sumxx = np.zeros_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -1285,6 +1295,8 @@ def group_count_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), dtype=np.int64) + if len(bins) == 0: + return ngroups = len(bins) + (bins[len(bins) - 1] != N) for i in range(N): @@ -1329,6 +1341,8 @@ def group_min_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, minx = np.empty_like(out) minx.fill(%(inf_val)s) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -1453,6 +1467,8 @@ def group_max_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, maxx = np.empty_like(out) maxx.fill(-%(inf_val)s) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -1629,6 +1645,8 @@ def group_mean_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, sumx = np.zeros_like(out) N, K = ( values).shape + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -1685,6 +1703,8 @@ def group_ohlc_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, %(dest_type2)s vopen, vhigh, vlow, vclose, NA bint got_first = 0 + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx index 428decd4dca10..83dfacba45211 100644 --- a/pandas/src/generated.pyx +++ b/pandas/src/generated.pyx @@ -6725,6 +6725,8 @@ def group_add_bin_float64(ndarray[float64_t, ndim=2] out, nobs = np.zeros_like(out) sumx = np.zeros_like(out) + if len(bins) == 0: + return if bins[len(bins) 
- 1] == len(values): ngroups = len(bins) else: @@ -6781,6 +6783,8 @@ def group_add_bin_float32(ndarray[float32_t, ndim=2] out, nobs = np.zeros_like(out) sumx = np.zeros_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -6951,6 +6955,8 @@ def group_prod_bin_float64(ndarray[float64_t, ndim=2] out, nobs = np.zeros_like(out) prodx = np.ones_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -7007,6 +7013,8 @@ def group_prod_bin_float32(ndarray[float32_t, ndim=2] out, nobs = np.zeros_like(out) prodx = np.ones_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -7186,6 +7194,8 @@ def group_var_bin_float64(ndarray[float64_t, ndim=2] out, sumx = np.zeros_like(out) sumxx = np.zeros_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -7247,6 +7257,8 @@ def group_var_bin_float32(ndarray[float32_t, ndim=2] out, sumx = np.zeros_like(out) sumxx = np.zeros_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -7412,6 +7424,8 @@ def group_mean_bin_float64(ndarray[float64_t, ndim=2] out, sumx = np.zeros_like(out) N, K = ( values).shape + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -7465,6 +7479,8 @@ def group_mean_bin_float32(ndarray[float32_t, ndim=2] out, sumx = np.zeros_like(out) N, K = ( values).shape + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -7520,6 +7536,8 @@ def group_ohlc_float64(ndarray[float64_t, ndim=2] out, float64_t vopen, vhigh, vlow, vclose, NA bint got_first = 0 + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -7594,6 +7612,8 @@ def group_ohlc_float32(ndarray[float32_t, ndim=2] out, float32_t vopen, vhigh, vlow, vclose, NA bint 
got_first = 0 + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -7801,6 +7821,8 @@ def group_last_bin_float64(ndarray[float64_t, ndim=2] out, nobs = np.zeros_like(out) resx = np.empty_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -7845,6 +7867,8 @@ def group_last_bin_float32(ndarray[float32_t, ndim=2] out, nobs = np.zeros_like(out) resx = np.empty_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -7889,6 +7913,8 @@ def group_last_bin_int64(ndarray[int64_t, ndim=2] out, nobs = np.zeros_like(out) resx = np.empty_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -8067,6 +8093,8 @@ def group_nth_bin_float64(ndarray[float64_t, ndim=2] out, nobs = np.zeros_like(out) resx = np.empty_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -8112,6 +8140,8 @@ def group_nth_bin_float32(ndarray[float32_t, ndim=2] out, nobs = np.zeros_like(out) resx = np.empty_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -8157,6 +8187,8 @@ def group_nth_bin_int64(ndarray[int64_t, ndim=2] out, nobs = np.zeros_like(out) resx = np.empty_like(out) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -8386,6 +8418,8 @@ def group_min_bin_float64(ndarray[float64_t, ndim=2] out, minx = np.empty_like(out) minx.fill(np.inf) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -8447,6 +8481,8 @@ def group_min_bin_float32(ndarray[float32_t, ndim=2] out, minx = np.empty_like(out) minx.fill(np.inf) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -8508,6 +8544,8 @@ def group_min_bin_int64(ndarray[int64_t, ndim=2] out, minx = 
np.empty_like(out) minx.fill(9223372036854775807) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -8750,6 +8788,8 @@ def group_max_bin_float64(ndarray[float64_t, ndim=2] out, maxx = np.empty_like(out) maxx.fill(-np.inf) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -8810,6 +8850,8 @@ def group_max_bin_float32(ndarray[float32_t, ndim=2] out, maxx = np.empty_like(out) maxx.fill(-np.inf) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -8870,6 +8912,8 @@ def group_max_bin_int64(ndarray[int64_t, ndim=2] out, maxx = np.empty_like(out) maxx.fill(-9223372036854775807) + if len(bins) == 0: + return if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: @@ -9110,6 +9154,8 @@ def group_count_bin_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), dtype=np.int64) + if len(bins) == 0: + return ngroups = len(bins) + (bins[len(bins) - 1] != N) for i in range(N): @@ -9144,6 +9190,8 @@ def group_count_bin_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), dtype=np.int64) + if len(bins) == 0: + return ngroups = len(bins) + (bins[len(bins) - 1] != N) for i in range(N): @@ -9178,6 +9226,8 @@ def group_count_bin_int64(ndarray[int64_t, ndim=2] out, ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), dtype=np.int64) + if len(bins) == 0: + return ngroups = len(bins) + (bins[len(bins) - 1] != N) for i in range(N): @@ -9212,6 +9262,8 @@ def group_count_bin_object(ndarray[object, ndim=2] out, ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), dtype=np.int64) + if len(bins) == 0: + return ngroups = len(bins) + (bins[len(bins) - 1] != N) for i in range(N): @@ -9246,6 +9298,8 @@ def group_count_bin_int64(ndarray[int64_t, ndim=2] out, ndarray[int64_t, ndim=2] nobs = 
np.zeros((out.shape[0], out.shape[1]), dtype=np.int64) + if len(bins) == 0: + return ngroups = len(bins) + (bins[len(bins) - 1] != N) for i in range(N): diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 202ccb9438db5..3927caef58d2b 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -9,6 +9,7 @@ from pandas import (Series, TimeSeries, DataFrame, Panel, Index, isnull, notnull, Timestamp) +from pandas.core.groupby import DataError from pandas.tseries.index import date_range from pandas.tseries.offsets import Minute, BDay from pandas.tseries.period import period_range, PeriodIndex, Period @@ -660,6 +661,20 @@ def test_resample_empty(self): rs = xp.resample('A') assert_frame_equal(xp, rs) + # Empty series were sometimes causing a segfault (for the functions + # with Cython bounds-checking disabled) or an IndexError. We just run + # them to ensure they no longer do. (GH #10228) + for index in tm.all_timeseries_index_generator(0): + for dtype in (np.float, np.int, np.object, 'datetime64[ns]'): + for how in ('count', 'mean', 'min', 'ohlc', 'last', 'prod'): + empty_series = pd.Series([], index, dtype) + try: + empty_series.resample('d', how) + except DataError: + # Ignore these since some combinations are invalid + # (ex: doing mean with dtype of np.object) + pass + def test_weekly_resample_buglet(self): # #1327 rng = date_range('1/1/2000', freq='B', periods=20) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 25f5f84b0b1d9..83d6b97788e91 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -856,6 +856,33 @@ def makePeriodIndex(k=10): dr = PeriodIndex(start=dt, periods=k, freq='B') return dr +def all_index_generator(k=10): + """Generator which can be iterated over to get instances of all the various + index classes. 
+ + Parameters + ---------- + k: length of each of the index instances + """ + all_make_index_funcs = [makeIntIndex, makeFloatIndex, makeStringIndex, + makeUnicodeIndex, makeDateIndex, makePeriodIndex, + makeTimedeltaIndex, makeBoolIndex, + makeCategoricalIndex] + for make_index_func in all_make_index_funcs: + yield make_index_func(k=k) + +def all_timeseries_index_generator(k=10): + """Generator which can be iterated over to get instances of all the classes + which represent time series. + + Parameters + ---------- + k: length of each of the index instances + """ + make_index_funcs = [makeDateIndex, makePeriodIndex, makeTimedeltaIndex] + for make_index_func in make_index_funcs: + yield make_index_func(k=k) + # make series def makeFloatSeries():