diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index d608304511a08..69d8632677b67 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -104,9 +104,9 @@ API changes :func:`rolling_std`, :func:`rolling_var`, :func:`rolling_skew`, :func:`rolling_kurt`, and :func:`rolling_quantile`, :func:`rolling_cov`, :func:`rolling_corr`, :func:`rolling_corr_pairwise`, :func:`rolling_window`, and :func:`rolling_apply` with ``center=True`` previously would return a result of the same - structure as the input ``arg`` with ``NaN``s in the final ``(window-1)/2`` entries. + structure as the input ``arg`` with ``NaN`` in the final ``(window-1)/2`` entries. Now the final ``(window-1)/2`` entries of the result are calculated as if the input ``arg`` were followed - by ``(window-1)/2`` ``NaN``s. (:issue:`7925`) + by ``(window-1)/2`` ``NaN`` values. (:issue:`7925`) Prior behavior (note final value is ``NaN``): @@ -556,8 +556,8 @@ Bug Fixes returning results with columns sorted by name and producing an error for non-unique columns; now handles non-unique columns and returns columns in original order (except for the case of two DataFrames with ``pairwise=False``, where behavior is unchanged) (:issue:`7542`) - - +- Bug in :func:`rolling_count` and ``expanding_*`` functions unnecessarily producing error message for zero-length data (:issue:`8056`) +- Bug in :func:`rolling_apply` and :func:`expanding_apply` interpreting ``min_periods=0`` as ``min_periods=1`` (:issue:`8080`) - Bug in ``DataFrame.plot`` and ``Series.plot`` may ignore ``rot`` and ``fontsize`` keywords (:issue:`7844`) diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 1c1d32e1d2a20..c0f0590c22a25 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -712,17 +712,15 @@ def rank_2d_generic(object in_arr, axis=0, ties_method='average', # # - -def _check_minp(win, minp, N): +def _check_minp(win, minp, N, floor=1): if minp > win: raise ValueError('min_periods (%d) must be <= window (%d)' % (minp, win)) elif
minp > N: minp = N + 1 - elif minp == 0: - minp = 1 elif minp < 0: raise ValueError('min_periods must be >= 0') - return minp + return max(minp, floor) # original C implementation by N. Devillard. # This code in public domain. @@ -1766,7 +1764,7 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win, if n == 0: return input - minp = _check_minp(win, minp, n) + minp = _check_minp(win, minp, n, floor=0) output = np.empty(n, dtype=float) counts = roll_sum(np.isfinite(input).astype(float), win, minp) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 74545a08d45b6..a2c7cc30e4798 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -206,7 +206,7 @@ def rolling_count(arg, window, freq=None, center=False, how=None): return_hook, values = _process_data_structure(arg, kill_inf=False) converted = np.isfinite(values).astype(float) - result = rolling_sum(converted, window, min_periods=1, + result = rolling_sum(converted, window, min_periods=0, center=center) # already converted # putmask here? 
@@ -280,7 +280,8 @@ def _flex_binary_moment(arg1, arg2, f, pairwise=False): elif isinstance(arg1, DataFrame): def dataframe_from_int_dict(data, frame_template): result = DataFrame(data, index=frame_template.index) - result.columns = frame_template.columns[result.columns] + if len(result.columns) > 0: + result.columns = frame_template.columns[result.columns] return result results = {} @@ -314,8 +315,10 @@ def dataframe_from_int_dict(data, frame_template): else: results[i][j] = f(*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])) p = Panel.from_dict(results).swapaxes('items', 'major') - p.major_axis = arg1.columns[p.major_axis] - p.minor_axis = arg2.columns[p.minor_axis] + if len(p.major_axis) > 0: + p.major_axis = arg1.columns[p.major_axis] + if len(p.minor_axis) > 0: + p.minor_axis = arg2.columns[p.minor_axis] return p else: raise ValueError("'pairwise' is not True/False") @@ -372,17 +375,22 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False, y : type of input """ arg = _conv_timerule(arg, freq, how) - offset = int((window - 1) / 2.) if center else 0 - additional_nans = np.array([np.NaN] * offset) - calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x, - window, minp=minp, args=args, kwargs=kwargs, - **kwds) + return_hook, values = _process_data_structure(arg) - # actually calculate the moment. Faster way to do this? - if values.ndim > 1: - result = np.apply_along_axis(calc, axis, values) + + if values.size == 0: + result = values.copy() else: - result = calc(values) + # actually calculate the moment. Faster way to do this? + offset = int((window - 1) / 2.) 
if center else 0 + additional_nans = np.array([np.NaN] * offset) + calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x, + window, minp=minp, args=args, kwargs=kwargs, + **kwds) + if values.ndim > 1: + result = np.apply_along_axis(calc, axis, values) + else: + result = calc(values) if center: result = _center_window(result, window, axis) @@ -817,11 +825,14 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None, arg = _conv_timerule(arg, freq, how) return_hook, values = _process_data_structure(arg) - offset = int((len(window) - 1) / 2.) if center else 0 - additional_nans = np.array([np.NaN] * offset) - f = lambda x: algos.roll_window(np.concatenate((x, additional_nans)) if center else x, - window, minp, avg=mean) - result = np.apply_along_axis(f, axis, values) + if values.size == 0: + result = values.copy() + else: + offset = int((len(window) - 1) / 2.) if center else 0 + additional_nans = np.array([np.NaN] * offset) + f = lambda x: algos.roll_window(np.concatenate((x, additional_nans)) if center else x, + window, minp, avg=mean) + result = np.apply_along_axis(f, axis, values) if center: result = _center_window(result, len(window), axis) @@ -856,7 +867,7 @@ def _expanding_func(func, desc, check_minp=_use_window): @Appender(_doc_template) @wraps(func) def f(arg, min_periods=1, freq=None, **kwargs): - window = len(arg) + window = max(len(arg), min_periods) if min_periods else len(arg) def call_cython(arg, window, minp, args=(), kwargs={}, **kwds): minp = check_minp(minp, window) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 359868262a681..2c2a19660f266 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -244,6 +244,12 @@ def roll_mean(x, window, min_periods=None, freq=None, center=False): center=center) self._check_moment_func(roll_mean, np.mean) + # GH 8080 + s = Series([None, None, None]) + result = mom.rolling_apply(s, 2, lambda x: len(x), 
min_periods=0) + expected = Series([1., 2., 2.]) + assert_series_equal(result, expected) + def test_rolling_apply_out_of_bounds(self): # #1850 arr = np.arange(4) @@ -814,6 +820,12 @@ def expanding_mean(x, min_periods=1, freq=None): freq=freq) self._check_expanding(expanding_mean, np.mean) + # GH 8080 + s = Series([None, None, None]) + result = mom.expanding_apply(s, lambda x: len(x), min_periods=0) + expected = Series([1., 2., 3.]) + assert_series_equal(result, expected) + def test_expanding_apply_args_kwargs(self): def mean_w_arg(x, const): return np.mean(x) + const @@ -989,6 +1001,77 @@ def test_rolling_functions_window_non_shrinkage(self): df_result_panel = f(df) assert_panel_equal(df_result_panel, df_expected_panel) + def test_moment_functions_zero_length(self): + # GH 8056 + s = Series() + s_expected = s + df1 = DataFrame() + df1_expected = df1 + df1_expected_panel = Panel(items=df1.index, major_axis=df1.columns, minor_axis=df1.columns) + df2 = DataFrame(columns=['a']) + df2_expected = df2 + df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, minor_axis=df2.columns) + + functions = [lambda x: mom.expanding_count(x), + lambda x: mom.expanding_cov(x, x, pairwise=False, min_periods=5), + lambda x: mom.expanding_corr(x, x, pairwise=False, min_periods=5), + lambda x: mom.expanding_max(x, min_periods=5), + lambda x: mom.expanding_min(x, min_periods=5), + lambda x: mom.expanding_sum(x, min_periods=5), + lambda x: mom.expanding_mean(x, min_periods=5), + lambda x: mom.expanding_std(x, min_periods=5), + lambda x: mom.expanding_var(x, min_periods=5), + lambda x: mom.expanding_skew(x, min_periods=5), + lambda x: mom.expanding_kurt(x, min_periods=5), + lambda x: mom.expanding_quantile(x, quantile=0.5, min_periods=5), + lambda x: mom.expanding_median(x, min_periods=5), + lambda x: mom.expanding_apply(x, func=sum, min_periods=5), + lambda x: mom.rolling_count(x, window=10), + lambda x: mom.rolling_cov(x, x, pairwise=False, window=10, min_periods=5), + lambda 
x: mom.rolling_corr(x, x, pairwise=False, window=10, min_periods=5), + lambda x: mom.rolling_max(x, window=10, min_periods=5), + lambda x: mom.rolling_min(x, window=10, min_periods=5), + lambda x: mom.rolling_sum(x, window=10, min_periods=5), + lambda x: mom.rolling_mean(x, window=10, min_periods=5), + lambda x: mom.rolling_std(x, window=10, min_periods=5), + lambda x: mom.rolling_var(x, window=10, min_periods=5), + lambda x: mom.rolling_skew(x, window=10, min_periods=5), + lambda x: mom.rolling_kurt(x, window=10, min_periods=5), + lambda x: mom.rolling_quantile(x, quantile=0.5, window=10, min_periods=5), + lambda x: mom.rolling_median(x, window=10, min_periods=5), + lambda x: mom.rolling_apply(x, func=sum, window=10, min_periods=5), + lambda x: mom.rolling_window(x, win_type='boxcar', window=10, min_periods=5), + ] + for f in functions: + try: + s_result = f(s) + assert_series_equal(s_result, s_expected) + + df1_result = f(df1) + assert_frame_equal(df1_result, df1_expected) + + df2_result = f(df2) + assert_frame_equal(df2_result, df2_expected) + except (ImportError): + + # scipy needed for rolling_window + continue + + functions = [lambda x: mom.expanding_cov(x, x, pairwise=True, min_periods=5), + lambda x: mom.expanding_corr(x, x, pairwise=True, min_periods=5), + lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5), + lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5), + # rolling_corr_pairwise is deprecated, so the following line should be deleted + # when rolling_corr_pairwise is removed. + lambda x: mom.rolling_corr_pairwise(x, x, window=10, min_periods=5), + ] + for f in functions: + df1_result_panel = f(df1) + assert_panel_equal(df1_result_panel, df1_expected_panel) + + df2_result_panel = f(df2) + assert_panel_equal(df2_result_panel, df2_expected_panel) + def test_expanding_cov_pairwise_diff_length(self): # GH 7512 df1 = DataFrame([[1,5], [3, 2], [3,9]], columns=['A','B'])