diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index a8213a50a9ead..7c810cd3d526b 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -115,6 +115,13 @@ API changes :func:`expanding_cov`, :func:`expanding_corr`, :func:`expanding_corr_pairwise`, and :func:`expanding_apply`, as the results produced when ``center=True`` did not make much sense. (:issue:`7925`) +- Added optional ``ddof`` argument to :func:`expanding_cov` and :func:`rolling_cov`. + The default value of ``1`` is backwards-compatible. (:issue:`8279`) + +- Documented the ``ddof`` argument to :func:`expanding_var`, :func:`expanding_std`, + :func:`rolling_var`, and :func:`rolling_std`. These functions' support of a + ``ddof`` argument (with a default value of ``1``) was previously undocumented. (:issue:`8064`) + - :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` now interpret ``min_periods`` in the same manner that the ``rolling_*`` and ``expanding_*`` functions do: a given result entry will be ``NaN`` if the (expanding, in this case) window does not contain diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 82423da26b53f..41a768783b1cb 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -167,6 +167,11 @@ elements, only complete pairwise observations will be used. """ +_ddof_kw = """ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. +""" + _bias_kw = r"""bias : boolean, default False Use a standard estimation bias correction """ @@ -216,10 +221,10 @@ def rolling_count(arg, window, freq=None, center=False, how=None): @Substitution("Unbiased moving covariance.", _binary_arg_flex, - _roll_kw%'None'+_pairwise_kw, _flex_retval, _roll_notes) + _roll_kw%'None'+_pairwise_kw+_ddof_kw, _flex_retval, _roll_notes) @Appender(_doc_template) def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None, - center=False, pairwise=None, how=None): + center=False, pairwise=None, how=None, ddof=1): if window is None and isinstance(arg2, (int, float)): window = arg2 arg2 = arg1 @@ -233,7 +238,7 @@ def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None, def _get_cov(X, Y): mean = lambda x: rolling_mean(x, window, min_periods, center=center) count = rolling_count(X + Y, window, center=center) - bias_adj = count / (count - 1) + bias_adj = count / (count - ddof) return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise)) return rs @@ -620,14 +625,14 @@ def _use_window(minp, window): return minp -def _rolling_func(func, desc, check_minp=_use_window, how=None): +def _rolling_func(func, desc, check_minp=_use_window, how=None, additional_kw=''): if how is None: how_arg_str = 'None' else: how_arg_str = "'%s"%how - @Substitution(desc, _unary_arg, _roll_kw%how_arg_str, _type_of_input_retval, - _roll_notes) + @Substitution(desc, _unary_arg, _roll_kw%how_arg_str + additional_kw, + _type_of_input_retval, _roll_notes) @Appender(_doc_template) @wraps(func) def f(arg, window, min_periods=None, freq=None, center=False, how=how, @@ -648,10 +653,12 @@ def call_cython(arg, window, minp, args=(), kwargs={}, **kwds): how='median') _ts_std = lambda *a, **kw: _zsqrt(algos.roll_var(*a, **kw)) -rolling_std = _rolling_func(_ts_std, 'Unbiased moving standard deviation.', - check_minp=_require_min_periods(1)) -rolling_var = _rolling_func(algos.roll_var, 'Unbiased moving variance.', - check_minp=_require_min_periods(1)) +rolling_std = _rolling_func(_ts_std, 'Moving standard deviation.', + check_minp=_require_min_periods(1), + additional_kw=_ddof_kw) +rolling_var = _rolling_func(algos.roll_var, 'Moving variance.', + check_minp=_require_min_periods(1), + additional_kw=_ddof_kw) rolling_skew = _rolling_func(algos.roll_skew, 'Unbiased moving skewness.', check_minp=_require_min_periods(3)) rolling_kurt = _rolling_func(algos.roll_kurt, 'Unbiased moving kurtosis.', @@ -864,8 +871,9 @@ def _pop_args(win_type, arg_names, kwargs): return all_args -def _expanding_func(func, desc, check_minp=_use_window): - @Substitution(desc, _unary_arg, _expanding_kw, _type_of_input_retval, "") +def _expanding_func(func, desc, check_minp=_use_window, additional_kw=''): + @Substitution(desc, _unary_arg, _expanding_kw + additional_kw, + _type_of_input_retval, "") @Appender(_doc_template) @wraps(func) def f(arg, min_periods=1, freq=None, **kwargs): @@ -883,20 +891,18 @@ def call_cython(arg, window, minp, args=(), kwargs={}, **kwds): expanding_min = _expanding_func(algos.roll_min2, 'Expanding minimum.') expanding_sum = _expanding_func(algos.roll_sum, 'Expanding sum.') expanding_mean = _expanding_func(algos.roll_mean, 'Expanding mean.') -expanding_median = _expanding_func( - algos.roll_median_cython, 'Expanding median.') - -expanding_std = _expanding_func(_ts_std, - 'Unbiased expanding standard deviation.', - check_minp=_require_min_periods(1)) -expanding_var = _expanding_func(algos.roll_var, 'Unbiased expanding variance.', - check_minp=_require_min_periods(1)) -expanding_skew = _expanding_func( - algos.roll_skew, 'Unbiased expanding skewness.', - check_minp=_require_min_periods(3)) -expanding_kurt = _expanding_func( - algos.roll_kurt, 'Unbiased expanding kurtosis.', - check_minp=_require_min_periods(4)) +expanding_median = _expanding_func(algos.roll_median_cython, 'Expanding median.') + +expanding_std = _expanding_func(_ts_std, 'Expanding standard deviation.', + check_minp=_require_min_periods(1), + additional_kw=_ddof_kw) +expanding_var = _expanding_func(algos.roll_var, 'Expanding variance.', + check_minp=_require_min_periods(1), + additional_kw=_ddof_kw) +expanding_skew = _expanding_func(algos.roll_skew, 'Unbiased expanding skewness.', + check_minp=_require_min_periods(3)) +expanding_kurt = _expanding_func(algos.roll_kurt, 'Unbiased expanding kurtosis.', + check_minp=_require_min_periods(4)) def expanding_count(arg, freq=None): @@ -953,9 +959,9 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None): @Substitution("Unbiased expanding covariance.", _binary_arg_flex, - _expanding_kw+_pairwise_kw, _flex_retval, "") + _expanding_kw+_pairwise_kw+_ddof_kw, _flex_retval, "") @Appender(_doc_template) -def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): +def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None, ddof=1): if arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise @@ -966,7 +972,7 @@ def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): window = max((len(arg1) + len(arg2)), min_periods) if min_periods else (len(arg1) + len(arg2)) return rolling_cov(arg1, arg2, window, min_periods=min_periods, freq=freq, - pairwise=pairwise) + pairwise=pairwise, ddof=ddof) @Substitution("Expanding sample correlation.", _binary_arg_flex, diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 55c618646c4f5..94c2521ff6938 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -4,6 +4,7 @@ from datetime import datetime from numpy.random import randn +from numpy.testing.decorators import slow import numpy as np from distutils.version import LooseVersion @@ -813,6 +814,7 @@ def _non_null_values(x): mean_x_times_y = mean(x * y) assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) + @slow def test_ewm_consistency(self): def _weights(s, com, adjust, ignore_na): @@ -877,6 +879,7 @@ def _ewma(s, com, min_periods, adjust, ignore_na): cov_biased=lambda x, y: mom.ewmcov(x, y, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=True), var_debiasing_factors=lambda x: _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na)) + @slow def test_expanding_consistency(self): base_functions = [ (mom.expanding_count, lambda v: Series(v).count(), None), @@ -931,7 +934,7 @@ def test_expanding_consistency(self): cov_unbiased=lambda x, y: mom.expanding_cov(x, y, min_periods=min_periods), var_biased=lambda x: mom.expanding_var(x, min_periods=min_periods, ddof=0), std_biased=lambda x: mom.expanding_std(x, min_periods=min_periods, ddof=0), - cov_biased=None, + cov_biased=lambda x, y: mom.expanding_cov(x, y, min_periods=min_periods, ddof=0), var_debiasing_factors=lambda x: mom.expanding_count(x) / (mom.expanding_count(x) - 1.).replace(0., np.nan) ) @@ -967,6 +970,7 @@ def test_expanding_consistency(self): expected.iloc[:, i, j] = expanding_f(x.iloc[:, i], x.iloc[:, j], min_periods=min_periods) assert_panel_equal(expanding_f_result, expected) + @slow def test_rolling_consistency(self): base_functions = [ (mom.rolling_count, lambda v: Series(v).count(), None), @@ -979,7 +983,7 @@ def test_rolling_consistency(self): (mom.rolling_corr, lambda v: Series(v).corr(Series(v)), None), (mom.rolling_var, lambda v: Series(v).var(), 1), #(mom.rolling_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed - # (mom.rolling_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed + #(mom.rolling_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed #(lambda x, window, min_periods, center: mom.rolling_quantile(x, window, 0.3, min_periods=min_periods, center=center), # lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed (mom.rolling_median, lambda v: Series(v).median(), None), @@ -1026,7 +1030,7 @@ def test_rolling_consistency(self): cov_unbiased=lambda x, y: mom.rolling_cov(x, y, window=window, min_periods=min_periods, center=center), var_biased=lambda x: mom.rolling_var(x, window=window, min_periods=min_periods, center=center, ddof=0), std_biased=lambda x: mom.rolling_std(x, window=window, min_periods=min_periods, center=center, ddof=0), - cov_biased=None, + cov_biased=lambda x, y: mom.rolling_cov(x, y, window=window, min_periods=min_periods, center=center, ddof=0), var_debiasing_factors=lambda x: mom.rolling_count(x, window=window, center=center).divide( (mom.rolling_count(x, window=window, center=center) - 1.).replace(0., np.nan)), )