Skip to content

API: add ddof to expanding/rolling_cov() #8280

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 17, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,13 @@ API changes
:func:`expanding_cov`, :func:`expanding_corr`, :func:`expanding_corr_pairwise`, and :func:`expanding_apply`,
as the results produced when ``center=True`` did not make much sense. (:issue:`7925`)

- Added optional ``ddof`` argument to :func:`expanding_cov` and :func:`rolling_cov`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it WAS already there for rolling_var/std, just not documented, right? Also list the other issue as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. (Technically ddof isn't an explicit parameter of expanding/rolling_var/std(), but is accepted as part of kwargs and passed on to algos.roll_var().)

I will add to the release note a comment about expanding/rolling_var/std support for ddof.

The default value of ``1`` is backwards-compatible. (:issue:`8279`)

- Documented the ``ddof`` argument to :func:`expanding_var`, :func:`expanding_std`,
:func:`rolling_var`, and :func:`rolling_std`. These functions' support of a
``ddof`` argument (with a default value of ``1``) was previously undocumented. (:issue:`8064`)

- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr`
now interpret ``min_periods`` in the same manner that the ``rolling_*`` and ``expanding_*`` functions do:
a given result entry will be ``NaN`` if the (expanding, in this case) window does not contain
Expand Down
64 changes: 35 additions & 29 deletions pandas/stats/moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,11 @@
elements, only complete pairwise observations will be used.
"""

_ddof_kw = """ddof : int, default 1
Delta Degrees of Freedom. The divisor used in calculations
is ``N - ddof``, where ``N`` represents the number of elements.
"""

_bias_kw = r"""bias : boolean, default False
Use a standard estimation bias correction
"""
Expand Down Expand Up @@ -216,10 +221,10 @@ def rolling_count(arg, window, freq=None, center=False, how=None):


@Substitution("Unbiased moving covariance.", _binary_arg_flex,
_roll_kw%'None'+_pairwise_kw, _flex_retval, _roll_notes)
_roll_kw%'None'+_pairwise_kw+_ddof_kw, _flex_retval, _roll_notes)
@Appender(_doc_template)
def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None,
center=False, pairwise=None, how=None):
center=False, pairwise=None, how=None, ddof=1):
if window is None and isinstance(arg2, (int, float)):
window = arg2
arg2 = arg1
Expand All @@ -233,7 +238,7 @@ def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None,
def _get_cov(X, Y):
mean = lambda x: rolling_mean(x, window, min_periods, center=center)
count = rolling_count(X + Y, window, center=center)
bias_adj = count / (count - 1)
bias_adj = count / (count - ddof)
return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise))
return rs
Expand Down Expand Up @@ -620,14 +625,14 @@ def _use_window(minp, window):
return minp


def _rolling_func(func, desc, check_minp=_use_window, how=None):
def _rolling_func(func, desc, check_minp=_use_window, how=None, additional_kw=''):
if how is None:
how_arg_str = 'None'
else:
how_arg_str = "'%s"%how

@Substitution(desc, _unary_arg, _roll_kw%how_arg_str, _type_of_input_retval,
_roll_notes)
@Substitution(desc, _unary_arg, _roll_kw%how_arg_str + additional_kw,
_type_of_input_retval, _roll_notes)
@Appender(_doc_template)
@wraps(func)
def f(arg, window, min_periods=None, freq=None, center=False, how=how,
Expand All @@ -648,10 +653,12 @@ def call_cython(arg, window, minp, args=(), kwargs={}, **kwds):
how='median')

_ts_std = lambda *a, **kw: _zsqrt(algos.roll_var(*a, **kw))
rolling_std = _rolling_func(_ts_std, 'Unbiased moving standard deviation.',
check_minp=_require_min_periods(1))
rolling_var = _rolling_func(algos.roll_var, 'Unbiased moving variance.',
check_minp=_require_min_periods(1))
rolling_std = _rolling_func(_ts_std, 'Moving standard deviation.',
check_minp=_require_min_periods(1),
additional_kw=_ddof_kw)
rolling_var = _rolling_func(algos.roll_var, 'Moving variance.',
check_minp=_require_min_periods(1),
additional_kw=_ddof_kw)
rolling_skew = _rolling_func(algos.roll_skew, 'Unbiased moving skewness.',
check_minp=_require_min_periods(3))
rolling_kurt = _rolling_func(algos.roll_kurt, 'Unbiased moving kurtosis.',
Expand Down Expand Up @@ -864,8 +871,9 @@ def _pop_args(win_type, arg_names, kwargs):
return all_args


def _expanding_func(func, desc, check_minp=_use_window):
@Substitution(desc, _unary_arg, _expanding_kw, _type_of_input_retval, "")
def _expanding_func(func, desc, check_minp=_use_window, additional_kw=''):
@Substitution(desc, _unary_arg, _expanding_kw + additional_kw,
_type_of_input_retval, "")
@Appender(_doc_template)
@wraps(func)
def f(arg, min_periods=1, freq=None, **kwargs):
Expand All @@ -883,20 +891,18 @@ def call_cython(arg, window, minp, args=(), kwargs={}, **kwds):
expanding_min = _expanding_func(algos.roll_min2, 'Expanding minimum.')
expanding_sum = _expanding_func(algos.roll_sum, 'Expanding sum.')
expanding_mean = _expanding_func(algos.roll_mean, 'Expanding mean.')
expanding_median = _expanding_func(
algos.roll_median_cython, 'Expanding median.')

expanding_std = _expanding_func(_ts_std,
'Unbiased expanding standard deviation.',
check_minp=_require_min_periods(1))
expanding_var = _expanding_func(algos.roll_var, 'Unbiased expanding variance.',
check_minp=_require_min_periods(1))
expanding_skew = _expanding_func(
algos.roll_skew, 'Unbiased expanding skewness.',
check_minp=_require_min_periods(3))
expanding_kurt = _expanding_func(
algos.roll_kurt, 'Unbiased expanding kurtosis.',
check_minp=_require_min_periods(4))
expanding_median = _expanding_func(algos.roll_median_cython, 'Expanding median.')

expanding_std = _expanding_func(_ts_std, 'Expanding standard deviation.',
check_minp=_require_min_periods(1),
additional_kw=_ddof_kw)
expanding_var = _expanding_func(algos.roll_var, 'Expanding variance.',
check_minp=_require_min_periods(1),
additional_kw=_ddof_kw)
expanding_skew = _expanding_func(algos.roll_skew, 'Unbiased expanding skewness.',
check_minp=_require_min_periods(3))
expanding_kurt = _expanding_func(algos.roll_kurt, 'Unbiased expanding kurtosis.',
check_minp=_require_min_periods(4))


def expanding_count(arg, freq=None):
Expand Down Expand Up @@ -953,9 +959,9 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None):


@Substitution("Unbiased expanding covariance.", _binary_arg_flex,
_expanding_kw+_pairwise_kw, _flex_retval, "")
_expanding_kw+_pairwise_kw+_ddof_kw, _flex_retval, "")
@Appender(_doc_template)
def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None):
def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None, ddof=1):
if arg2 is None:
arg2 = arg1
pairwise = True if pairwise is None else pairwise
Expand All @@ -966,7 +972,7 @@ def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None):
window = max((len(arg1) + len(arg2)), min_periods) if min_periods else (len(arg1) + len(arg2))
return rolling_cov(arg1, arg2, window,
min_periods=min_periods, freq=freq,
pairwise=pairwise)
pairwise=pairwise, ddof=ddof)


@Substitution("Expanding sample correlation.", _binary_arg_flex,
Expand Down
10 changes: 7 additions & 3 deletions pandas/stats/tests/test_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from datetime import datetime
from numpy.random import randn
from numpy.testing.decorators import slow
import numpy as np
from distutils.version import LooseVersion

Expand Down Expand Up @@ -813,6 +814,7 @@ def _non_null_values(x):
mean_x_times_y = mean(x * y)
assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))

@slow
def test_ewm_consistency(self):

def _weights(s, com, adjust, ignore_na):
Expand Down Expand Up @@ -877,6 +879,7 @@ def _ewma(s, com, min_periods, adjust, ignore_na):
cov_biased=lambda x, y: mom.ewmcov(x, y, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=True),
var_debiasing_factors=lambda x: _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na))

@slow
def test_expanding_consistency(self):
base_functions = [
(mom.expanding_count, lambda v: Series(v).count(), None),
Expand Down Expand Up @@ -931,7 +934,7 @@ def test_expanding_consistency(self):
cov_unbiased=lambda x, y: mom.expanding_cov(x, y, min_periods=min_periods),
var_biased=lambda x: mom.expanding_var(x, min_periods=min_periods, ddof=0),
std_biased=lambda x: mom.expanding_std(x, min_periods=min_periods, ddof=0),
cov_biased=None,
cov_biased=lambda x, y: mom.expanding_cov(x, y, min_periods=min_periods, ddof=0),
var_debiasing_factors=lambda x: mom.expanding_count(x) / (mom.expanding_count(x) - 1.).replace(0., np.nan)
)

Expand Down Expand Up @@ -967,6 +970,7 @@ def test_expanding_consistency(self):
expected.iloc[:, i, j] = expanding_f(x.iloc[:, i], x.iloc[:, j], min_periods=min_periods)
assert_panel_equal(expanding_f_result, expected)

@slow
def test_rolling_consistency(self):
base_functions = [
(mom.rolling_count, lambda v: Series(v).count(), None),
Expand All @@ -979,7 +983,7 @@ def test_rolling_consistency(self):
(mom.rolling_corr, lambda v: Series(v).corr(Series(v)), None),
(mom.rolling_var, lambda v: Series(v).var(), 1),
#(mom.rolling_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed
# (mom.rolling_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed
#(mom.rolling_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed
#(lambda x, window, min_periods, center: mom.rolling_quantile(x, window, 0.3, min_periods=min_periods, center=center),
# lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed
(mom.rolling_median, lambda v: Series(v).median(), None),
Expand Down Expand Up @@ -1026,7 +1030,7 @@ def test_rolling_consistency(self):
cov_unbiased=lambda x, y: mom.rolling_cov(x, y, window=window, min_periods=min_periods, center=center),
var_biased=lambda x: mom.rolling_var(x, window=window, min_periods=min_periods, center=center, ddof=0),
std_biased=lambda x: mom.rolling_std(x, window=window, min_periods=min_periods, center=center, ddof=0),
cov_biased=None,
cov_biased=lambda x, y: mom.rolling_cov(x, y, window=window, min_periods=min_periods, center=center, ddof=0),
var_debiasing_factors=lambda x: mom.rolling_count(x, window=window, center=center).divide(
(mom.rolling_count(x, window=window, center=center) - 1.).replace(0., np.nan)),
)
Expand Down