Skip to content

Add expanding moment functions and related tests. #1785

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 8, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 152 additions & 1 deletion pandas/stats/moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@
'rolling_corr', 'rolling_var', 'rolling_skew', 'rolling_kurt',
'rolling_quantile', 'rolling_median', 'rolling_apply',
'rolling_corr_pairwise',
'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov']
'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov',
'expanding_count', 'expanding_max', 'expanding_min',
'expanding_sum', 'expanding_mean', 'expanding_std',
'expanding_cov', 'expanding_corr', 'expanding_var',
'expanding_skew', 'expanding_kurt', 'expanding_quantile',
'expanding_median', 'expanding_apply', 'expanding_corr_pairwise']

#-------------------------------------------------------------------------------
# Docs
Expand Down Expand Up @@ -79,6 +84,24 @@
y : type of input argument
"""


# Docstring template shared by the expanding_* functions in this module.
# It has three %s slots, filled in order by the Substitution(...) decorator
# arguments used alongside Appender(_expanding_doc): a one-line summary, the
# description of the data argument(s), and the text of the Returns section.
# NOTE(review): the template documents ``freq``, but expanding_cov and
# expanding_corr are decorated with it without accepting a ``freq`` argument.
_expanding_doc = """
%s

Parameters
----------
%s
min_periods : int
Minimum number of observations in window required to have a value
freq : None or string alias / date offset object, default=None
Frequency to conform to before computing statistic

Returns
-------
%s
"""


# Returns-section text used as the last Substitution argument for functions
# built by _expanding_func (result documented as having the input's type).
_type_of_input = "y : type of input argument"

_flex_retval = """y : type depends on inputs
Expand Down Expand Up @@ -465,3 +488,131 @@ def call_cython(arg, window, minp):
return lib.roll_generic(arg, window, minp, func)
return _rolling_moment(arg, window, call_cython, min_periods,
freq=freq, time_rule=time_rule)


def _expanding_func(func, desc, check_minp=_use_window):
    """Build an expanding-window function around a windowed kernel.

    Parameters
    ----------
    func : callable
        Kernel invoked as ``func(arg, window, minp, **kwds)``.
    desc : str
        Summary line substituted into the generated docstring.
    check_minp : callable, default _use_window
        Called as ``check_minp(minp, window)``; its return value is the
        ``minp`` handed to ``func``.

    Returns
    -------
    callable
        Function with signature
        ``(arg, min_periods=1, freq=None, time_rule=None, **kwargs)`` that
        forwards to ``_rolling_moment`` with the window set to ``len(arg)``.
    """
    # The wrapper deliberately has no literal docstring: @wraps copies the
    # kernel's metadata and the Appender/Substitution pair is applied on top.
    @Substitution(desc, _unary_arg, _type_of_input)
    @Appender(_expanding_doc)
    @wraps(func)
    def expanding(arg, min_periods=1, freq=None, time_rule=None, **kwargs):
        # Parameter names of the kernel adapter are kept as-is in case
        # _rolling_moment supplies any of them by keyword.
        def kernel(arg, window, minp, **kwds):
            return func(arg, window, check_minp(minp, window), **kwds)

        # A window as long as the whole input makes every position see all
        # observations up to and including itself.
        return _rolling_moment(arg, len(arg), kernel, min_periods,
                               freq=freq, time_rule=time_rule, **kwargs)

    return expanding

# Expanding statistics built from the windowed kernels with the default
# min_periods check.
expanding_max = _expanding_func(lib.roll_max2, desc='Expanding maximum')
expanding_min = _expanding_func(lib.roll_min2, desc='Expanding minimum')
expanding_sum = _expanding_func(lib.roll_sum, desc='Expanding sum')
expanding_mean = _expanding_func(lib.roll_mean, desc='Expanding mean')
expanding_median = _expanding_func(lib.roll_median_cython,
                                   desc='Expanding median')

# Higher moments use a dedicated min_periods check.
# NOTE(review): _require_min_periods(k) is defined elsewhere; presumably it
# enforces at least k observations -- confirm against its definition.
expanding_std = _expanding_func(
    _ts_std,
    desc='Unbiased expanding standard deviation',
    check_minp=_require_min_periods(2))
expanding_var = _expanding_func(
    lib.roll_var,
    desc='Unbiased expanding variance',
    check_minp=_require_min_periods(2))
expanding_skew = _expanding_func(
    lib.roll_skew,
    desc='Unbiased expanding skewness',
    check_minp=_require_min_periods(3))
expanding_kurt = _expanding_func(
    lib.roll_kurt,
    desc='Unbiased expanding kurtosis',
    check_minp=_require_min_periods(4))


def expanding_count(arg, freq=None, time_rule=None):
    """
    Count the non-NaN observations over an expanding window.

    Implemented as ``rolling_count`` with the window set to ``len(arg)``.

    Parameters
    ----------
    arg : DataFrame or numpy ndarray-like
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing statistic
    time_rule : optional
        Forwarded unchanged to ``rolling_count``

    Returns
    -------
    expanding_count : type of caller
    """
    full_window = len(arg)
    return rolling_count(arg, full_window, freq=freq, time_rule=time_rule)


def expanding_quantile(arg, quantile, min_periods=1, freq=None,
                       time_rule=None):
    """Quantile over an expanding window

    Implemented as ``rolling_quantile`` with the window set to ``len(arg)``.

    Parameters
    ----------
    arg : Series, DataFrame
    quantile : 0 <= quantile <= 1
    min_periods : int
        Minimum number of observations in window required to have a value
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing statistic
    time_rule : optional
        Forwarded unchanged to ``rolling_quantile``

    Returns
    -------
    y : type of input argument
    """
    full_window = len(arg)
    return rolling_quantile(arg, full_window, quantile,
                            min_periods=min_periods,
                            freq=freq,
                            time_rule=time_rule)


@Substitution("Unbiased expanding covariance", _binary_arg_flex, _flex_retval)
@Appender(_expanding_doc)
def expanding_cov(arg1, arg2, min_periods=1, time_rule=None):
    # No literal docstring on purpose: the documentation comes from the
    # _expanding_doc template applied by the decorators above.
    # The longer of the two inputs sets the window length.
    longest = max(len(arg1), len(arg2))
    return rolling_cov(arg1, arg2, longest,
                       min_periods=min_periods,
                       time_rule=time_rule)


@Substitution("Expanding sample correlation", _binary_arg_flex, _flex_retval)
@Appender(_expanding_doc)
def expanding_corr(arg1, arg2, min_periods=1, time_rule=None):
    # No literal docstring on purpose: the documentation comes from the
    # _expanding_doc template applied by the decorators above.
    # The longer of the two inputs sets the window length.
    longest = max(len(arg1), len(arg2))
    return rolling_corr(arg1, arg2, longest,
                        min_periods=min_periods,
                        time_rule=time_rule)


def expanding_corr_pairwise(df, min_periods=1):
    """
    Compute pairwise expanding correlation matrices, returned as a Panel
    whose items are dates

    Implemented as ``rolling_corr_pairwise`` with the window set to
    ``len(df)``.

    Parameters
    ----------
    df : DataFrame
    min_periods : int, default 1

    Returns
    -------
    correls : Panel
    """
    return rolling_corr_pairwise(df, len(df), min_periods=min_periods)


def expanding_apply(arg, func, min_periods=1, freq=None, time_rule=None):
    """Apply an arbitrary reducing function over an expanding window

    Implemented as ``rolling_apply`` with the window set to ``len(arg)``.

    Parameters
    ----------
    arg : Series, DataFrame
    func : function
        Must produce a single value from an ndarray input
    min_periods : int
        Minimum number of observations in window required to have a value
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing statistic
    time_rule : optional
        Forwarded unchanged to ``rolling_apply``

    Returns
    -------
    y : type of input argument
    """
    return rolling_apply(arg, len(arg), func,
                         min_periods=min_periods,
                         freq=freq,
                         time_rule=time_rule)
108 changes: 108 additions & 0 deletions pandas/stats/tests/test_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,114 @@ def _check_binary_ew(self, func):

self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5)

def test_expanding_apply(self):
    # Reducer shared by both checks below.
    def window_mean(values):
        return values.mean()

    # An empty Series must come back unchanged.
    empty = Series([])
    assert_series_equal(empty, mom.expanding_apply(empty, window_mean))

    # expanding_apply with a mean reducer must agree with np.mean; the
    # keyword names are the ones the shared checker calls with.
    def expanding_mean(x, min_periods=1, freq=None):
        return mom.expanding_apply(x, window_mean,
                                   min_periods=min_periods,
                                   freq=freq)

    self._check_expanding(expanding_mean, np.mean)

def test_expanding_corr(self):
    # Second input is a noisy copy of the first with the last 5 points cut.
    clean = self.series.dropna()
    noisy = (clean + randn(len(clean)))[:-5]

    expanding = mom.expanding_corr(clean, noisy)
    # Reference: a rolling correlation whose window covers the whole input.
    reference = mom.rolling_corr(clean, noisy, len(clean), min_periods=1)

    assert_almost_equal(reference, expanding)

def test_expanding_count(self):
    # Expanding count must match a rolling count over the full length.
    expanding = mom.expanding_count(self.series)
    reference = mom.rolling_count(self.series, len(self.series))
    assert_almost_equal(expanding, reference)

def test_expanding_quantile(self):
    # Expanding median (q=0.5) must match a full-length rolling quantile.
    expanding = mom.expanding_quantile(self.series, 0.5)
    reference = mom.rolling_quantile(self.series, len(self.series), 0.5,
                                     min_periods=1)
    assert_almost_equal(expanding, reference)

def test_expanding_cov(self):
    # Second input is a noisy copy of the first with the last 5 points cut.
    base = self.series
    noisy = (base + randn(len(base)))[:-5]

    expanding = mom.expanding_cov(base, noisy)
    # Reference: a rolling covariance whose window covers the whole input.
    reference = mom.rolling_cov(base, noisy, len(base), min_periods=1)

    assert_almost_equal(reference, expanding)

def test_expanding_max(self):
    # preserve_nan=False skips the checker's "NaN stays NaN" assertion.
    self._check_expanding(func=mom.expanding_max,
                          static_comp=np.max,
                          preserve_nan=False)

def test_expanding_corr_pairwise(self):
    frame = self.frame
    expanding = mom.expanding_corr_pairwise(frame)
    # Reference: pairwise rolling correlation over the full frame length.
    reference = mom.rolling_corr_pairwise(frame, len(frame), min_periods=1)

    # Compare the two results item by item.
    for item in expanding.items:
        assert_almost_equal(expanding[item], reference[item])

def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True,
                             has_time_rule=True, preserve_nan=True):
    # Shared checks for an expanding function applied to plain ndarrays.
    #
    # func            : expanding function under test, called as func(arr)
    #                   and, when has_min_periods, func(arr, min_periods=k)
    # static_comp     : reference reducer applied to a prefix of the input
    # has_min_periods : whether func accepts a min_periods keyword
    # has_time_rule   : accepted for signature parity; not used in this body
    # preserve_nan    : whether positions in self._nan_locs must stay NaN
    #
    # All boolean checks go through self.assert_ rather than the bare
    # assert statement, so they are not stripped under ``python -O`` and are
    # consistent with the rest of this method.
    result = func(self.arr)

    # Value at position 10 must equal the reducer over the first 11 points.
    assert_almost_equal(result[10],
                        static_comp(self.arr[:11]))

    if preserve_nan:
        self.assert_(np.isnan(result[self._nan_locs]).all())

    arr = randn(50)

    if has_min_periods:
        # Fewer than min_periods observations -> NaN; last value uses all 50.
        result = func(arr, min_periods=30)
        self.assert_(np.isnan(result[:29]).all())
        assert_almost_equal(result[-1], static_comp(arr[:50]))

        # min_periods is working correctly
        result = func(arr, min_periods=15)
        self.assert_(np.isnan(result[13]))
        self.assert_(not np.isnan(result[14]))

        arr2 = randn(20)
        result = func(arr2, min_periods=5)
        self.assert_(isnull(result[3]))
        self.assert_(notnull(result[4]))

        # min_periods=0
        result0 = func(arr, min_periods=0)
        result1 = func(arr, min_periods=1)
        assert_almost_equal(result0, result1)
    else:
        result = func(arr)
        assert_almost_equal(result[-1], static_comp(arr[:50]))

def _check_expanding_structures(self, func):
    # A Series input must yield a Series (subclasses allowed) ...
    self.assert_(isinstance(func(self.series), Series))
    # ... and a DataFrame input must yield exactly a DataFrame.
    self.assertEquals(type(func(self.frame)), DataFrame)

def _check_expanding(self, func, static_comp, has_min_periods=True,
                     has_time_rule=True,
                     preserve_nan=True):
    # Run the ndarray value checks first, then the container-type checks.
    ndarray_options = dict(has_min_periods=has_min_periods,
                           has_time_rule=has_time_rule,
                           preserve_nan=preserve_nan)
    self._check_expanding_ndarray(func, static_comp, **ndarray_options)
    self._check_expanding_structures(func)

if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
Expand Down