diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py
index 29df509832ea6..1da95e120a397 100644
--- a/pandas/stats/moments.py
+++ b/pandas/stats/moments.py
@@ -19,7 +19,12 @@
            'rolling_corr', 'rolling_var', 'rolling_skew', 'rolling_kurt',
            'rolling_quantile', 'rolling_median', 'rolling_apply',
            'rolling_corr_pairwise',
-           'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov']
+           'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov',
+           'expanding_count', 'expanding_max', 'expanding_min',
+           'expanding_sum', 'expanding_mean', 'expanding_std',
+           'expanding_cov', 'expanding_corr', 'expanding_var',
+           'expanding_skew', 'expanding_kurt', 'expanding_quantile',
+           'expanding_median', 'expanding_apply', 'expanding_corr_pairwise']
 
 #-------------------------------------------------------------------------------
 # Docs
@@ -79,6 +84,45 @@
 y : type of input argument
 """
 
+
+_expanding_doc = """
+%s
+
+Parameters
+----------
+%s
+min_periods : int
+    Minimum number of observations in window required to have a value
+freq : None or string alias / date offset object, default=None
+    Frequency to conform to before computing statistic
+time_rule : optional, default=None
+    Passed through unchanged to the underlying rolling computation
+
+Returns
+-------
+%s
+"""
+
+# Template for the two-argument functions.  They take no ``freq`` keyword, so
+# it is left out here rather than documenting an argument that would raise
+# TypeError when supplied.
+_expanding_binary_doc = """
+%s
+
+Parameters
+----------
+%s
+min_periods : int
+    Minimum number of observations in window required to have a value
+time_rule : optional, default=None
+    Passed through unchanged to the underlying rolling function
+
+Returns
+-------
+%s
+"""
+
+
 _type_of_input = "y : type of input argument"
 
 _flex_retval = """y : type depends on inputs
@@ -465,3 +509,149 @@ def call_cython(arg, window, minp):
         return lib.roll_generic(arg, window, minp, func)
     return _rolling_moment(arg, window, call_cython, min_periods,
                            freq=freq, time_rule=time_rule)
+
+
+def _expanding_func(func, desc, check_minp=_use_window):
+    """
+    Build an expanding-window function around a rolling kernel.
+
+    The returned function calls ``func(arg, window, minp, **kwds)`` through
+    ``_rolling_moment`` with the window set to the length of its input.
+    ``desc`` is substituted into the generated docstring and ``check_minp``
+    maps ``(minp, window)`` to the minimum-observation count handed to
+    ``func``.
+    """
+    @Substitution(desc, _unary_arg, _type_of_input)
+    @Appender(_expanding_doc)
+    @wraps(func)
+    def f(arg, min_periods=1, freq=None, time_rule=None, **kwargs):
+        # NOTE(review): the window is measured before any freq conversion.
+        # If conforming to ``freq`` lengthens the data, the window may be
+        # shorter than the converted input -- confirm in _rolling_moment.
+        window = len(arg)
+
+        def call_cython(arg, window, minp, **kwds):
+            minp = check_minp(minp, window)
+            return func(arg, window, minp, **kwds)
+        return _rolling_moment(arg, window, call_cython, min_periods,
+                               freq=freq, time_rule=time_rule, **kwargs)
+
+    return f
+
+expanding_max = _expanding_func(lib.roll_max2, 'Expanding maximum')
+expanding_min = _expanding_func(lib.roll_min2, 'Expanding minimum')
+expanding_sum = _expanding_func(lib.roll_sum, 'Expanding sum')
+expanding_mean = _expanding_func(lib.roll_mean, 'Expanding mean')
+expanding_median = _expanding_func(lib.roll_median_cython, 'Expanding median')
+
+expanding_std = _expanding_func(_ts_std,
+                                'Unbiased expanding standard deviation',
+                                check_minp=_require_min_periods(2))
+expanding_var = _expanding_func(lib.roll_var, 'Unbiased expanding variance',
+                                check_minp=_require_min_periods(2))
+expanding_skew = _expanding_func(lib.roll_skew, 'Unbiased expanding skewness',
+                                 check_minp=_require_min_periods(3))
+expanding_kurt = _expanding_func(lib.roll_kurt, 'Unbiased expanding kurtosis',
+                                 check_minp=_require_min_periods(4))
+
+
+def expanding_count(arg, freq=None, time_rule=None):
+    """
+    Expanding count of number of non-NaN observations.
+
+    Parameters
+    ----------
+    arg : DataFrame or numpy ndarray-like
+    freq : None or string alias / date offset object, default=None
+        Frequency to conform to before computing statistic
+    time_rule : optional, default=None
+        Passed through unchanged to rolling_count
+
+    Returns
+    -------
+    expanding_count : type of caller
+    """
+    return rolling_count(arg, len(arg), freq=freq, time_rule=time_rule)
+
+
+def expanding_quantile(arg, quantile, min_periods=1, freq=None,
+                       time_rule=None):
+    """Expanding quantile
+
+    Parameters
+    ----------
+    arg : Series, DataFrame
+    quantile : 0 <= quantile <= 1
+    min_periods : int
+        Minimum number of observations in window required to have a value
+    freq : None or string alias / date offset object, default=None
+        Frequency to conform to before computing statistic
+    time_rule : optional, default=None
+        Passed through unchanged to rolling_quantile
+
+    Returns
+    -------
+    y : type of input argument
+    """
+    return rolling_quantile(arg, len(arg), quantile, min_periods=min_periods,
+                            freq=freq, time_rule=time_rule)
+
+
+@Substitution("Unbiased expanding covariance", _binary_arg_flex, _flex_retval)
+@Appender(_expanding_binary_doc)
+def expanding_cov(arg1, arg2, min_periods=1, time_rule=None):
+    window = max(len(arg1), len(arg2))
+    return rolling_cov(arg1, arg2, window,
+                       min_periods=min_periods, time_rule=time_rule)
+
+
+@Substitution("Expanding sample correlation", _binary_arg_flex, _flex_retval)
+@Appender(_expanding_binary_doc)
+def expanding_corr(arg1, arg2, min_periods=1, time_rule=None):
+    window = max(len(arg1), len(arg2))
+    return rolling_corr(arg1, arg2, window,
+                        min_periods=min_periods, time_rule=time_rule)
+
+
+def expanding_corr_pairwise(df, min_periods=1):
+    """
+    Computes pairwise expanding correlation matrices as Panel whose items are
+    dates
+
+    Parameters
+    ----------
+    df : DataFrame
+    min_periods : int, default 1
+
+    Returns
+    -------
+    correls : Panel
+    """
+
+    window = len(df)
+
+    return rolling_corr_pairwise(df, window, min_periods=min_periods)
+
+
+def expanding_apply(arg, func, min_periods=1, freq=None, time_rule=None):
+    """Generic expanding function application
+
+    Parameters
+    ----------
+    arg : Series, DataFrame
+    func : function
+        Must produce a single value from an ndarray input
+    min_periods : int
+        Minimum number of observations in window required to have a value
+    freq : None or string alias / date offset object, default=None
+        Frequency to conform to before computing statistic
+    time_rule : optional, default=None
+        Passed through unchanged to rolling_apply
+
+    Returns
+    -------
+    y : type of input argument
+    """
+    window = len(arg)
+    return rolling_apply(arg, window, func, min_periods=min_periods, freq=freq,
+                         time_rule=time_rule)
diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py
index 1d80cb167b4cd..6fcb3caa9f4d7 100644
--- a/pandas/stats/tests/test_moments.py
+++ b/pandas/stats/tests/test_moments.py
@@ -344,6 +344,119 @@ def _check_binary_ew(self, func):
 
         self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5)
 
+    def test_expanding_apply(self):
+        # An empty Series should come back unchanged
+        ser = Series([])
+        assert_series_equal(ser, mom.expanding_apply(ser, lambda x: x.mean()))
+
+        def expanding_mean(x, min_periods=1, freq=None):
+            return mom.expanding_apply(x,
+                                       lambda x: x.mean(),
+                                       min_periods=min_periods,
+                                       freq=freq)
+        self._check_expanding(expanding_mean, np.mean)
+
+    def test_expanding_corr(self):
+        A = self.series.dropna()
+        B = (A + randn(len(A)))[:-5]
+
+        result = mom.expanding_corr(A, B)
+
+        rolling_result = mom.rolling_corr(A, B, len(A), min_periods=1)
+
+        assert_almost_equal(rolling_result, result)
+
+    def test_expanding_count(self):
+        result = mom.expanding_count(self.series)
+        assert_almost_equal(result, mom.rolling_count(self.series,
+                                                      len(self.series)))
+
+    def test_expanding_quantile(self):
+        result = mom.expanding_quantile(self.series, 0.5)
+
+        rolling_result = mom.rolling_quantile(self.series,
+                                              len(self.series),
+                                              0.5, min_periods=1)
+
+        assert_almost_equal(result, rolling_result)
+
+    def test_expanding_cov(self):
+        A = self.series
+        B = (A + randn(len(A)))[:-5]
+
+        result = mom.expanding_cov(A, B)
+
+        rolling_result = mom.rolling_cov(A, B, len(A), min_periods=1)
+
+        assert_almost_equal(rolling_result, result)
+
+    def test_expanding_max(self):
+        self._check_expanding(mom.expanding_max, np.max, preserve_nan=False)
+
+    def test_expanding_corr_pairwise(self):
+        result = mom.expanding_corr_pairwise(self.frame)
+
+        rolling_result = mom.rolling_corr_pairwise(self.frame,
+                                                   len(self.frame),
+                                                   min_periods=1)
+
+        for i in result.items:
+            assert_almost_equal(result[i], rolling_result[i])
+
+    def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True,
+                                 has_time_rule=True, preserve_nan=True):
+        # has_time_rule is accepted but not used by this helper
+        result = func(self.arr)
+
+        # Entry 10 must equal the statistic over the first 11 observations
+        assert_almost_equal(result[10],
+                            static_comp(self.arr[:11]))
+
+        if preserve_nan:
+            self.assert_(np.isnan(result[self._nan_locs]).all())
+
+        arr = randn(50)
+
+        if has_min_periods:
+            result = func(arr, min_periods=30)
+            self.assert_(np.isnan(result[:29]).all())
+            assert_almost_equal(result[-1], static_comp(arr[:50]))
+
+            # min_periods is working correctly
+            result = func(arr, min_periods=15)
+            self.assert_(np.isnan(result[13]))
+            self.assert_(not np.isnan(result[14]))
+
+            arr2 = randn(20)
+            result = func(arr2, min_periods=5)
+            self.assert_(isnull(result[3]))
+            self.assert_(notnull(result[4]))
+
+            # min_periods=0
+            result0 = func(arr, min_periods=0)
+            result1 = func(arr, min_periods=1)
+            assert_almost_equal(result0, result1)
+        else:
+            result = func(arr)
+            assert_almost_equal(result[-1], static_comp(arr[:50]))
+
+    def _check_expanding_structures(self, func):
+        # Series in -> Series out, DataFrame in -> DataFrame out
+        series_result = func(self.series)
+        self.assert_(isinstance(series_result, Series))
+        frame_result = func(self.frame)
+        self.assertEquals(type(frame_result), DataFrame)
+
+    def _check_expanding(self, func, static_comp, has_min_periods=True,
+                         has_time_rule=True,
+                         preserve_nan=True):
+        # Run the ndarray value checks, then the container type checks
+        self._check_expanding_ndarray(func, static_comp,
+                                      has_min_periods=has_min_periods,
+                                      has_time_rule=has_time_rule,
+                                      preserve_nan=preserve_nan)
+        self._check_expanding_structures(func)
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],