Skip to content

Commit 9a92ad2

Browse files
Tobias Brandttobias-aam
Tobias Brandt
authored and committed
ENH: Implemented pairwise rolling moment functions.
Implemented rolling_cov_pairwise function. Implemented ewmcov_pairwise and ewmcorr_pairwise. Implemented expanding_cov_pairwise. Refactored the rolling moment functions to use _flex_pairwise_moment.
1 parent 50c0a03 commit 9a92ad2

File tree

1 file changed

+122
-70
lines changed

1 file changed

+122
-70
lines changed

pandas/stats/moments.py

+122-70
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,15 @@
2020
'rolling_sum', 'rolling_mean', 'rolling_std', 'rolling_cov',
2121
'rolling_corr', 'rolling_var', 'rolling_skew', 'rolling_kurt',
2222
'rolling_quantile', 'rolling_median', 'rolling_apply',
23-
'rolling_corr_pairwise', 'rolling_window',
23+
'rolling_cov_pairwise', 'rolling_corr_pairwise', 'rolling_window',
2424
'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov',
25+
'ewmcorr_pairwise', 'ewmcov_pairwise',
2526
'expanding_count', 'expanding_max', 'expanding_min',
2627
'expanding_sum', 'expanding_mean', 'expanding_std',
2728
'expanding_cov', 'expanding_corr', 'expanding_var',
2829
'expanding_skew', 'expanding_kurt', 'expanding_quantile',
29-
'expanding_median', 'expanding_apply', 'expanding_corr_pairwise']
30+
'expanding_median', 'expanding_apply',
31+
'expanding_cov_pairwise', 'expanding_corr_pairwise']
3032

3133
#------------------------------------------------------------------------------
3234
# Docs
@@ -102,7 +104,7 @@
102104
103105
Returns
104106
-------
105-
y : type of input argument
107+
%s
106108
"""
107109

108110

@@ -139,6 +141,8 @@
139141
DataFrame / Series -> Computes result for each column
140142
Series / Series -> Series"""
141143

144+
# Returns-section text passed as the last @Substitution argument of the
# *_pairwise moment functions.
_pairwise_retval = "y : Panel whose items are df1.index values"
145+
142146
_unary_arg = "arg : Series, DataFrame"
143147

144148
_binary_arg_flex = """arg1 : Series, DataFrame, or ndarray
@@ -147,6 +151,9 @@
147151
_binary_arg = """arg1 : Series, DataFrame, or ndarray
148152
arg2 : Series, DataFrame, or ndarray"""
149153

154+
# Parameters-section text (the two input frames) passed to @Substitution by
# the *_pairwise moment functions.
_pairwise_arg = """df1 : DataFrame
155+
df2 : DataFrame"""
156+
150157
_bias_doc = r"""bias : boolean, default False
151158
Use a standard estimation bias correction
152159
"""
@@ -232,7 +239,8 @@ def _flex_binary_moment(arg1, arg2, f):
232239
raise TypeError("arguments to moment function must be of type "
233240
"np.ndarray/Series/DataFrame")
234241

235-
if isinstance(arg1, (np.ndarray,Series)) and isinstance(arg2, (np.ndarray,Series)):
242+
if isinstance(arg1, (np.ndarray, Series)) and \
243+
isinstance(arg2, (np.ndarray,Series)):
236244
X, Y = _prep_binary(arg1, arg2)
237245
return f(X, Y)
238246
elif isinstance(arg1, DataFrame):
@@ -258,40 +266,55 @@ def _flex_binary_moment(arg1, arg2, f):
258266
return _flex_binary_moment(arg2, arg1, f)
259267

260268

261-
def rolling_corr_pairwise(df, window, min_periods=None):
262-
"""
263-
Computes pairwise rolling correlation matrices as Panel whose items are
264-
dates.
265-
266-
Parameters
267-
----------
268-
df : DataFrame
269-
window : int
270-
Size of the moving window. This is the number of observations used for
271-
calculating the statistic.
272-
min_periods : int, default None
273-
Minimum number of observations in window required to have a value
274-
(otherwise result is NA).
275-
276-
Returns
277-
-------
278-
correls : Panel
279-
"""
280-
from pandas import Panel
269+
def _flex_pairwise_moment(moment_func, df1, df2, **kwargs):
# Shared driver for the *_pairwise functions: calls
# ``moment_func(df1[k1], df2[k2], **kwargs)`` for each column pair and
# assembles the per-pair results with Panel.from_dict(...).swapaxes.
# NOTE(review): the removed line ``from pandas import Panel`` was the only
# visible import of Panel -- confirm Panel is in scope at module level.
281270
from collections import defaultdict
282271

272+
# Detect symmetry
# Identity (not equality) test: the pairwise wrappers assign ``df2 = df1``
# for single-frame calls, so the same object arrives on both sides.
273+
if df2 is df1:
274+
symmetric = True
275+
else:
276+
symmetric = False
277+
283278
all_results = defaultdict(dict)
284279

285-
for i, k1 in enumerate(df.columns):
286-
for k2 in df.columns[i:]:
287-
corr = rolling_corr(df[k1], df[k2], window,
288-
min_periods=min_periods)
289-
all_results[k1][k2] = corr
290-
all_results[k2][k1] = corr
280+
for i, k1 in enumerate(df1.columns):
281+
for j, k2 in enumerate(df2.columns):
282+
# Symmetric input: the (k2, k1) entry was stored on an earlier outer
# iteration, so reuse it instead of calling moment_func again.
if j<i and symmetric:
283+
all_results[k1][k2] = all_results[k2][k1]
284+
else:
285+
all_results[k1][k2] = moment_func(df1[k1], df2[k2], **kwargs)
291286

292287
return Panel.from_dict(all_results).swapaxes('items', 'major')
293288

294289

290+
@Substitution("Pairwise unbiased moving covariance", _pairwise_arg,
              _pairwise_retval)
@Appender(_doc_template)
def rolling_cov_pairwise(df1, df2=None, window=None, min_periods=None,
                         freq=None, center=False, time_rule=None):
    # Rolling covariance of every df1 column against every df2 column; the
    # per-pair computation is rolling_cov, driven by _flex_pairwise_moment.
    #
    # df2 is optional (as in the expanding_*_pairwise functions) so that a
    # single-frame keyword call such as ``rolling_cov_pairwise(df, window=10)``
    # no longer fails with a missing-argument TypeError.

    # Single-frame positional form ``f(df, window)``: a numeric second
    # positional argument is the window, not a second frame.
    if window is None and isinstance(df2, (int, float)):
        window, df2 = df2, None

    if df2 is None:
        # Pass the very same object on both sides so that
        # _flex_pairwise_moment's ``df2 is df1`` symmetry check applies.
        df2 = df1

    return _flex_pairwise_moment(rolling_cov, df1, df2, window=window,
                                 min_periods=min_periods, freq=freq,
                                 center=center, time_rule=time_rule)
302+
303+
304+
@Substitution("Pairwise moving sample correlation", _pairwise_arg,
              _pairwise_retval)
@Appender(_doc_template)
def rolling_corr_pairwise(df1, df2=None, window=None, min_periods=None,
                          freq=None, center=False, time_rule=None):
    # Rolling correlation of every df1 column against every df2 column; the
    # per-pair computation is rolling_corr, driven by _flex_pairwise_moment.
    #
    # df2 is optional so that both forms of the previous single-frame API
    # keep working: ``f(df, 10)`` (handled by the numeric check below) and
    # ``f(df, window=10)`` (which previously raised TypeError because df2
    # had no default).

    # Previous API: the second positional argument was the window.
    if window is None and isinstance(df2, (int, float)):
        window, df2 = df2, None

    if df2 is None:
        # Pass the very same object on both sides so that
        # _flex_pairwise_moment's ``df2 is df1`` symmetry check applies.
        df2 = df1

    return _flex_pairwise_moment(rolling_corr, df1, df2, window=window,
                                 min_periods=min_periods, freq=freq,
                                 center=center, time_rule=time_rule)
316+
317+
295318
def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
296319
time_rule=None, args=(), kwargs={}, **kwds):
297320
"""
@@ -401,7 +424,8 @@ def _get_center_of_mass(com, span, halflife):
401424
return float(com)
402425

403426

404-
@Substitution("Exponentially-weighted moving average", _unary_arg, "")
427+
@Substitution("Exponentially-weighted moving average", _unary_arg, "",
428+
_type_of_input)
405429
@Appender(_ewm_doc)
406430
def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None, time_rule=None,
407431
adjust=True):
@@ -424,7 +448,8 @@ def _first_valid_index(arr):
424448
return notnull(arr).argmax() if len(arr) else 0
425449

426450

427-
@Substitution("Exponentially-weighted moving variance", _unary_arg, _bias_doc)
451+
@Substitution("Exponentially-weighted moving variance", _unary_arg, _bias_doc,
452+
_type_of_input)
428453
@Appender(_ewm_doc)
429454
def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
430455
freq=None, time_rule=None):
@@ -440,7 +465,8 @@ def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
440465
return result
441466

442467

443-
@Substitution("Exponentially-weighted moving std", _unary_arg, _bias_doc)
468+
@Substitution("Exponentially-weighted moving std", _unary_arg, _bias_doc,
469+
_type_of_input)
444470
@Appender(_ewm_doc)
445471
def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
446472
time_rule=None):
@@ -451,38 +477,62 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
451477
ewmvol = ewmstd
452478

453479

454-
@Substitution("Exponentially-weighted moving covariance", _binary_arg, "")
480+
@Substitution("Exponentially-weighted moving covariance", _binary_arg, "",
481+
_type_of_input)
455482
@Appender(_ewm_doc)
456483
def ewmcov(arg1, arg2, com=None, span=None, halflife=None, min_periods=0, bias=False,
457484
freq=None, time_rule=None):
458-
X, Y = _prep_binary(arg1, arg2)
459-
460-
X = _conv_timerule(X, freq, time_rule)
461-
Y = _conv_timerule(Y, freq, time_rule)
462-
463-
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
485+
arg1 = _conv_timerule(arg1, freq, time_rule)
486+
arg2 = _conv_timerule(arg2, freq, time_rule)
464487

465-
result = (mean(X * Y) - mean(X) * mean(Y))
466-
com = _get_center_of_mass(com, span, halflife)
488+
# Per-pair kernel handed to _flex_binary_moment: ewma(X*Y) - ewma(X)*ewma(Y).
def _get_ewmcov(X, Y):
489+
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
490+
return (mean(X * Y) - mean(X) * mean(Y))
491+
# The kernel runs here, i.e. before ``com`` is rebound below, so the ewma
# calls see the caller-supplied com/span/halflife values.
result = _flex_binary_moment(arg1, arg2, _get_ewmcov)
467492
# bias=False rescales the result by (1 + 2*com) / (2*com).
if not bias:
493+
com = _get_center_of_mass(com, span, halflife)
468494
result *= (1.0 + 2.0 * com) / (2.0 * com)
469495

470496
return result
471497

472498

473-
@Substitution("Exponentially-weighted moving " "correlation", _binary_arg, "")
499+
@Substitution("Pairwise exponentially-weighted moving covariance",
              _pairwise_arg, "", _pairwise_retval)
@Appender(_ewm_doc)
def ewmcov_pairwise(df1, df2=None, com=None, span=None, min_periods=0,
                    bias=False, freq=None, time_rule=None, halflife=None):
    # Exponentially-weighted covariance of every df1 column against every
    # df2 column; the per-pair computation is ewmcov.
    #
    # ``halflife`` is forwarded so the decay can be given the same three
    # ways ewmcov accepts (com, span or halflife). It is appended at the end
    # of the signature so existing positional calls keep their meaning.
    if df2 is None:
        # Single-frame call: pass the same object on both sides so that
        # _flex_pairwise_moment's ``df2 is df1`` symmetry check applies.
        df2 = df1
    return _flex_pairwise_moment(ewmcov, df1, df2, com=com, span=span,
                                 halflife=halflife, min_periods=min_periods,
                                 bias=bias, freq=freq, time_rule=time_rule)
508+
509+
510+
@Substitution("Exponentially-weighted moving correlation", _binary_arg, "",
511+
_type_of_input)
474512
@Appender(_ewm_doc)
475513
def ewmcorr(arg1, arg2, com=None, span=None, halflife=None, min_periods=0,
476514
freq=None, time_rule=None):
477-
X, Y = _prep_binary(arg1, arg2)
515+
arg1 = _conv_timerule(arg1, freq, time_rule)
516+
arg2 = _conv_timerule(arg2, freq, time_rule)
478517

479-
X = _conv_timerule(X, freq, time_rule)
480-
Y = _conv_timerule(Y, freq, time_rule)
518+
# Per-pair kernel handed to _flex_binary_moment: the ewma-based covariance
# divided by _zsqrt of the product of the two bias=True ewm variances.
def _get_ewmcorr(X, Y):
519+
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
520+
var = lambda x: ewmvar(x, com=com, span=span, halflife=halflife, min_periods=min_periods,
521+
bias=True)
522+
return (mean(X * Y) - mean(X) * mean(Y)) / _zsqrt(var(X) * var(Y))
523+
# Dispatch through _flex_binary_moment rather than calling _prep_binary
# directly as the removed lines did.
result = _flex_binary_moment(arg1, arg2, _get_ewmcorr)
524+
return result
481525

482-
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
483-
var = lambda x: ewmvar(x, com=com, span=span, halflife=halflife, min_periods=min_periods,
484-
bias=True)
485-
return (mean(X * Y) - mean(X) * mean(Y)) / _zsqrt(var(X) * var(Y))
526+
527+
@Substitution("Pairwise exponentially-weighted moving correlation",
              _pairwise_arg, "", _pairwise_retval)
@Appender(_ewm_doc)
def ewmcorr_pairwise(df1, df2=None, com=None, span=None, min_periods=0,
                     freq=None, time_rule=None, halflife=None):
    # Exponentially-weighted correlation of every df1 column against every
    # df2 column; the per-pair computation is ewmcorr.
    #
    # ``halflife`` is forwarded so the decay can be given the same three
    # ways ewmcorr accepts (com, span or halflife). It is appended at the end
    # of the signature so existing positional calls keep their meaning.
    if df2 is None:
        # Single-frame call: pass the same object on both sides so that
        # _flex_pairwise_moment's ``df2 is df1`` symmetry check applies.
        df2 = df1
    return _flex_pairwise_moment(ewmcorr, df1, df2, com=com, span=span,
                                 halflife=halflife, min_periods=min_periods,
                                 freq=freq, time_rule=time_rule)
486536

487537

488538
def _zsqrt(x):
@@ -886,6 +936,18 @@ def expanding_cov(arg1, arg2, min_periods=1, freq=None, center=False,
886936
center=center, time_rule=time_rule)
887937

888938

939+
@Substitution("Pairwise unbiased expanding covariance", _pairwise_arg,
              _pairwise_retval)
@Appender(_expanding_doc)
def expanding_cov_pairwise(df1, df2=None, min_periods=1, freq=None,
                           center=False, time_rule=None):
    # Expanding covariance of every df1 column against every df2 column;
    # the per-pair computation is expanding_cov.

    # With no second frame, df1 is paired with itself. The same object is
    # used on both sides, which is what _flex_pairwise_moment's
    # ``df2 is df1`` symmetry check looks for.
    other = df1 if df2 is None else df2

    moment_kwargs = dict(min_periods=min_periods, freq=freq,
                         center=center, time_rule=time_rule)
    return _flex_pairwise_moment(expanding_cov, df1, other, **moment_kwargs)
949+
950+
889951
@Substitution("Expanding sample correlation.", _binary_arg_flex, _flex_retval)
890952
@Appender(_expanding_doc)
891953
def expanding_corr(arg1, arg2, min_periods=1, freq=None, center=False,
@@ -896,26 +958,16 @@ def expanding_corr(arg1, arg2, min_periods=1, freq=None, center=False,
896958
freq=freq, center=center, time_rule=time_rule)
897959

898960

899-
def expanding_corr_pairwise(df, min_periods=1):
900-
"""
901-
Computes pairwise expanding correlation matrices as Panel whose items are
902-
dates.
903-
904-
Parameters
905-
----------
906-
df : DataFrame
907-
min_periods : int, default 1
908-
Minimum number of observations in window required to have a value
909-
(otherwise result is NA).
910-
911-
Returns
912-
-------
913-
correls : Panel
914-
"""
915-
916-
window = len(df)
917-
918-
return rolling_corr_pairwise(df, window, min_periods=min_periods)
961+
@Substitution("Pairwise expanding sample correlation", _pairwise_arg,
              _pairwise_retval)
@Appender(_expanding_doc)
def expanding_corr_pairwise(df1, df2=None, min_periods=1, freq=None,
                            center=False, time_rule=None):
    # Expanding correlation of every df1 column against every df2 column;
    # the per-pair computation is expanding_corr.

    # Previous API was ``expanding_corr_pairwise(df, min_periods=1)``: a
    # numeric second positional argument is therefore min_periods, not a
    # second frame (same compatibility approach as rolling_corr_pairwise).
    if isinstance(df2, (int, float)):
        min_periods, df2 = df2, None

    if df2 is None:
        # Single-frame call: pass the same object on both sides so that
        # _flex_pairwise_moment's ``df2 is df1`` symmetry check applies.
        df2 = df1

    return _flex_pairwise_moment(expanding_corr, df1, df2,
                                 min_periods=min_periods, freq=freq,
                                 center=center, time_rule=time_rule)
919971

920972

921973
def expanding_apply(arg, func, min_periods=1, freq=None, center=False,

0 commit comments

Comments
 (0)