Skip to content

Commit aeba806

Browse files
Tobias Brandt (tobias-aam) authored and committed
ENH: Folded the *_pairwise rolling moment functions into the base function API.
1 parent 9a92ad2 commit aeba806

File tree

2 files changed: +116 -136 lines changed

pandas/stats/moments.py

+104 -118
Original file line number | Diff line number | Diff line change
@@ -5,30 +5,28 @@
55
from __future__ import division
66

77
from functools import wraps
8+
from collections import defaultdict
89

910
from numpy import NaN
1011
import numpy as np
1112

1213
from pandas.core.api import DataFrame, Series, Panel, notnull
1314
import pandas.algos as algos
14-
import pandas.core.common as com
15-
from pandas.core.common import _values_from_object
15+
import pandas.core.common as pdcom
1616

1717
from pandas.util.decorators import Substitution, Appender
1818

1919
__all__ = ['rolling_count', 'rolling_max', 'rolling_min',
2020
'rolling_sum', 'rolling_mean', 'rolling_std', 'rolling_cov',
2121
'rolling_corr', 'rolling_var', 'rolling_skew', 'rolling_kurt',
2222
'rolling_quantile', 'rolling_median', 'rolling_apply',
23-
'rolling_cov_pairwise', 'rolling_corr_pairwise', 'rolling_window',
23+
'rolling_corr_pairwise', 'rolling_window',
2424
'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov',
25-
'ewmcorr_pairwise', 'ewmcov_pairwise',
2625
'expanding_count', 'expanding_max', 'expanding_min',
2726
'expanding_sum', 'expanding_mean', 'expanding_std',
2827
'expanding_cov', 'expanding_corr', 'expanding_var',
2928
'expanding_skew', 'expanding_kurt', 'expanding_quantile',
30-
'expanding_median', 'expanding_apply',
31-
'expanding_cov_pairwise', 'expanding_corr_pairwise']
29+
'expanding_median', 'expanding_apply', 'expanding_corr_pairwise']
3230

3331
#------------------------------------------------------------------------------
3432
# Docs
@@ -203,25 +201,43 @@ def rolling_count(arg, window, freq=None, center=False, time_rule=None):
203201

204202
@Substitution("Unbiased moving covariance.", _binary_arg_flex, _flex_retval)
205203
@Appender(_doc_template)
206-
def rolling_cov(arg1, arg2, window, min_periods=None, freq=None,
207-
center=False, time_rule=None):
204+
def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None,
205+
center=False, time_rule=None, pairwise=None):
206+
if window is None and isinstance(arg2, (int, float)):
207+
window = arg2
208+
arg2 = arg1
209+
pairwise = True if pairwise is None else pairwise # only default unset
210+
elif arg2 is None:
211+
arg2 = arg1
212+
pairwise = True if pairwise is None else pairwise # only default unset
208213
arg1 = _conv_timerule(arg1, freq, time_rule)
209214
arg2 = _conv_timerule(arg2, freq, time_rule)
210215
window = min(window, len(arg1), len(arg2))
211216

212217
def _get_cov(X, Y):
213-
mean = lambda x: rolling_mean(x, window, min_periods,center=center)
214-
count = rolling_count(X + Y, window,center=center)
218+
mean = lambda x: rolling_mean(x, window, min_periods, center=center)
219+
count = rolling_count(X + Y, window, center=center)
215220
bias_adj = count / (count - 1)
216221
return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
217-
rs = _flex_binary_moment(arg1, arg2, _get_cov)
222+
rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise))
218223
return rs
219224

220225

221226
@Substitution("Moving sample correlation.", _binary_arg_flex, _flex_retval)
222227
@Appender(_doc_template)
223-
def rolling_corr(arg1, arg2, window, min_periods=None, freq=None,
224-
center=False, time_rule=None):
228+
def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None,
229+
center=False, time_rule=None, pairwise=None):
230+
if window is None and isinstance(arg2, (int, float)):
231+
window = arg2
232+
arg2 = arg1
233+
pairwise = True if pairwise is None else pairwise # only default unset
234+
elif arg2 is None:
235+
arg2 = arg1
236+
pairwise = True if pairwise is None else pairwise # only default unset
237+
arg1 = _conv_timerule(arg1, freq, time_rule)
238+
arg2 = _conv_timerule(arg2, freq, time_rule)
239+
window = min(window, len(arg1), len(arg2))
240+
225241
def _get_corr(a, b):
226242
num = rolling_cov(a, b, window, min_periods, freq=freq,
227243
center=center, time_rule=time_rule)
@@ -230,10 +246,10 @@ def _get_corr(a, b):
230246
rolling_std(b, window, min_periods, freq=freq,
231247
center=center, time_rule=time_rule))
232248
return num / den
233-
return _flex_binary_moment(arg1, arg2, _get_corr)
249+
return _flex_binary_moment(arg1, arg2, _get_corr, pairwise=bool(pairwise))
234250

235251

236-
def _flex_binary_moment(arg1, arg2, f):
252+
def _flex_binary_moment(arg1, arg2, f, pairwise=False):
237253
if not (isinstance(arg1,(np.ndarray, Series, DataFrame)) and
238254
isinstance(arg2,(np.ndarray, Series, DataFrame))):
239255
raise TypeError("arguments to moment function must be of type "
@@ -249,10 +265,23 @@ def _flex_binary_moment(arg1, arg2, f):
249265
X, Y = arg1.align(arg2, join='outer')
250266
X = X + 0 * Y
251267
Y = Y + 0 * X
252-
res_columns = arg1.columns.union(arg2.columns)
253-
for col in res_columns:
254-
if col in X and col in Y:
255-
results[col] = f(X[col], Y[col])
268+
if pairwise is False:
269+
res_columns = arg1.columns.union(arg2.columns)
270+
for col in res_columns:
271+
if col in X and col in Y:
272+
results[col] = f(X[col], Y[col])
273+
elif pairwise is True:
274+
results = defaultdict(dict)
275+
for i, k1 in enumerate(arg1.columns):
276+
for j, k2 in enumerate(arg2.columns):
277+
if j<i and arg2 is arg1:
278+
# Symmetric case
279+
results[k1][k2] = results[k2][k1]
280+
else:
281+
results[k1][k2] = f(arg1[k1], arg2[k2])
282+
return Panel.from_dict(results).swapaxes('items', 'major')
283+
else:
284+
raise ValueError("'pairwise' is not True/False")
256285
else:
257286
res_columns = arg1.columns
258287
X, Y = arg1.align(arg2, axis=0, join='outer')
@@ -266,53 +295,14 @@ def _flex_binary_moment(arg1, arg2, f):
266295
return _flex_binary_moment(arg2, arg1, f)
267296

268297

269-
def _flex_pairwise_moment(moment_func, df1, df2, **kwargs):
270-
from collections import defaultdict
271-
272-
# Detect symmetry
273-
if df2 is df1:
274-
symmetric = True
275-
else:
276-
symmetric = False
277-
278-
all_results = defaultdict(dict)
279-
280-
for i, k1 in enumerate(df1.columns):
281-
for j, k2 in enumerate(df2.columns):
282-
if j<i and symmetric:
283-
all_results[k1][k2] = all_results[k2][k1]
284-
else:
285-
all_results[k1][k2] = moment_func(df1[k1], df2[k2], **kwargs)
286-
287-
return Panel.from_dict(all_results).swapaxes('items', 'major')
288-
289-
290-
@Substitution("Pairwise unbiased moving covariance", _pairwise_arg,
291-
_pairwise_retval)
292-
@Appender(_doc_template)
293-
def rolling_cov_pairwise(df1, df2, window=None, min_periods=None, freq=None,
294-
center=False, time_rule=None):
295-
# Try to preserve the previous API
296-
if window is None and isinstance(df2, (int, float)):
297-
window = df2
298-
df2 = df1
299-
return _flex_pairwise_moment(rolling_cov, df1, df2, window=window,
300-
min_periods=min_periods, freq=freq,
301-
center=center, time_rule=time_rule)
302-
303-
304298
@Substitution("Pairwise moving sample correlation", _pairwise_arg,
305299
_pairwise_retval)
306300
@Appender(_doc_template)
307-
def rolling_corr_pairwise(df1, df2, window=None, min_periods=None, freq=None,
308-
center=False, time_rule=None):
309-
# Try to preserve the previous API
310-
if window is None and isinstance(df2, (int, float)):
311-
window = df2
312-
df2 = df1
313-
return _flex_pairwise_moment(rolling_corr, df1, df2, window=window,
314-
min_periods=min_periods, freq=freq,
315-
center=center, time_rule=time_rule)
301+
def rolling_corr_pairwise(df1, df2=None, window=None, min_periods=None,
302+
freq=None, center=False, time_rule=None):
303+
return rolling_corr(df1, df2, window=window, min_periods=min_periods,
304+
freq=freq, center=center, time_rule=time_rule,
305+
pairwise=True)
316306

317307

318308
def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
@@ -361,7 +351,8 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
361351

362352
def _center_window(rs, window, axis):
363353
if axis > rs.ndim-1:
364-
raise ValueError("Requested axis is larger then no. of argument dimensions")
354+
raise ValueError("Requested axis is larger then no. of argument "
355+
"dimensions")
365356

366357
offset = int((window - 1) / 2.)
367358
if isinstance(rs, (Series, DataFrame, Panel)):
@@ -480,61 +471,55 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
480471
@Substitution("Exponentially-weighted moving covariance", _binary_arg, "",
481472
_type_of_input)
482473
@Appender(_ewm_doc)
483-
def ewmcov(arg1, arg2, com=None, span=None, halflife=None, min_periods=0, bias=False,
484-
freq=None, time_rule=None):
474+
def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, bias=False,
475+
freq=None, time_rule=None, pairwise=None):
476+
if arg2 is None:
477+
arg2 = arg1
478+
pairwise = True if pairwise is None else pairwise
479+
elif isinstance(arg2, (int, float)) and com is None:
480+
com = arg2
481+
arg2 = arg1
482+
pairwise = True if pairwise is None else pairwise
485483
arg1 = _conv_timerule(arg1, freq, time_rule)
486484
arg2 = _conv_timerule(arg2, freq, time_rule)
487485

488486
def _get_ewmcov(X, Y):
489487
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
490488
return (mean(X * Y) - mean(X) * mean(Y))
491-
result = _flex_binary_moment(arg1, arg2, _get_ewmcov)
489+
result = _flex_binary_moment(arg1, arg2, _get_ewmcov,
490+
pairwise=bool(pairwise))
492491
if not bias:
493492
com = _get_center_of_mass(com, span, halflife)
494493
result *= (1.0 + 2.0 * com) / (2.0 * com)
495494

496495
return result
497496

498497

499-
@Substitution("Pairwise exponentially-weighted moving covariance",
500-
_pairwise_arg, "", _pairwise_retval)
501-
@Appender(_ewm_doc)
502-
def ewmcov_pairwise(df1, df2=None, com=None, span=None, min_periods=0,
503-
bias=False, freq=None, time_rule=None):
504-
if df2 is None:
505-
df2 = df1
506-
return _flex_pairwise_moment(ewmcov, df1, df2, com=com, span=span,
507-
min_periods=min_periods, bias=bias, freq=freq, time_rule=time_rule)
508-
509-
510498
@Substitution("Exponentially-weighted moving correlation", _binary_arg, "",
511499
_type_of_input)
512500
@Appender(_ewm_doc)
513-
def ewmcorr(arg1, arg2, com=None, span=None, halflife=None, min_periods=0,
514-
freq=None, time_rule=None):
501+
def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
502+
freq=None, time_rule=None, pairwise=None):
503+
if arg2 is None:
504+
arg2 = arg1
505+
pairwise = True if pairwise is None else pairwise
506+
elif isinstance(arg2, (int, float)) and com is None:
507+
com = arg2
508+
arg2 = arg1
509+
pairwise = True if pairwise is None else pairwise
515510
arg1 = _conv_timerule(arg1, freq, time_rule)
516511
arg2 = _conv_timerule(arg2, freq, time_rule)
517512

518513
def _get_ewmcorr(X, Y):
519514
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
520515
var = lambda x: ewmvar(x, com=com, span=span, halflife=halflife, min_periods=min_periods,
521-
bias=True)
516+
bias=True)
522517
return (mean(X * Y) - mean(X) * mean(Y)) / _zsqrt(var(X) * var(Y))
523-
result = _flex_binary_moment(arg1, arg2, _get_ewmcorr)
518+
result = _flex_binary_moment(arg1, arg2, _get_ewmcorr,
519+
pairwise=bool(pairwise))
524520
return result
525521

526522

527-
@Substitution("Pairwise exponentially-weighted moving correlation",
528-
_pairwise_arg, "", _pairwise_retval)
529-
@Appender(_ewm_doc)
530-
def ewmcorr_pairwise(df1, df2=None, com=None, span=None, min_periods=0,
531-
freq=None, time_rule=None):
532-
if df2 is None:
533-
df2 = df1
534-
return _flex_pairwise_moment(ewmcorr, df1, df2, com=com, span=span,
535-
min_periods=min_periods, freq=freq, time_rule=time_rule)
536-
537-
538523
def _zsqrt(x):
539524
result = np.sqrt(x)
540525
mask = x < 0
@@ -779,8 +764,8 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None,
779764
if win_type is not None:
780765
raise ValueError(('Do not specify window type if using custom '
781766
'weights'))
782-
window = com._asarray_tuplesafe(window).astype(float)
783-
elif com.is_integer(window): # window size
767+
window = pdcom._asarray_tuplesafe(window).astype(float)
768+
elif pdcom.is_integer(window): # window size
784769
if win_type is None:
785770
raise ValueError('Must specify window type')
786771
try:
@@ -928,46 +913,47 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None,
928913

929914
@Substitution("Unbiased expanding covariance.", _binary_arg_flex, _flex_retval)
930915
@Appender(_expanding_doc)
931-
def expanding_cov(arg1, arg2, min_periods=1, freq=None, center=False,
932-
time_rule=None):
916+
def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, center=False,
917+
time_rule=None, pairwise=None):
918+
if arg2 is None:
919+
arg2 = arg1
920+
pairwise = True if pairwise is None else pairwise
921+
elif isinstance(arg2, (int, float)) and min_periods is None:
922+
min_periods = arg2
923+
arg2 = arg1
924+
pairwise = True if pairwise is None else pairwise
933925
window = max(len(arg1), len(arg2))
934926
return rolling_cov(arg1, arg2, window,
935927
min_periods=min_periods, freq=freq,
936-
center=center, time_rule=time_rule)
937-
938-
939-
@Substitution("Pairwise unbiased expanding covariance", _pairwise_arg,
940-
_pairwise_retval)
941-
@Appender(_expanding_doc)
942-
def expanding_cov_pairwise(df1, df2=None, min_periods=1, freq=None,
943-
center=False, time_rule=None):
944-
if df2 is None:
945-
df2 = df1
946-
return _flex_pairwise_moment(expanding_cov, df1, df2,
947-
min_periods=min_periods, freq=freq,
948-
center=center, time_rule=time_rule)
928+
center=center, time_rule=time_rule, pairwise=pairwise)
949929

950930

951931
@Substitution("Expanding sample correlation.", _binary_arg_flex, _flex_retval)
952932
@Appender(_expanding_doc)
953-
def expanding_corr(arg1, arg2, min_periods=1, freq=None, center=False,
954-
time_rule=None):
933+
def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, center=False,
934+
time_rule=None, pairwise=None):
935+
if arg2 is None:
936+
arg2 = arg1
937+
pairwise = True if pairwise is None else pairwise
938+
elif isinstance(arg2, (int, float)) and min_periods is None:
939+
min_periods = arg2
940+
arg2 = arg1
941+
pairwise = True if pairwise is None else pairwise
955942
window = max(len(arg1), len(arg2))
956943
return rolling_corr(arg1, arg2, window,
957944
min_periods=min_periods,
958-
freq=freq, center=center, time_rule=time_rule)
945+
freq=freq, center=center, time_rule=time_rule,
946+
pairwise=pairwise)
959947

960948

961949
@Substitution("Pairwise expanding sample correlation", _pairwise_arg,
962950
_pairwise_retval)
963951
@Appender(_expanding_doc)
964952
def expanding_corr_pairwise(df1, df2=None, min_periods=1, freq=None,
965953
center=False, time_rule=None):
966-
if df2 is None:
967-
df2 = df1
968-
return _flex_pairwise_moment(expanding_corr, df1, df2,
969-
min_periods=min_periods, freq=freq,
970-
center=center, time_rule=time_rule)
954+
return expanding_corr(df1, df2, min_periods=min_periods,
955+
freq=freq, center=center, time_rule=time_rule,
956+
pairwise=True)
971957

972958

973959
def expanding_apply(arg, func, min_periods=1, freq=None, center=False,

0 commit comments

Comments (0)