ENH: rolling_corr/rolling_cov can take DF/DF and DF/Series combos for easy multi-application, GH #462

wesm · wesm · commit 31f56624516a · 2011-12-11T22:56:50.000-05:00
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -61,6 +61,8 @@ pandas 0.6.1
     #453)
   - Optimize scalar value lookups in the general case by 25% or more in Series
     and DataFrame
+  - Can pass DataFrame/DataFrame and DataFrame/Series to
+    rolling_corr/rolling_cov (GH #462)
 
 **Bug fixes**
 
@@ -80,6 +82,9 @@ pandas 0.6.1
   - Bug fix in left join Cython code with duplicate monotonic labels
   - Fix bug when unstacking multiple levels described in #451
   - Exclude NA values in dtype=object arrays, regression from 0.5.0 (GH #469)
+  - Use Cython map_infer function in DataFrame.applymap to properly infer
+    output type, handle tuple return values and other things that were breaking
+    (GH #465)
 
 Thanks
 ------
diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py
@@ -46,18 +46,47 @@ def rolling_count(arg, window, time_rule=None):
     return return_hook(result)
 
 def rolling_cov(arg1, arg2, window, min_periods=None, time_rule=None):
-    X, Y = _prep_binary(arg1, arg2)
-    mean = lambda x: rolling_mean(x, window, min_periods, time_rule)
-    count = rolling_count(X + Y, window, time_rule)
-    bias_adj = count / (count - 1)
-    return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
+    def _get_cov(X, Y):  # kernel for one pair; closes over the window args
+        mean = lambda x: rolling_mean(x, window, min_periods, time_rule)
+        count = rolling_count(X + Y, window, time_rule)  # counted on X + Y; presumably rows valid in both -- confirm
+        bias_adj = count / (count - 1)  # n / (n - 1) factor
+        return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj  # E[XY] - E[X]E[Y], rescaled
+    return _flex_binary_moment(arg1, arg2, _get_cov)  # dispatch on input types
 
 def rolling_corr(arg1, arg2, window, min_periods=None, time_rule=None):
-    X, Y = _prep_binary(arg1, arg2)
-    num = rolling_cov(X, Y, window, min_periods, time_rule)
-    den  = (rolling_std(X, window, min_periods, time_rule) *
-            rolling_std(Y, window, min_periods, time_rule))
-    return num / den
+    def _get_corr(a, b):  # kernel for one pair; closes over the window args
+        num = rolling_cov(a, b, window, min_periods, time_rule)  # rolling covariance of a and b
+        den  = (rolling_std(a, window, min_periods, time_rule) *
+                rolling_std(b, window, min_periods, time_rule))  # product of the two rolling stds
+        return num / den
+    return _flex_binary_moment(arg1, arg2, _get_corr)  # dispatch on input types
+
+def _flex_binary_moment(arg1, arg2, f):
+    """Dispatch f over ndarray/ndarray, DataFrame/DataFrame, DataFrame/Series."""
+    if isinstance(arg1, np.ndarray) and isinstance(arg2, np.ndarray):
+        X, Y = _prep_binary(arg1, arg2)
+        return f(X, Y)
+    elif isinstance(arg1, DataFrame):
+        results = {}
+        if isinstance(arg2, DataFrame):
+            X, Y = arg1.align(arg2, join='outer')
+            X = X + 0 * Y  # 0 * NaN is NaN, so gaps in Y are copied into X
+            Y = Y + 0 * X  # ...and gaps in X into Y
+            res_columns = arg1.columns.union(arg2.columns)
+            for col in res_columns:
+                if col in X and col in Y:
+                    results[col] = f(X[col], Y[col])
+        else:
+            res_columns = arg1.columns
+            X, Y = arg1.align(arg2, axis=0, join='outer')  # align rows only
+            for col in res_columns:
+                results[col] = f(X[col], Y)  # every column against arg2
+        return DataFrame(results, index=X.index, columns=res_columns)
+    elif isinstance(arg2, DataFrame):
+        return _flex_binary_moment(arg2, arg1, f)  # DataFrame first; f sees swapped args
+    raise TypeError('arguments must be ndarray/Series or DataFrame, got '
+                    '%s and %s' % (type(arg1), type(arg2)))  # was: endless recursion
+
 
 def _rolling_moment(arg, window, func, minp, axis=0, time_rule=None):
     """
@@ -219,7 +248,7 @@ def _prep_binary(arg1, arg2):
 
 Returns
 -------
-y : type of input argument
+%s
 """
 
 
@@ -256,18 +285,29 @@ def _prep_binary(arg1, arg2):
 y : type of input argument
 """
 
+_type_of_input = "y : type of input argument"
+
+_flex_retval = """y : type depends on inputs
+    DataFrame / DataFrame -> DataFrame (matches on columns)
+    DataFrame / Series -> Computes result for each column
+    Series / Series -> Series"""
+
 _unary_arg = "arg : Series, DataFrame"
+
+_binary_arg_flex = """arg1 : Series, DataFrame, or ndarray
+arg2 : Series, DataFrame, or ndarray"""
+
 _binary_arg = """arg1 : Series, DataFrame, or ndarray
-arg2 : type of arg1"""
+arg2 : Series, DataFrame, or ndarray"""
 
 _bias_doc = r"""bias : boolean, default False
     Use a standard estimation bias correction
 """
 
 rolling_cov.__doc__ = _doc_template % ("Unbiased moving covariance",
-                                       _binary_arg)
+                                       _binary_arg_flex, _flex_retval)
 rolling_corr.__doc__ = _doc_template % ("Moving sample correlation",
-                                        _binary_arg)
+                                        _binary_arg_flex, _flex_retval)
 
 ewma.__doc__ = _ewm_doc % ("Exponentially-weighted moving average",
                            _unary_arg, "")
@@ -314,7 +354,7 @@ def call_cython(arg, window, minp):
         return _rolling_moment(arg, window, call_cython, min_periods,
                                time_rule=time_rule)
 
-    f.__doc__ = _doc_template % (desc, _unary_arg)
+    f.__doc__ = _doc_template % (desc, _unary_arg, _type_of_input)
 
     return f
 
diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py
@@ -8,7 +8,7 @@
 from pandas.core.api import Series, DataFrame, DateRange
 from pandas.util.testing import assert_almost_equal
 import pandas.core.datetools as datetools
-import pandas.stats.moments as moments
+import pandas.stats.moments as mom
 import pandas.util.testing as tm
 
 N, K = 100, 10
@@ -31,25 +31,25 @@ def setUp(self):
                                columns=np.arange(K))
 
     def test_rolling_sum(self):
-        self._check_moment_func(moments.rolling_sum, np.sum)
+        self._check_moment_func(mom.rolling_sum, np.sum)
 
     def test_rolling_count(self):
         counter = lambda x: np.isfinite(x).astype(float).sum()
-        self._check_moment_func(moments.rolling_count, counter,
+        self._check_moment_func(mom.rolling_count, counter,
                                 has_min_periods=False,
                                 preserve_nan=False)
 
     def test_rolling_mean(self):
-        self._check_moment_func(moments.rolling_mean, np.mean)
+        self._check_moment_func(mom.rolling_mean, np.mean)
 
     def test_rolling_median(self):
-        self._check_moment_func(moments.rolling_median, np.median)
+        self._check_moment_func(mom.rolling_median, np.median)
 
     def test_rolling_min(self):
-        self._check_moment_func(moments.rolling_min, np.min)
+        self._check_moment_func(mom.rolling_min, np.min)
 
     def test_rolling_max(self):
-        self._check_moment_func(moments.rolling_max, np.max)
+        self._check_moment_func(mom.rolling_max, np.max)
 
     def test_rolling_quantile(self):
         qs = [.1, .5, .9]
@@ -62,7 +62,7 @@ def scoreatpercentile(a, per):
 
         for q in qs:
             def f(x, window, min_periods=None, time_rule=None):
-                return moments.rolling_quantile(x, window, q,
+                return mom.rolling_quantile(x, window, q,
                                                 min_periods=min_periods,
                                                 time_rule=time_rule)
             def alt(x):
@@ -72,34 +72,34 @@ def alt(x):
 
     def test_rolling_apply(self):
         def roll_mean(x, window, min_periods=None, time_rule=None):
-            return moments.rolling_apply(x, window,
+            return mom.rolling_apply(x, window,
                                          lambda x: x[np.isfinite(x)].mean(),
                                          min_periods=min_periods,
                                          time_rule=time_rule)
         self._check_moment_func(roll_mean, np.mean)
 
     def test_rolling_std(self):
-        self._check_moment_func(moments.rolling_std,
+        self._check_moment_func(mom.rolling_std,
                                 lambda x: np.std(x, ddof=1))
 
     def test_rolling_var(self):
-        self._check_moment_func(moments.rolling_var,
+        self._check_moment_func(mom.rolling_var,
                                 lambda x: np.var(x, ddof=1))
 
     def test_rolling_skew(self):
         try:
             from scipy.stats import skew
         except ImportError:
             raise nose.SkipTest('no scipy')
-        self._check_moment_func(moments.rolling_skew,
+        self._check_moment_func(mom.rolling_skew,
                                 lambda x: skew(x, bias=False))
 
     def test_rolling_kurt(self):
         try:
             from scipy.stats import kurtosis
         except ImportError:
             raise nose.SkipTest('no scipy')
-        self._check_moment_func(moments.rolling_kurt,
+        self._check_moment_func(mom.rolling_kurt,
                                 lambda x: kurtosis(x, bias=False))
 
     def _check_moment_func(self, func, static_comp, window=50,
@@ -186,21 +186,21 @@ def _check_structures(self, func, static_comp,
                                 trunc_frame.apply(static_comp))
 
     def test_ewma(self):
-        self._check_ew(moments.ewma)
+        self._check_ew(mom.ewma)
 
     def test_ewmvar(self):
-        self._check_ew(moments.ewmvar)
+        self._check_ew(mom.ewmvar)
 
     def test_ewmvol(self):
-        self._check_ew(moments.ewmvol)
+        self._check_ew(mom.ewmvol)
 
     def test_ewma_span_com_args(self):
-        A = moments.ewma(self.arr, com=9.5)
-        B = moments.ewma(self.arr, span=20)
+        A = mom.ewma(self.arr, com=9.5)
+        B = mom.ewma(self.arr, span=20)
         assert_almost_equal(A, B)
 
-        self.assertRaises(Exception, moments.ewma, self.arr, com=9.5, span=20)
-        self.assertRaises(Exception, moments.ewma, self.arr)
+        self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20)
+        self.assertRaises(Exception, mom.ewma, self.arr)
 
     def _check_ew(self, func):
         self._check_ew_ndarray(func)
@@ -233,14 +233,14 @@ def test_rolling_cov(self):
         A = self.series
         B = A + randn(len(A))
 
-        result = moments.rolling_cov(A, B, 50, min_periods=25)
+        result = mom.rolling_cov(A, B, 50, min_periods=25)
         assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1])
 
     def test_rolling_corr(self):
         A = self.series
         B = A + randn(len(A))
 
-        result = moments.rolling_corr(A, B, 50, min_periods=25)
+        result = mom.rolling_corr(A, B, 50, min_periods=25)
         assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1])
 
         # test for correct bias correction
@@ -249,14 +249,37 @@ def test_rolling_corr(self):
         a[:5] = np.nan
         b[:10] = np.nan
 
-        result = moments.rolling_corr(a, b, len(a), min_periods=1)
+        result = mom.rolling_corr(a, b, len(a), min_periods=1)
         assert_almost_equal(result[-1], a.corr(b))
 
+    def test_flex_binary_frame(self):  # DataFrame/Series and DataFrame/DataFrame inputs
+        def _check(method):
+            series = self.frame[1]
+
+            res = method(series, self.frame, 10)
+            res2 = method(self.frame, series, 10)  # same call, argument order swapped
+            exp = self.frame.apply(lambda x: method(series, x, 10))  # column-by-column reference
+
+            tm.assert_frame_equal(res, exp)
+            tm.assert_frame_equal(res2, exp)
+
+            frame2 = self.frame.copy()
+            frame2.values[:] = np.random.randn(*frame2.shape)  # intended to refill the copy with random data
+
+            res3 = method(self.frame, frame2, 10)
+            exp = DataFrame(dict((k, method(self.frame[k], frame2[k], 10))
+                                 for k in self.frame))  # same-named columns paired up
+            tm.assert_frame_equal(res3, exp)
+
+        methods = [mom.rolling_corr, mom.rolling_cov]
+        for meth in methods:
+            _check(meth)
+
     def test_ewmcov(self):
-        self._check_binary_ew(moments.ewmcov)
+        self._check_binary_ew(mom.ewmcov)
 
     def test_ewmcorr(self):
-        self._check_binary_ew(moments.ewmcorr)
+        self._check_binary_ew(mom.ewmcorr)
 
     def _check_binary_ew(self, func):
         A = Series(randn(50), index=np.arange(50))