diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 42da19f1a241d..1f6142648a916 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -209,11 +209,11 @@ def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None, pairwise = True if pairwise is None else pairwise # only default unset arg1 = _conv_timerule(arg1, freq, how) arg2 = _conv_timerule(arg2, freq, how) - window = min(window, len(arg1), len(arg2)) def _get_cov(X, Y): - mean = lambda x: rolling_mean(x, window, min_periods, center=center) - count = rolling_count(X + Y, window, center=center) + adj_window = min(window, len(X), len(Y)) + mean = lambda x: rolling_mean(x, adj_window, min_periods, center=center) + count = rolling_count(X + Y, adj_window, center=center) bias_adj = count / (count - 1) return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise)) @@ -234,14 +234,14 @@ def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None, pairwise = True if pairwise is None else pairwise # only default unset arg1 = _conv_timerule(arg1, freq, how) arg2 = _conv_timerule(arg2, freq, how) - window = min(window, len(arg1), len(arg2)) def _get_corr(a, b): - num = rolling_cov(a, b, window, min_periods, freq=freq, + adj_window = min(window, len(a), len(b)) + num = rolling_cov(a, b, adj_window, min_periods, freq=freq, center=center) - den = (rolling_std(a, window, min_periods, freq=freq, + den = (rolling_std(a, adj_window, min_periods, freq=freq, center=center) * - rolling_std(b, window, min_periods, freq=freq, + rolling_std(b, adj_window, min_periods, freq=freq, center=center)) return num / den return _flex_binary_moment(arg1, arg2, _get_corr, pairwise=bool(pairwise)) @@ -261,9 +261,9 @@ def _flex_binary_moment(arg1, arg2, f, pairwise=False): results = {} if isinstance(arg2, DataFrame): X, Y = arg1.align(arg2, join='outer') - X = X + 0 * Y - Y = Y + 0 * X if pairwise is False: + X = X + 0 * Y + Y = Y + 0 * X res_columns = arg1.columns.union(arg2.columns) for col in res_columns: if col in X and col in Y: @@ -276,7 +276,7 @@ def _flex_binary_moment(arg1, arg2, f, pairwise=False): # Symmetric case results[k1][k2] = results[k2][k1] else: - results[k1][k2] = f(arg1[k1], arg2[k2]) + results[k1][k2] = f(*_prep_binary(arg1[k1], arg2[k2])) return Panel.from_dict(results).swapaxes('items', 'major') else: raise ValueError("'pairwise' is not True/False") @@ -917,7 +917,7 @@ def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, center=False, min_periods = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - window = max(len(arg1), len(arg2)) + window = len(arg1) + len(arg2) return rolling_cov(arg1, arg2, window, min_periods=min_periods, freq=freq, center=center, pairwise=pairwise) @@ -935,7 +935,7 @@ def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, center=False, min_periods = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - window = max(len(arg1), len(arg2)) + window = len(arg1) + len(arg2) return rolling_corr(arg1, arg2, window, min_periods=min_periods, freq=freq, center=center, pairwise=pairwise) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index dd91952cf537c..23a7ccae0da4e 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -759,6 +759,43 @@ def test_expanding_corr_pairwise(self): for i in result.items: assert_almost_equal(result[i], rolling_result[i]) + def test_expanding_cov_diff_length(self): + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + result = mom.expanding_cov(s1, s2) + expected = Series([None, None, 2.0]) + assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = mom.expanding_cov(s1, s2a) + assert_series_equal(result, expected) + + def test_expanding_corr_diff_length(self): + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + result = mom.expanding_corr(s1, s2) + expected = Series([None, None, 1.0]) + assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = mom.expanding_corr(s1, s2a) + assert_series_equal(result, expected) + + def test_expanding_corr_pairwise_diff_length(self): + df1 = DataFrame([[1,2], [3, 2], [3,4]], columns=['A','B']) + df1a = DataFrame([[1,2], [3,4]], index=[0,2], columns=['A','B']) + df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y']) + df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y']) + result1 = mom.expanding_corr(df1, df2, pairwise=True)[2] + result2 = mom.expanding_corr(df1, df2a, pairwise=True)[2] + result3 = mom.expanding_corr(df1a, df2, pairwise=True)[2] + result4 = mom.expanding_corr(df1a, df2a, pairwise=True)[2] + expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], index=['A','B'], columns=['X','Y']) + assert_frame_equal(result1, expected) + assert_frame_equal(result2, expected) + assert_frame_equal(result3, expected) + assert_frame_equal(result4, expected) + def test_rolling_skew_edge_cases(self): all_nan = Series([np.NaN] * 5)