# Provenance (from the patch header this snippet was extracted from):
#   commit 76584e4f880012b04507d2a7065c90d8f3629c20
#   From: Daniel Saxton
#   Date: Sun, 30 Sep 2018 12:32:15 -0500
#   Subject: [PATCH] PR #22761: add cookbook entry for callable correlation
#            method
#   File: doc/source/cookbook.rst (36 insertions), hunk @@ -1223,6 +1223,42 @@
#   in the "Computation" section, inserted between the "Numerical integration
#   (sample-based) of a time series" link and the "Timedeltas" heading.
#
# Correlation
# ***********
#
# The `method` argument within `DataFrame.corr` can accept a callable in
# addition to the named correlation types.  Here we compute the distance
# correlation matrix for a `DataFrame` object.

# The cookbook's ipython directive runs with `np` and `pd` already in scope;
# they are imported here so the snippet also runs on its own.
import numpy as np
import pandas as pd


def _centered_distances(values):
    """Return the double-centered pairwise distance matrix of a 1-D array."""
    dist = np.abs(values[:, np.newaxis] - values[np.newaxis, :])
    # A point is at distance zero from itself, even when the entry is NaN;
    # this also guarantees that no column is entirely NaN for nanmean.
    np.fill_diagonal(dist, 0.0)
    # `dist` is symmetric, so the column means double as the row means.
    means = np.nanmean(dist, axis=0)
    return dist - means[np.newaxis, :] - means[:, np.newaxis] + means.mean()


def distcorr(x, y):
    """Return the sample distance correlation between ``x`` and ``y``.

    The result lies in ``[0, 1]``.  Unlike Pearson correlation it also picks
    up non-linear dependence.  The computation builds two ``n x n`` distance
    matrices, where ``n`` is the number of observations.

    Parameters
    ----------
    x, y : array-like
        One-dimensional numeric samples of equal length.  Values are read
        positionally, so a ``Series`` with an arbitrary index is accepted.

    Returns
    -------
    float
        The distance correlation.  ``0.0`` is returned when either sample
        has zero distance variance (e.g. a constant sample), which is how
        the statistic is defined in that case.

    Raises
    ------
    ValueError
        If the inputs are empty, not one-dimensional, or differ in length.
    """
    # Convert up front so that indexing is positional rather than label-based.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError("x and y must be one-dimensional and of equal length")

    n = x.size
    if n == 0:
        raise ValueError("x and y must contain at least one observation")

    A = _centered_distances(x)
    B = _centered_distances(y)

    # The squared distance covariance is non-negative in exact arithmetic;
    # clamp it so round-off cannot push the square root into NaN.
    cov_ab = np.sqrt(max(np.nansum(A * B), 0.0)) / n
    std_a = np.sqrt(np.sqrt(np.nansum(A ** 2)) / n)
    std_b = np.sqrt(np.sqrt(np.nansum(B ** 2)) / n)

    # Avoid 0/0: the distance correlation is defined as zero when either
    # distance variance vanishes.
    if std_a == 0 or std_b == 0:
        return 0.0

    return float(cov_ab / std_a / std_b)


df = pd.DataFrame(np.random.normal(size=(100, 3)))

df.corr(method=distcorr)