diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst
index 21c8ab4128188..be8457fc14a4f 100644
--- a/doc/source/cookbook.rst
+++ b/doc/source/cookbook.rst
@@ -1228,36 +1228,40 @@ Correlation
 
 The `method` argument within `DataFrame.corr` can accept a callable in addition to the named correlation types. Here we compute the `distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>`__ matrix for a `DataFrame` object.
 
-.. ipython:: python
-
-    def distcorr(x, y):
-        n = len(x)
-        a = np.zeros(shape=(n, n))
-        b = np.zeros(shape=(n, n))
-
-        for i in range(n):
-            for j in range(i + 1, n):
-                a[i, j] = abs(x[i] - x[j])
-                b[i, j] = abs(y[i] - y[j])
-
-        a += a.T
-        b += b.T
-
-        a_bar = np.vstack([np.nanmean(a, axis=0)] * n)
-        b_bar = np.vstack([np.nanmean(b, axis=0)] * n)
-
-        A = a - a_bar - a_bar.T + np.full(shape=(n, n), fill_value=a_bar.mean())
-        B = b - b_bar - b_bar.T + np.full(shape=(n, n), fill_value=b_bar.mean())
-
-        cov_ab = np.sqrt(np.nansum(A * B)) / n
-        std_a = np.sqrt(np.sqrt(np.nansum(A**2)) / n)
-        std_b = np.sqrt(np.sqrt(np.nansum(B**2)) / n)
-
-        return cov_ab / std_a / std_b
-
-    df = pd.DataFrame(np.random.normal(size=(100, 3)))
+.. code-block:: python
 
-    df.corr(method=distcorr)
+    >>> def distcorr(x, y):
+    ...     n = len(x)
+    ...     a = np.zeros(shape=(n, n))
+    ...     b = np.zeros(shape=(n, n))
+    ...
+    ...     for i in range(n):
+    ...         for j in range(i + 1, n):
+    ...             a[i, j] = abs(x[i] - x[j])
+    ...             b[i, j] = abs(y[i] - y[j])
+    ...
+    ...     a += a.T
+    ...     b += b.T
+    ...
+    ...     a_bar = np.vstack([np.nanmean(a, axis=0)] * n)
+    ...     b_bar = np.vstack([np.nanmean(b, axis=0)] * n)
+    ...
+    ...     A = a - a_bar - a_bar.T + np.full(shape=(n, n), fill_value=a_bar.mean())
+    ...     B = b - b_bar - b_bar.T + np.full(shape=(n, n), fill_value=b_bar.mean())
+    ...
+    ...     cov_ab = np.sqrt(np.nansum(A * B)) / n
+    ...     std_a = np.sqrt(np.sqrt(np.nansum(A**2)) / n)
+    ...     std_b = np.sqrt(np.sqrt(np.nansum(B**2)) / n)
+    ...
+    ...     return cov_ab / std_a / std_b
+    ...
+    >>> df = pd.DataFrame(np.random.normal(size=(100, 3)))
+    ...
+    >>> df.corr(method=distcorr)
+              0         1         2
+    0  1.000000  0.171368  0.145302
+    1  0.171368  1.000000  0.189919
+    2  0.145302  0.189919  1.000000
 
 Timedeltas
 ----------