From 97fbc5142298e1d04ff57d1bae5f575a16f2a0d6 Mon Sep 17 00:00:00 2001
From: daniel saxton <danielsaxton@BRMACLT-04.local>
Date: Wed, 15 Aug 2018 19:00:27 -0500
Subject: [PATCH 01/10] ENH: Enable corrwith to compute rank and callable
 correlation methods

---
 asv_bench/benchmarks/stat_ops.py     |  7 +++
 doc/source/whatsnew/v0.24.0.rst      |  1 +
 pandas/core/frame.py                 | 81 +++++++++++++++++++++-------
 pandas/tests/frame/test_analytics.py | 35 ++++++++++++
 4 files changed, 104 insertions(+), 20 deletions(-)

diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py
index 500e4d74d4c4f..7fdc713f076ed 100644
--- a/asv_bench/benchmarks/stat_ops.py
+++ b/asv_bench/benchmarks/stat_ops.py
@@ -106,6 +106,7 @@ def setup(self, method, use_bottleneck):
             from pandas.core import nanops
             nanops._USE_BOTTLENECK = use_bottleneck
         self.df = pd.DataFrame(np.random.randn(1000, 30))
+        self.df2 = pd.DataFrame(np.random.randn(1000, 30))
         self.s = pd.Series(np.random.randn(1000))
         self.s2 = pd.Series(np.random.randn(1000))
 
@@ -115,6 +116,12 @@ def time_corr(self, method, use_bottleneck):
     def time_corr_series(self, method, use_bottleneck):
         self.s.corr(self.s2, method=method)
 
+    def time_corrwith_cols(self, method, use_bottleneck):
+        self.df.corrwith(self.df2, method=method)
+
+    def time_corrwith_rows(self, method, use_bottleneck):
+        self.df.corrwith(self.df2, axis=1, method=method)
+
 
 class Covariance(object):
 
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 29ab51c582a97..04c5207f6ae40 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -413,6 +413,7 @@ Other Enhancements
 - The ``scatter_matrix``, ``andrews_curves``, ``parallel_coordinates``, ``lag_plot``, ``autocorrelation_plot``, ``bootstrap_plot``, and ``radviz`` plots from the ``pandas.plotting`` module are now accessible from calling :meth:`DataFrame.plot` (:issue:`11978`)
 - :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`)
 - :func:`pandas.DataFrame.to_sql` has gained the ``method`` argument to control SQL insertion clause. See the :ref:`insertion method <io.sql.method>` section in the documentation. (:issue:`8953`)
+- :meth:`DataFrame.corrwith` now supports Spearman's rank correlation, Kendall's tau, and callable correlation methods via the new ``method`` argument. (:issue:`21925`)
 
 .. _whatsnew_0240.api_breaking:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a34a34186cf45..242bf4fd5b2a9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6868,6 +6868,11 @@ def corr(self, method='pearson', min_periods=1):
               dogs cats
         dogs   1.0  0.3
         cats   0.3  1.0
+
+        See Also
+        --------
+        DataFrame.corrwith
+        Series.corr
         """
         numeric_df = self._get_numeric_data()
         cols = numeric_df.columns
@@ -7021,10 +7026,11 @@ def cov(self, min_periods=None):
 
         return self._constructor(baseCov, index=idx, columns=cols)
 
-    def corrwith(self, other, axis=0, drop=False):
+    def corrwith(self, other, axis=0, drop=False, method='pearson'):
         """
-        Compute pairwise correlation between rows or columns of two DataFrame
-        objects.
+        Compute pairwise correlation between rows or columns of DataFrame
+        with rows or columns of Series or DataFrame.  DataFrames are first
+        aligned along both axes before computing the correlations.
 
         Parameters
         ----------
@@ -7032,43 +7038,78 @@ def corrwith(self, other, axis=0, drop=False):
         axis : {0 or 'index', 1 or 'columns'}, default 0
             0 or 'index' to compute column-wise, 1 or 'columns' for row-wise
         drop : boolean, default False
-            Drop missing indices from result, default returns union of all
+            Drop missing indices from result.
+        method : {'pearson', 'kendall', 'spearman'} or callable
+            * pearson : standard correlation coefficient
+            * kendall : Kendall Tau correlation coefficient
+            * spearman : Spearman rank correlation
+            * callable : callable taking two 1d ndarrays as input
+                and returning a float
+
+            .. versionadded:: 0.24.0
 
         Returns
         -------
         correls : Series
+
+        See Also
+        -------
+        DataFrame.corr
         """
+        if method not in ['pearson', 'spearman', 'kendall']:
+            raise ValueError("method must be either 'pearson', "
+                             "'spearman', or 'kendall', '{method}' "
+                             "was supplied".format(method=method))
+
         axis = self._get_axis_number(axis)
         this = self._get_numeric_data()
 
         if isinstance(other, Series):
-            return this.apply(other.corr, axis=axis)
+            return this.apply(lambda x: other.corr(x, method=method),
+                              axis=axis)
 
         other = other._get_numeric_data()
-
         left, right = this.align(other, join='inner', copy=False)
 
-        # mask missing values
-        left = left + right * 0
-        right = right + left * 0
-
         if axis == 1:
             left = left.T
             right = right.T
 
-        # demeaned data
-        ldem = left - left.mean()
-        rdem = right - right.mean()
+        if method == 'pearson':
+            # mask missing values
+            left = left + right * 0
+            right = right + left * 0
+
+            if axis == 1:
+                left = left.T
+                right = right.T
 
-        num = (ldem * rdem).sum()
-        dom = (left.count() - 1) * left.std() * right.std()
+            # demeaned data
+            ldem = left - left.mean()
+            rdem = right - right.mean()
 
-        correl = num / dom
+            num = (ldem * rdem).sum()
+            dom = (left.count() - 1) * left.std() * right.std()
 
-        if not drop:
-            raxis = 1 if axis == 0 else 0
-            result_index = this._get_axis(raxis).union(other._get_axis(raxis))
-            correl = correl.reindex(result_index)
+            correl = num / dom
+
+            if not drop:
+                raxis = 1 if axis == 0 else 0
+                result_index = (this._get_axis(raxis).
+                                union(other._get_axis(raxis)))
+                correl = correl.reindex(result_index)
+
+        else:
+            def c(x):
+                return Series(x[0]).corr(Series(x[1]),
+                                         method=method)
+
+            correl = Series(map(c,
+                                zip(left.values.T, right.values.T)),
+                            index=left.columns)
+
+            if drop:
+                correl.dropna(inplace=True)
 
         return correl
 
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 6f68828b94a84..4a3024ff3b2c8 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -466,6 +466,41 @@ def test_corrwith_mixed_dtypes(self):
         expected = pd.Series(data=corrs, index=['a', 'b'])
         tm.assert_series_equal(result, expected)
 
+    def test_corrwith_dup_cols(self):
+        # GH 21925
+        df1 = pd.DataFrame(np.vstack([np.arange(10)] * 3).T)
+        df2 = df1.copy()
+        df2 = pd.concat((df2, df2[0]), axis=1)
+
+        result = df1.corrwith(df2).values
+        expected = np.ones(4)
+        tm.assert_almost_equal(result, expected)
+
+    @td.skip_if_no_scipy
+    def test_corrwith_spearman(self):
+        # GH 21925
+        df = pd.DataFrame(np.random.random(size=(100, 3)))
+        result = df.corrwith(df**2, method="spearman")
+        expected = Series(np.ones(len(result)))
+        tm.assert_series_equal(result, expected)
+
+    @td.skip_if_no_scipy
+    def test_corrwith_kendall(self):
+        # GH 21925
+        df = pd.DataFrame(np.random.random(size=(100, 3)))
+        result = df.corrwith(df**2, method="kendall")
+        expected = Series(np.ones(len(result)))
+        tm.assert_series_equal(result, expected)
+
+    def test_corrwith_invalid_method(self):
+        # GH 21925
+        df = pd.DataFrame(np.random.normal(size=(10, 2)))
+        s = pd.Series(np.random.randn(10))
+        msg = ("method must be either 'pearson', 'spearman', "
+               "or 'kendall'")
+        with tm.assert_raises_regex(ValueError, msg):
+            df.corrwith(s, method="____")
+
     def test_bool_describe_in_mixed_frame(self):
         df = DataFrame({
             'string_data': ['a', 'b', 'c', 'd', 'e'],

From 0c84f8565368a8b09422ec4ff90bc4c95cb7c41b Mon Sep 17 00:00:00 2001
From: Daniel Saxton <daniel.saxton@gmail.com>
Date: Sun, 18 Nov 2018 16:45:24 -0600
Subject: [PATCH 02/10] Fix corrwith

* Remove incorrect error (didn't account for callables)
* Add xfail to duplicate columns test
* Fix transpose (was taken twice for Pearson)
* Remove inplace usage for dropna
---
 pandas/core/frame.py                 | 22 +++++-----------------
 pandas/tests/frame/test_analytics.py |  1 +
 2 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 242bf4fd5b2a9..ce9ce66d53801 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7056,11 +7056,6 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'):
         -------
         DataFrame.corr
         """
-        if method not in ['pearson', 'spearman', 'kendall']:
-            raise ValueError("method must be either 'pearson', "
-                             "'spearman', or 'kendall', '{method}' "
-                             "was supplied".format(method=method))
-
         axis = self._get_axis_number(axis)
         this = self._get_numeric_data()
 
@@ -7080,10 +7075,6 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'):
             left = left + right * 0
             right = right + left * 0
 
-            if axis == 1:
-                left = left.T
-                right = right.T
-
             # demeaned data
             ldem = left - left.mean()
             rdem = right - right.mean()
@@ -7093,12 +7084,6 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'):
 
             correl = num / dom
 
-            if not drop:
-                raxis = 1 if axis == 0 else 0
-                result_index = (this._get_axis(raxis).
-                                union(other._get_axis(raxis)))
-                correl = correl.reindex(result_index)
-
         else:
             def c(x):
                 return Series(x[0]).corr(Series(x[1]),
@@ -7108,8 +7093,11 @@ def c(x):
                                 zip(left.values.T, right.values.T)),
                             index=left.columns)
 
-            if drop:
-                correl.dropna(inplace=True)
+        if not drop:
+            raxis = 1 if axis == 0 else 0
+            result_index = (this._get_axis(raxis).
+                            union(other._get_axis(raxis)))
+            correl = correl.reindex(result_index)
 
         return correl
 
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 4a3024ff3b2c8..453a059baace7 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -466,6 +466,7 @@ def test_corrwith_mixed_dtypes(self):
         expected = pd.Series(data=corrs, index=['a', 'b'])
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail
     def test_corrwith_dup_cols(self):
         # GH 21925
         df1 = pd.DataFrame(np.vstack([np.arange(10)] * 3).T)

From 48f459ebe9624796f03a0feb457e3bfe36e693fa Mon Sep 17 00:00:00 2001
From: Daniel Saxton <daniel.saxton@gmail.com>
Date: Sun, 18 Nov 2018 19:40:10 -0600
Subject: [PATCH 03/10] Remove test that's no longer correct

---
 pandas/tests/frame/test_analytics.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 453a059baace7..421fc31e8b88f 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -493,15 +493,6 @@ def test_corrwith_kendall(self):
         expected = Series(np.ones(len(result)))
         tm.assert_series_equal(result, expected)
 
-    def test_corrwith_invalid_method(self):
-        # GH 21925
-        df = pd.DataFrame(np.random.normal(size=(10, 2)))
-        s = pd.Series(np.random.randn(10))
-        msg = ("method must be either 'pearson', 'spearman', "
-               "or 'kendall'")
-        with tm.assert_raises_regex(ValueError, msg):
-            df.corrwith(s, method="____")
-
     def test_bool_describe_in_mixed_frame(self):
         df = DataFrame({
             'string_data': ['a', 'b', 'c', 'd', 'e'],

From 2356dea6f3ab43be1c0b37c405fa3c2fd52643d1 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <daniel.saxton@gmail.com>
Date: Thu, 27 Dec 2018 11:33:51 -0500
Subject: [PATCH 04/10] Update corrwith

* Check for invalid method
* Do not cast arrays to Series in function c
---
 pandas/core/frame.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ce9ce66d53801..998e8b1839fd2 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7084,15 +7084,17 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'):
 
             correl = num / dom
 
-        else:
+        elif method in ['kendall', 'spearman'] or callable(method):
             def c(x):
-                return Series(x[0]).corr(Series(x[1]),
-                                         method=method)
+                return nanops.nancorr(x[0], x[1], method=method)
 
             correl = Series(map(c,
                                 zip(left.values.T, right.values.T)),
                             index=left.columns)
 
+        else:
+            raise ValueError('Invalid method')
+
         if not drop:
             raxis = 1 if axis == 0 else 0
             result_index = (this._get_axis(raxis).

From 9010169d7ac25d72c322feb3631c91a385ae9bb3 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <daniel.saxton@gmail.com>
Date: Thu, 27 Dec 2018 12:18:26 -0500
Subject: [PATCH 05/10] Fix error message

---
 pandas/core/frame.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 998e8b1839fd2..aa09176d8ef19 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7093,7 +7093,10 @@ def c(x):
                             index=left.columns)
 
         else:
-            raise ValueError('Invalid method')
+            raise ValueError("Invalid method {method} was passed, "
+                             "valid methods are: 'pearson', 'kendall', "
+                             "'spearman', or callable".
+                             format(method=str(method)))
 
         if not drop:
             raxis = 1 if axis == 0 else 0

From 6ebdd685b4869dd0b926487ca8ab30d023f9bb53 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <daniel.saxton@gmail.com>
Date: Thu, 27 Dec 2018 21:41:36 -0500
Subject: [PATCH 06/10] Compare Series not arrays in test

---
 pandas/tests/frame/test_analytics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 421fc31e8b88f..21fe7ae2affbd 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -473,9 +473,9 @@ def test_corrwith_dup_cols(self):
         df2 = df1.copy()
         df2 = pd.concat((df2, df2[0]), axis=1)
 
-        result = df1.corrwith(df2).values
-        expected = np.ones(4)
-        tm.assert_almost_equal(result, expected)
+        result = df1.corrwith(df2)
+        expected = pd.Series(np.ones(4), index=[0, 0, 1, 2])
+        tm.assert_series_equal(result, expected)
 
     @td.skip_if_no_scipy
     def test_corrwith_spearman(self):

From c027574de76df95c66131afd3f8c051add474751 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <daniel.saxton@gmail.com>
Date: Sat, 29 Dec 2018 11:54:51 -0500
Subject: [PATCH 07/10] Allow duplicate index labels

---
 pandas/core/frame.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index aa09176d8ef19..04deae78f3417 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7102,7 +7102,9 @@ def c(x):
             raxis = 1 if axis == 0 else 0
             result_index = (this._get_axis(raxis).
                             union(other._get_axis(raxis)))
-            correl = correl.reindex(result_index)
+            idx_diff = result_index.difference(correl.index)
+            correl = correl.append(pd.Series([np.nan] * len(idx_diff),
+                                             index=idx_diff))
 
         return correl
 

From 11ac73a8c3032315d0bb5bd73570b2c970d103fa Mon Sep 17 00:00:00 2001
From: Daniel Saxton <daniel.saxton@gmail.com>
Date: Sat, 29 Dec 2018 12:04:11 -0500
Subject: [PATCH 08/10] Remove xfail from duplicate column test

---
 pandas/tests/frame/test_analytics.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 21fe7ae2affbd..ea24a50e82cbc 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -466,7 +466,6 @@ def test_corrwith_mixed_dtypes(self):
         expected = pd.Series(data=corrs, index=['a', 'b'])
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail
     def test_corrwith_dup_cols(self):
         # GH 21925
         df1 = pd.DataFrame(np.vstack([np.arange(10)] * 3).T)

From fdb5415ea31587920af519cf00f5c2be22b515e8 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <daniel.saxton@gmail.com>
Date: Sat, 29 Dec 2018 13:52:52 -0500
Subject: [PATCH 09/10] Use Series not pd.Series

---
 pandas/core/frame.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 04deae78f3417..55f46c28620a8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7103,8 +7103,8 @@ def c(x):
             result_index = (this._get_axis(raxis).
                             union(other._get_axis(raxis)))
             idx_diff = result_index.difference(correl.index)
-            correl = correl.append(pd.Series([np.nan] * len(idx_diff),
-                                             index=idx_diff))
+            correl = correl.append(Series([np.nan] * len(idx_diff),
+                                          index=idx_diff))
 
         return correl
 

From 870d1a3f7a85cd15e3d2b6e8ed0779deda14ce49 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <daniel.saxton@gmail.com>
Date: Sun, 30 Dec 2018 17:45:41 -0500
Subject: [PATCH 10/10] Update corrwith

* Add comment for when drop is False
* Check if len(idx_diff) > 0
* Remove unnecessary string casting in error message
---
 pandas/core/frame.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 55f46c28620a8..2fe18339de791 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7096,15 +7096,19 @@ def c(x):
             raise ValueError("Invalid method {method} was passed, "
                              "valid methods are: 'pearson', 'kendall', "
                              "'spearman', or callable".
-                             format(method=str(method)))
+                             format(method=method))
 
         if not drop:
+            # Find non-matching labels along the given axis
+            # and append missing correlations (GH 22375)
             raxis = 1 if axis == 0 else 0
             result_index = (this._get_axis(raxis).
                             union(other._get_axis(raxis)))
             idx_diff = result_index.difference(correl.index)
-            correl = correl.append(Series([np.nan] * len(idx_diff),
-                                          index=idx_diff))
+
+            if len(idx_diff) > 0:
+                correl = correl.append(Series([np.nan] * len(idx_diff),
+                                              index=idx_diff))
 
         return correl