Skip to content

Commit b03b76e

Browse files
author
MarcoGorelli
committed
Revert "PERF: faster corrwith method for pearson and spearman correlation when other is a Series and axis = 0 (column-wise) (#46174)"
This reverts commit 5efb570.
1 parent 38ea8da commit b03b76e

File tree

1 file changed

+1
-41
lines changed

1 file changed

+1
-41
lines changed

pandas/core/frame.py

+1 -41
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,6 @@
161161
from pandas.core.array_algos.take import take_2d_multi
162162
from pandas.core.arraylike import OpsMixin
163163
from pandas.core.arrays import (
164-
BaseMaskedArray,
165164
DatetimeArray,
166165
ExtensionArray,
167166
PeriodArray,
@@ -10578,47 +10577,8 @@ def corrwith(
1057810577
if numeric_only is lib.no_default and len(this.columns) < len(self.columns):
1057910578
com.deprecate_numeric_only_default(type(self), "corrwith")
1058010579

10581-
# GH46174: when other is a Series object and axis=0, we achieve a speedup over
10582-
# passing .corr() to .apply() by taking the columns as ndarrays and iterating
10583-
# over the transposition row-wise. Then we delegate the correlation coefficient
10584-
# computation and null-masking to np.corrcoef and np.isnan respectively,
10585-
# which are much faster. We exploit the fact that the Spearman correlation
10586-
# of two vectors is equal to the Pearson correlation of their ranks to use
10587-
# substantially the same method for Pearson and Spearman,
10588-
# just with intermediate argsorts on the latter.
1058910580
if isinstance(other, Series):
10590-
if axis == 0 and method in ["pearson", "spearman"]:
10591-
corrs = {}
10592-
if numeric_only:
10593-
cols = self.select_dtypes(include=np.number).columns
10594-
else:
10595-
cols = self.columns
10596-
k = other.values
10597-
k_mask = ~other.isna()
10598-
if isinstance(k, BaseMaskedArray):
10599-
k = k._data
10600-
if method == "pearson":
10601-
for col in cols:
10602-
val = self[col].values
10603-
nonnull_mask = ~self[col].isna() & k_mask
10604-
if isinstance(val, BaseMaskedArray):
10605-
val = val._data
10606-
corrs[col] = np.corrcoef(val[nonnull_mask], k[nonnull_mask])[
10607-
0, 1
10608-
]
10609-
else:
10610-
for col in cols:
10611-
val = self[col].values
10612-
nonnull_mask = ~self[col].isna() & k_mask
10613-
if isinstance(val, BaseMaskedArray):
10614-
val = val._data
10615-
corrs[col] = np.corrcoef(
10616-
libalgos.rank_1d(val[nonnull_mask]),
10617-
libalgos.rank_1d(k[nonnull_mask]),
10618-
)[0, 1]
10619-
return Series(corrs)
10620-
else:
10621-
return this.apply(lambda x: other.corr(x, method=method), axis=axis)
10581+
return this.apply(lambda x: other.corr(x, method=method), axis=axis)
1062210582

1062310583
if numeric_only_bool:
1062410584
other = other._get_numeric_data()

0 commit comments

Comments
 (0)