Skip to content

Commit 55da48f

Browse files
liwhpeterpanmj
liwh
authored and committed
BUG: Allow pairwise calculation when comparing the column with itself (pandas-dev#25781)
1 parent e7e7b40 commit 55da48f

File tree

2 files changed

+21
-1
lines changed

2 files changed

+21
-1
lines changed

pandas/core/frame.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -9396,6 +9396,7 @@ def corr(
93969396
self,
93979397
method: str | Callable[[np.ndarray, np.ndarray], float] = "pearson",
93989398
min_periods: int = 1,
9399+
calculate_diagonal: bool = False,
93999400
) -> DataFrame:
94009401
"""
94019402
Compute pairwise correlation of columns, excluding NA/null values.
@@ -9416,6 +9417,10 @@ def corr(
94169417
Minimum number of observations required per pair of columns
94179418
to have a valid result. Currently only available for Pearson
94189419
and Spearman correlation.
9420+
calculate_diagonal : bool, optional
9421+
Whether to calculate the correlation of a column with itself using the supplied callable.
9422+
Ignored when method argument is not callable. If False, pairwise
9423+
correlation between a column and itself defaults to 1.
94199424
94209425
Returns
94219426
-------
@@ -9465,7 +9470,7 @@ def corr(
94659470
valid = mask[i] & mask[j]
94669471
if valid.sum() < min_periods:
94679472
c = np.nan
9468-
elif i == j:
9473+
elif i == j and not calculate_diagonal:
94699474
c = 1.0
94709475
elif not valid.all():
94719476
c = corrf(ac[valid], bc[valid])

pandas/tests/frame/methods/test_cov_corr.py

+15
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,21 @@ def test_corr_min_periods_greater_than_length(self, method):
236236
)
237237
tm.assert_frame_equal(result, expected)
238238

239+
@pytest.mark.filterwarnings("ignore: An input array is constant")
240+
@td.skip_if_no_scipy
241+
@pytest.mark.parametrize("array_creator", [np.ones, np.zeros, np.random.random])
242+
def test_corr_diagonal_not_ones(self, array_creator):
243+
from scipy.stats import pearsonr
244+
245+
frame_size = 4
246+
df = DataFrame(array_creator((frame_size, frame_size)))
247+
cor_mat = df.corr(
248+
method=lambda x, y: pearsonr(x, y)[0], calculate_diagonal=True
249+
)
250+
result_diag = [cor_mat.loc[i, i] for i in range(frame_size)]
251+
expected_diag = [pearsonr(df[i], df[i])[0] for i in range(frame_size)]
252+
tm.assert_almost_equal(result_diag, expected_diag)
253+
239254

240255
class TestDataFrameCorrWith:
241256
def test_corrwith(self, datetime_frame):

0 commit comments

Comments
 (0)