Skip to content

Commit 49cd89b

Browse files
committed
Merge pull request #11560 from roman-khomenko/roman-khomenko/fix-kendall-for-num-and-bool
BUG: Fix bug for kendall corr when in DF num and bool
2 parents a6d50bc + f6b11fe commit 49cd89b

File tree

3 files changed

+22
-3
lines changed

3 files changed

+22
-3
lines changed

doc/source/whatsnew/v0.17.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ Bug Fixes
123123

124124
- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`)
125125

126-
126+
- Bug in ``DataFrame.corr()`` raises an exception when computing the Kendall correlation for DataFrames with both boolean and non-boolean columns (:issue:`11560`)
127127

128128
- Bug in the link-time error caused by C ``inline`` functions on FreeBSD 10+ (with ``clang``) (:issue:`10510`)
129129
- Bug in ``DataFrame.to_csv`` in passing through arguments for formatting ``MultiIndexes``, including ``date_format`` (:issue:`7791`)

pandas/core/frame.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -4411,16 +4411,21 @@ def corr(self, method='pearson', min_periods=1):
44114411
else:
44124412
if min_periods is None:
44134413
min_periods = 1
4414-
mat = mat.T
4414+
mat = com._ensure_float64(mat).T
44154415
corrf = nanops.get_corr_func(method)
44164416
K = len(cols)
44174417
correl = np.empty((K, K), dtype=float)
44184418
mask = np.isfinite(mat)
44194419
for i, ac in enumerate(mat):
44204420
for j, bc in enumerate(mat):
4421+
if i > j:
4422+
continue
4423+
44214424
valid = mask[i] & mask[j]
44224425
if valid.sum() < min_periods:
44234426
c = NA
4427+
elif i == j:
4428+
c = 1.
44244429
elif not valid.all():
44254430
c = corrf(ac[valid], bc[valid])
44264431
else:

pandas/tests/test_frame.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -8009,12 +8009,14 @@ def test_corr_nooverlap(self):
80098009
# nothing in common
80108010
for meth in ['pearson', 'kendall', 'spearman']:
80118011
df = DataFrame({'A': [1, 1.5, 1, np.nan, np.nan, np.nan],
8012-
'B': [np.nan, np.nan, np.nan, 1, 1.5, 1]})
8012+
'B': [np.nan, np.nan, np.nan, 1, 1.5, 1],
8013+
'C': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]})
80138014
rs = df.corr(meth)
80148015
self.assertTrue(isnull(rs.ix['A', 'B']))
80158016
self.assertTrue(isnull(rs.ix['B', 'A']))
80168017
self.assertEqual(rs.ix['A', 'A'], 1)
80178018
self.assertEqual(rs.ix['B', 'B'], 1)
8019+
self.assertTrue(isnull(rs.ix['C', 'C']))
80188020

80198021
def test_corr_constant(self):
80208022
tm._skip_if_no_scipy()
@@ -8035,6 +8037,18 @@ def test_corr_int(self):
80358037
df3.cov()
80368038
df3.corr()
80378039

8040+
def test_corr_int_and_boolean(self):
8041+
tm._skip_if_no_scipy()
8042+
8043+
# when dtypes of pandas series are different
8044+
# then ndarray will have dtype=object,
8045+
# so it needs to be handled properly
8046+
df = DataFrame({"a": [True, False], "b": [1, 0]})
8047+
8048+
expected = DataFrame(np.ones((2, 2)), index=['a', 'b'], columns=['a', 'b'])
8049+
for meth in ['pearson', 'kendall', 'spearman']:
8050+
assert_frame_equal(df.corr(meth), expected)
8051+
80388052
def test_cov(self):
80398053
# min_periods no NAs (corner case)
80408054
expected = self.frame.cov()

0 commit comments

Comments
 (0)