Skip to content

Commit 14e7f51

Browse files
author
Daniel Saxton
committed
Avoid unnecessary ranking
1 parent 1118a3c commit 14e7f51

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

pandas/_libs/algos.pyx

+12 -6
Original file line number | Diff line number | Diff line change
@@ -309,21 +309,27 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
309309
mask = np.isfinite(mat).view(np.uint8)
310310

311311
ranked_mat = np.empty((N, K), dtype=np.float64)
312-
313-
for i in range(K):
314-
ranked_mat[:, i] = rank_1d_float64(mat[:, i])
312+
cached_ranks = set({})
315313

316314
for xi in range(K):
317315
for yi in range(xi + 1):
318316
nobs = 0
319317
# Keep track of whether the two columns have the same
320-
# missing pattern, if not we need to recalculate ranks
318+
# missing pattern, if so save off the ranks
321319
same_miss_pat = True
322320
for i in range(N):
323321
same_miss_pat &= not (mask[i, xi] ^ mask[i, yi])
324322
if mask[i, xi] and mask[i, yi]:
325323
nobs += 1
326324

325+
if same_miss_pat:
326+
if xi not in cached_ranks:
327+
ranked_mat[:, xi] = rank_1d_float64(mat[:, xi])
328+
cached_ranks.add(xi)
329+
if yi not in cached_ranks:
330+
ranked_mat[:, yi] = rank_1d_float64(mat[:, yi])
331+
cached_ranks.add(yi)
332+
327333
if nobs < minp:
328334
result[xi, yi] = result[yi, xi] = NaN
329335
else:
@@ -333,8 +339,8 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
333339

334340
for i in range(N):
335341
if mask[i, xi] and mask[i, yi]:
336-
maskedx[j] = ranked_mat[i, xi]
337-
maskedy[j] = ranked_mat[i, yi]
342+
maskedx[j] = ranked_mat[i, xi] if same_miss_pat else mat[i, xi]
343+
maskedy[j] = ranked_mat[i, yi] if same_miss_pat else mat[i, yi]
338344
j += 1
339345

340346
if not same_miss_pat:

0 commit comments

Comments
 (0)