@@ -393,6 +393,100 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarr
393
393
return result
394
394
395
395
396
+ # ----------------------------------------------------------------------
397
+ # Kendall correlation
398
+ # Wikipedia article: https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient
399
+
400
+ @ cython.boundscheck (False )
401
+ @ cython.wraparound (False )
402
+ def nancorr_kendall (ndarray[float64_t , ndim = 2 ] mat, Py_ssize_t minp = 1 ) -> ndarray:
403
+ """
404
+ Perform kendall correlation on a 2d array
405
+
406
+ Parameters
407
+ ----------
408
+ mat : np.ndarray[float64_t , ndim = 2 ]
409
+ Array to compute kendall correlation on
410
+ minp : int , default 1
411
+ Minimum number of observations required per pair of columns
412
+ to have a valid result.
413
+
414
+ Returns
415
+ -------
416
+ numpy.ndarray[float64_t , ndim = 2 ]
417
+ Correlation matrix
418
+ """
419
+ cdef:
420
+ Py_ssize_t i , j , k , xi , yi , N , K
421
+ ndarray[float64_t , ndim = 2 ] result
422
+ ndarray[float64_t , ndim = 2 ] ranked_mat
423
+ ndarray[uint8_t , ndim = 2 ] mask
424
+ float64_t currj
425
+ ndarray[uint8_t , ndim = 1 ] valid
426
+ ndarray[int64_t] sorted_idxs
427
+ ndarray[float64_t , ndim = 1 ] col
428
+ int64_t n_concordant
429
+ int64_t total_concordant = 0
430
+ int64_t total_discordant = 0
431
+ float64_t kendall_tau
432
+ int64_t n_obs
433
+ const int64_t[:] labels_n
434
+
435
+ N , K = (< object > mat).shape
436
+
437
+ result = np.empty((K, K), dtype = np.float64)
438
+ mask = np.isfinite(mat)
439
+
440
+ ranked_mat = np.empty((N, K), dtype = np.float64)
441
+ # For compatibility when calling rank_1d
442
+ labels_n = np.zeros(N, dtype = np.int64)
443
+
444
+ for i in range(K ):
445
+ ranked_mat[:, i] = rank_1d(mat[:, i], labels_n)
446
+
447
+ for xi in range (K):
448
+ sorted_idxs = ranked_mat[:, xi].argsort()
449
+ ranked_mat = ranked_mat[sorted_idxs]
450
+ mask = mask[sorted_idxs]
451
+ for yi in range (xi + 1 , K):
452
+ valid = mask[:, xi] & mask[:, yi]
453
+ if valid.sum() < minp:
454
+ result[xi, yi] = NaN
455
+ result[yi, xi] = NaN
456
+ else :
457
+ # Get columns and order second column using 1st column ranks
458
+ if not valid.all():
459
+ col = ranked_mat[valid.nonzero()][:, yi]
460
+ else :
461
+ col = ranked_mat[:, yi]
462
+ n_obs = col.shape[0 ]
463
+ total_concordant = 0
464
+ total_discordant = 0
465
+ for j in range (n_obs - 1 ):
466
+ currj = col[j]
467
+ # Count num concordant and discordant pairs
468
+ n_concordant = 0
469
+ for k in range (j, n_obs):
470
+ if col[k] > currj:
471
+ n_concordant += 1
472
+ total_concordant += n_concordant
473
+ total_discordant += (n_obs - 1 - j - n_concordant)
474
+ # Note: we do total_concordant+total_discordant here which is
475
+ # equivalent to the C(n, 2), the total # of pairs,
476
+ # listed on wikipedia
477
+ kendall_tau = (total_concordant - total_discordant) / \
478
+ (total_concordant + total_discordant)
479
+ result[xi, yi] = kendall_tau
480
+ result[yi, xi] = kendall_tau
481
+
482
+ if mask[:, xi].sum() > minp:
483
+ result[xi, xi] = 1
484
+ else :
485
+ result[xi, xi] = NaN
486
+
487
+ return result
488
+
489
+
396
490
# ----------------------------------------------------------------------
397
491
398
492
ctypedef fused algos_t:
0 commit comments