@@ -326,8 +326,12 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
326
326
Py_ssize_t i, j, xi, yi, N, K
327
327
bint minpv
328
328
float64_t[:, ::1 ] result
329
+ # Initialize to None since we only use in the no missing value case
330
+ float64_t[::1 ] means= None , ssqds= None
329
331
ndarray[uint8_t, ndim= 2 ] mask
332
+ bint no_nans
330
333
int64_t nobs = 0
334
+ float64_t mean, ssqd, val
331
335
float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy
332
336
333
337
N, K = (< object > mat).shape
@@ -339,25 +343,57 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
339
343
340
344
result = np.empty((K, K), dtype = np.float64)
341
345
mask = np.isfinite(mat).view(np.uint8)
346
+ no_nans = mask.all()
347
+
348
+ # Computing the online means and variances is expensive - so if possible we can
349
+ # precompute these and avoid repeating the computations each time we handle
350
+ # an (xi, yi) pair
351
+ if no_nans:
352
+ means = np.empty(K, dtype = np.float64)
353
+ ssqds = np.empty(K, dtype = np.float64)
354
+
355
+ with nogil:
356
+ for j in range (K):
357
+ ssqd = mean = 0
358
+ for i in range (N):
359
+ val = mat[i, j]
360
+ dx = val - mean
361
+ mean += 1 / (i + 1 ) * dx
362
+ ssqd += (val - mean) * dx
363
+
364
+ means[j] = mean
365
+ ssqds[j] = ssqd
342
366
343
367
with nogil:
344
368
for xi in range (K):
345
369
for yi in range (xi + 1 ):
346
- # Welford's method for the variance-calculation
347
- # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
348
- nobs = ssqdmx = ssqdmy = covxy = meanx = meany = 0
349
- for i in range (N):
350
- if mask[i, xi] and mask[i, yi]:
370
+ covxy = 0
371
+ if no_nans:
372
+ for i in range (N):
351
373
vx = mat[i, xi]
352
374
vy = mat[i, yi]
353
- nobs += 1
354
- dx = vx - meanx
355
- dy = vy - meany
356
- meanx += 1 / nobs * dx
357
- meany += 1 / nobs * dy
358
- ssqdmx += (vx - meanx) * dx
359
- ssqdmy += (vy - meany) * dy
360
- covxy += (vx - meanx) * dy
375
+ covxy += (vx - means[xi]) * (vy - means[yi])
376
+
377
+ ssqdmx = ssqds[xi]
378
+ ssqdmy = ssqds[yi]
379
+ nobs = N
380
+
381
+ else :
382
+ nobs = ssqdmx = ssqdmy = covxy = meanx = meany = 0
383
+ for i in range (N):
384
+ # Welford's method for the variance-calculation
385
+ # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
386
+ if mask[i, xi] and mask[i, yi]:
387
+ vx = mat[i, xi]
388
+ vy = mat[i, yi]
389
+ nobs += 1
390
+ dx = vx - meanx
391
+ dy = vy - meany
392
+ meanx += 1 / nobs * dx
393
+ meany += 1 / nobs * dy
394
+ ssqdmx += (vx - meanx) * dx
395
+ ssqdmy += (vy - meany) * dy
396
+ covxy += (vx - meanx) * dy
361
397
362
398
if nobs < minpv:
363
399
result[xi, yi] = result[yi, xi] = NaN
0 commit comments