Skip to content

PERF: support parallel calculation of nancorr #24795

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 75 additions & 34 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

import cython
from cython import Py_ssize_t
from cython.parallel import prange

from libc.stdlib cimport malloc, free
from libc.string cimport memmove
from libc.math cimport fabs, sqrt
from cpython cimport bool
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does cython treat this differently from built-in bool?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is the type used by cython to declare a python boolean (an object which means many operations can only be taken on it when gil is taken).


import numpy as np
cimport numpy as cnp
Expand Down Expand Up @@ -230,14 +232,15 @@ def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:

@cython.boundscheck(False)
@cython.wraparound(False)
def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
def nancorr(float64_t[:, :] mat, bint cov=0, minp=None, bool parallel=False):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 on changing to memoryviews where viable. if we don't alter mat inplace, might want to add the const modifier.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

cdef:
Py_ssize_t i, j, xi, yi, N, K
bint minpv
ndarray[float64_t, ndim=2] result
ndarray[uint8_t, ndim=2] mask
float64_t[:, :] result
uint8_t[:, :] mask
int64_t nobs = 0
float64_t vx, vy, sumx, sumy, sumxx, sumyy, meanx, meany, divisor
int64_t blah = 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

whats this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary, will be removed.


N, K = (<object>mat).shape

Expand All @@ -249,44 +252,82 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
result = np.empty((K, K), dtype=np.float64)
mask = np.isfinite(mat).view(np.uint8)

with nogil:
for xi in range(K):
for yi in range(xi + 1):
nobs = sumxx = sumyy = sumx = sumy = 0
for i in range(N):
if mask[i, xi] and mask[i, yi]:
vx = mat[i, xi]
vy = mat[i, yi]
nobs += 1
sumx += vx
sumy += vy
if parallel:
with nogil:
for xi in prange(K, schedule='dynamic'):
nancorr_single_row(mat, N, K, result, xi, mask, minpv, cov)
else:
with nogil:
for xi in range(K):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could this be collapsed with something like range_func = range(K) if not parallel else prange(K, schedule='dynamic')?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK No, because prange is not really a function, it is converted into a #pragma of sorts in the c code.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I think there is an issue on Cython's GH about making a maybe_nogil that would be semantically similar to maybe_prange. Might be worth commenting there

nancorr_single_row(mat, N, K, result, xi, mask, minpv, cov)

if nobs < minpv:
result[xi, yi] = result[yi, xi] = NaN
else:
meanx = sumx / nobs
meany = sumy / nobs
return np.asarray(result)

# now the cov numerator
sumx = 0

for i in range(N):
if mask[i, xi] and mask[i, yi]:
vx = mat[i, xi] - meanx
vy = mat[i, yi] - meany
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void nancorr_single_row(float64_t[:, :] mat,
Py_ssize_t N,
Py_ssize_t K,
float64_t[:, :] result,
Py_ssize_t xi,
uint8_t[:, :] mask,
bint minpv,
bint cov=0) nogil:
for yi in range(xi + 1):
nancorr_single(mat, N, K, result, xi, yi, mask, minpv, cov)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if nancorr_single isn't used elsewhere, I'd inline it here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


sumx += vx * vy
sumxx += vx * vx
sumyy += vy * vy

divisor = (nobs - 1.0) if cov else sqrt(sumxx * sumyy)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void nancorr_single(float64_t[:, :] mat,
Py_ssize_t N,
Py_ssize_t K,
float64_t[:, :] result,
Py_ssize_t xi,
Py_ssize_t yi,
uint8_t[:, :] mask,
bint minpv,
bint cov=0) nogil:
cdef:
Py_ssize_t i, j
int64_t nobs = 0
float64_t vx, vy, sumx, sumy, sumxx, sumyy, meanx, meany, divisor

if divisor != 0:
result[xi, yi] = result[yi, xi] = sumx / divisor
else:
result[xi, yi] = result[yi, xi] = NaN
nobs = sumxx = sumyy = sumx = sumy = 0
for i in range(N):
if mask[i, xi] and mask[i, yi]:
vx = mat[i, xi]
vy = mat[i, yi]
nobs += 1
sumx += vx
sumy += vy

if nobs < minpv:
result[xi, yi] = result[yi, xi] = NaN
else:
meanx = sumx / nobs
meany = sumy / nobs

# now the cov numerator
sumx = 0

for i in range(N):
if mask[i, xi] and mask[i, yi]:
vx = mat[i, xi] - meanx
vy = mat[i, yi] - meany

sumx += vx * vy
sumxx += vx * vx
sumyy += vy * vy

divisor = (nobs - 1.0) if cov else sqrt(sumxx * sumyy)

if divisor != 0:
result[xi, yi] = result[yi, xi] = sumx / divisor
else:
result[xi, yi] = result[yi, xi] = NaN

return result

# ----------------------------------------------------------------------
# Pairwise Spearman correlation
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6996,7 +6996,8 @@ def corr(self, method='pearson', min_periods=1):
mat = numeric_df.values

if method == 'pearson':
correl = libalgos.nancorr(ensure_float64(mat), minp=min_periods)
correl = libalgos.nancorr(ensure_float64(mat), minp=min_periods,
parallel=True)
elif method == 'spearman':
correl = libalgos.nancorr_spearman(ensure_float64(mat),
minp=min_periods)
Expand Down
13 changes: 8 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
from os.path import join as pjoin

import numpy
import pkg_resources
import platform
from distutils.sysconfig import get_config_var
Expand Down Expand Up @@ -677,10 +678,11 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
obj = Extension('pandas.{name}'.format(name=name),
sources=sources,
depends=data.get('depends', []),
include_dirs=include,
include_dirs=include + [numpy.get_include()],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this different from the pkg_resources version that is already in there?

language=data.get('language', 'c'),
define_macros=data.get('macros', macros),
extra_compile_args=extra_compile_args)
extra_compile_args=['-fopenmp'] + extra_compile_args,
extra_link_args=['-fopenmp'])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this platform-dependent in any way? i.e. do we need to check that it is available?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is the gcc flag to link against openmp and I think clang uses the same flag but I'm not sure.


extensions.append(obj)

Expand All @@ -704,12 +706,13 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
np_datetime_sources),
include_dirs=['pandas/_libs/src/ujson/python',
'pandas/_libs/src/ujson/lib',
'pandas/_libs/src/datetime'],
extra_compile_args=(['-D_GNU_SOURCE'] +
'pandas/_libs/src/datetime',
numpy.get_include()],
extra_compile_args=(['-D_GNU_SOURCE', '-fopenmp'] +
extra_compile_args),
extra_link_args=['-fopenmp'],
define_macros=macros)


extensions.append(ujson_ext)

# ----------------------------------------------------------------------
Expand Down