[POC] PERF: 1d version of the cython grouped aggregation algorithms #39861

Closed
Changes from 4 commits
111 changes: 111 additions & 0 deletions pandas/_libs/groupby.pyx
@@ -700,6 +700,117 @@ group_mean_float32 = _group_mean['float']
group_mean_float64 = _group_mean['double']


@cython.wraparound(False)
@cython.boundscheck(False)
def _group_mean_transposed(floating[:, :] out,
                           int64_t[:] counts,
                           ndarray[floating, ndim=2] values,
                           const int64_t[:] labels,
                           Py_ssize_t min_count=-1):
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val, count, y, t
        floating[:, :] sumx, compensation
        int64_t[:, :] nobs
        Py_ssize_t len_values = len(values), len_labels = len(labels)

    assert min_count == -1, "'min_count' only used in add and prod"

    # if len_values != len_labels:
    #     raise ValueError("len(index) != len(labels)")

    nobs = np.zeros((<object>out).shape, dtype=np.int64)
    sumx = np.zeros_like(out)
    compensation = np.zeros_like(out)

    K, N = (<object>values).shape

    if N != len_labels:
        raise ValueError("len(index) != len(labels)")

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[j, i]
Review comment (Member): would it make a difference if we defined temp_values = values[:, i] (or values[i] in the non-transposed version) outside the j-loop and then set val = temp_values[j] inside the loop?

definitely seems weird that this would have a big perf impact
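A sketch of the variant this comment asks about, not part of the diff: it assumes values were declared as a memoryview (floating[:, :]) and that a floating[:] temp_values were added to the cdef block, so the row slice could be taken inside the nogil section.

            # hypothetical variant suggested in the review comment above
            temp_values = values[:, i]
            for j in range(K):
                val = temp_values[j]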

                # not nan
                if val == val:
                    nobs[j, lab] += 1
                    y = val - compensation[j, lab]
                    t = sumx[j, lab] + y
                    compensation[j, lab] = t - sumx[j, lab] - y
                    sumx[j, lab] = t

        for i in range(ncounts):
            for j in range(K):
                count = nobs[j, i]
                if nobs[j, i] == 0:
                    out[j, i] = NAN
                else:
                    out[j, i] = sumx[j, i] / count


group_mean_transposed_float32 = _group_mean_transposed['float']
group_mean_transposed_float64 = _group_mean_transposed['double']
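
For reference, a minimal pure-Python/NumPy sketch of the grouped Kahan (compensated-summation) mean that the kernel above implements; the name group_mean_reference and its signature are assumptions for illustration, not part of this PR.

import numpy as np

def group_mean_reference(values, labels, ngroups):
    # values: 1d float array; labels: int64 group codes, where -1 means "no group"
    sumx = np.zeros(ngroups)
    compensation = np.zeros(ngroups)
    nobs = np.zeros(ngroups, dtype=np.int64)
    for val, lab in zip(values, labels):
        if lab < 0 or val != val:              # skip unlabeled rows and NaNs
            continue
        nobs[lab] += 1
        y = val - compensation[lab]            # subtract the running rounding error
        t = sumx[lab] + y
        compensation[lab] = t - sumx[lab] - y  # record the new rounding error
        sumx[lab] = t
    out = np.full(ngroups, np.nan)             # groups with no observations stay NaN
    mask = nobs > 0
    out[mask] = sumx[mask] / nobs[mask]
    return out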


@cython.wraparound(False)
@cython.boundscheck(False)
def _group_mean_1d(floating[:] out,
                   int64_t[:] counts,
                   ndarray[floating, ndim=1] values,
                   const int64_t[:] labels,
                   Py_ssize_t min_count=-1):
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val, count, y, t
        floating[:] sumx, compensation
        int64_t[:] nobs
        Py_ssize_t len_values = len(values), len_labels = len(labels)

    assert min_count == -1, "'min_count' only used in add and prod"

    if len_values != len_labels:
        raise ValueError("len(index) != len(labels)")

    nobs = np.zeros((<object>out).shape, dtype=np.int64)
    sumx = np.zeros_like(out)
    compensation = np.zeros_like(out)

    N, = (<object>values).shape

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            val = values[i]
            # not nan
            if val == val:
                nobs[lab] += 1
                y = val - compensation[lab]
                t = sumx[lab] + y
                compensation[lab] = t - sumx[lab] - y
                sumx[lab] = t

        for i in range(ncounts):
            count = nobs[i]
            if nobs[i] == 0:
                out[i] = NAN
            else:
                out[i] = sumx[i] / count


group_mean_1d_float32 = _group_mean_1d['float']
group_mean_1d_float64 = _group_mean_1d['double']
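
A hypothetical sketch of how a caller could apply the 1d kernel column by column instead of calling a 2d kernel once, assuming this diff is built; the wrapper name grouped_mean_by_column is an illustration only, and only group_mean_1d_float64 from the lines above is taken from the PR.

import numpy as np
from pandas._libs import groupby as libgroupby

def grouped_mean_by_column(values_2d, labels, ngroups):
    # values_2d: float64 ndarray of shape (nrows, ncols); labels: contiguous int64 group codes
    ncols = values_2d.shape[1]
    out = np.empty((ngroups, ncols), dtype=np.float64)
    for j in range(ncols):
        counts = np.zeros(ngroups, dtype=np.int64)
        col_out = np.empty(ngroups, dtype=np.float64)
        # copying keeps the column contiguous; a zero-copy path would matter
        # for the performance comparison this POC is about
        col = np.ascontiguousarray(values_2d[:, j])
        libgroupby.group_mean_1d_float64(col_out, counts, col, labels)
        out[:, j] = col_out
    return out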


@cython.wraparound(False)
@cython.boundscheck(False)
def _group_ohlc(floating[:, :] out,