-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
add uint64 support for some libgroupby funcs #28931
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ ctypedef fused rank_t: | |
float64_t | ||
float32_t | ||
int64_t | ||
uint64_t | ||
object | ||
|
||
|
||
|
@@ -34,6 +35,7 @@ def group_last(rank_t[:, :] out, | |
rank_t val | ||
ndarray[rank_t, ndim=2] resx | ||
ndarray[int64_t, ndim=2] nobs | ||
bint runtime_error = False | ||
|
||
assert min_count == -1, "'min_count' only used in add and prod" | ||
|
||
|
@@ -106,11 +108,20 @@ def group_last(rank_t[:, :] out, | |
if nobs[i, j] == 0: | ||
if rank_t is int64_t: | ||
out[i, j] = NPY_NAT | ||
elif rank_t is uint64_t: | ||
runtime_error = True | ||
break | ||
else: | ||
out[i, j] = NAN | ||
else: | ||
out[i, j] = resx[i, j] | ||
|
||
if runtime_error: | ||
# We cannot raise directly above because that is within a nogil | ||
# block. | ||
raise RuntimeError("empty group with uint64_t") | ||
|
||
|
||
group_last_float64 = group_last["float64_t"] | ||
group_last_float32 = group_last["float32_t"] | ||
group_last_int64 = group_last["int64_t"] | ||
|
@@ -132,6 +143,7 @@ def group_nth(rank_t[:, :] out, | |
rank_t val | ||
ndarray[rank_t, ndim=2] resx | ||
ndarray[int64_t, ndim=2] nobs | ||
bint runtime_error = False | ||
|
||
assert min_count == -1, "'min_count' only used in add and prod" | ||
|
||
|
@@ -199,11 +211,19 @@ def group_nth(rank_t[:, :] out, | |
if nobs[i, j] == 0: | ||
if rank_t is int64_t: | ||
out[i, j] = NPY_NAT | ||
elif rank_t is uint64_t: | ||
runtime_error = True | ||
break | ||
else: | ||
out[i, j] = NAN | ||
else: | ||
out[i, j] = resx[i, j] | ||
|
||
if runtime_error: | ||
# We cannot raise directly above because that is within a nogil | ||
# block. | ||
raise RuntimeError("empty group with uint64_t") | ||
|
||
|
||
group_nth_float64 = group_nth["float64_t"] | ||
group_nth_float32 = group_nth["float32_t"] | ||
|
@@ -282,12 +302,16 @@ def group_rank(float64_t[:, :] out, | |
if ascending ^ (na_option == 'top'): | ||
if rank_t is int64_t: | ||
nan_fill_val = np.iinfo(np.int64).max | ||
elif rank_t is uint64_t: | ||
nan_fill_val = np.iinfo(np.uint64).max | ||
else: | ||
nan_fill_val = np.inf | ||
order = (masked_vals, mask, labels) | ||
else: | ||
if rank_t is int64_t: | ||
nan_fill_val = np.iinfo(np.int64).min | ||
elif rank_t is uint64_t: | ||
nan_fill_val = 0 | ||
else: | ||
nan_fill_val = -np.inf | ||
|
||
|
@@ -397,6 +421,7 @@ def group_rank(float64_t[:, :] out, | |
group_rank_float64 = group_rank["float64_t"] | ||
group_rank_float32 = group_rank["float32_t"] | ||
group_rank_int64 = group_rank["int64_t"] | ||
group_rank_uint64 = group_rank["uint64_t"] | ||
# Note: we do not have a group_rank_object because that would require a | ||
# not-nogil implementation, see GH#19560 | ||
|
||
|
@@ -410,6 +435,7 @@ ctypedef fused groupby_t: | |
float64_t | ||
float32_t | ||
int64_t | ||
uint64_t | ||
|
||
|
||
@cython.wraparound(False) | ||
|
@@ -426,6 +452,7 @@ def group_max(groupby_t[:, :] out, | |
Py_ssize_t i, j, N, K, lab, ncounts = len(counts) | ||
groupby_t val, count, nan_val | ||
ndarray[groupby_t, ndim=2] maxx, nobs | ||
bint runtime_error = False | ||
|
||
assert min_count == -1, "'min_count' only used in add and prod" | ||
|
||
|
@@ -439,6 +466,9 @@ def group_max(groupby_t[:, :] out, | |
# Note: evaluated at compile-time | ||
maxx[:] = -_int64_max | ||
nan_val = NPY_NAT | ||
elif groupby_t is uint64_t: | ||
maxx[:] = 0 | ||
nan_val = 0 # We better not need this! TODO: Can we use e.g. NULL? | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you clarify what this is? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We have to define a value to treat as NA, and it has to be of the appropriate dtype. But for uint64 there is no such value, so the value defined here is only a placeholder. I'll clarify the comment in the code. |
||
else: | ||
maxx[:] = -np.inf | ||
nan_val = NAN | ||
|
@@ -470,10 +500,18 @@ def group_max(groupby_t[:, :] out, | |
for i in range(ncounts): | ||
for j in range(K): | ||
if nobs[i, j] == 0: | ||
if groupby_t is uint64_t: | ||
runtime_error = True | ||
break | ||
out[i, j] = nan_val | ||
else: | ||
out[i, j] = maxx[i, j] | ||
|
||
if runtime_error: | ||
# We cannot raise directly above because that is within a nogil | ||
# block. | ||
raise RuntimeError("empty group with uint64_t") | ||
|
||
|
||
@cython.wraparound(False) | ||
@cython.boundscheck(False) | ||
|
@@ -489,6 +527,7 @@ def group_min(groupby_t[:, :] out, | |
Py_ssize_t i, j, N, K, lab, ncounts = len(counts) | ||
groupby_t val, count, nan_val | ||
ndarray[groupby_t, ndim=2] minx, nobs | ||
bint runtime_error = False | ||
|
||
assert min_count == -1, "'min_count' only used in add and prod" | ||
|
||
|
@@ -501,6 +540,9 @@ def group_min(groupby_t[:, :] out, | |
if groupby_t is int64_t: | ||
minx[:] = _int64_max | ||
nan_val = NPY_NAT | ||
elif groupby_t is uint64_t: | ||
minx[:] = np.iinfo(np.uint64).max | ||
nan_val = 0 # We better not need this! TODO: Can we use e.g. NULL? | ||
else: | ||
minx[:] = np.inf | ||
nan_val = NAN | ||
|
@@ -532,10 +574,18 @@ def group_min(groupby_t[:, :] out, | |
for i in range(ncounts): | ||
for j in range(K): | ||
if nobs[i, j] == 0: | ||
if groupby_t is uint64_t: | ||
runtime_error = True | ||
break | ||
out[i, j] = nan_val | ||
else: | ||
out[i, j] = minx[i, j] | ||
|
||
if runtime_error: | ||
# We cannot raise directly above because that is within a nogil | ||
# block. | ||
raise RuntimeError("empty group with uint64_t") | ||
|
||
|
||
@cython.boundscheck(False) | ||
@cython.wraparound(False) | ||
|
@@ -575,6 +625,8 @@ def group_cummin(groupby_t[:, :] out, | |
accum = np.empty((ngroups, K), dtype=np.asarray(values).dtype) | ||
if groupby_t is int64_t: | ||
accum[:] = _int64_max | ||
elif groupby_t is uint64_t: | ||
accum[:] = np.iinfo(np.uint64).max | ||
else: | ||
accum[:] = np.inf | ||
|
||
|
@@ -642,6 +694,8 @@ def group_cummax(groupby_t[:, :] out, | |
accum = np.empty((ngroups, K), dtype=np.asarray(values).dtype) | ||
if groupby_t is int64_t: | ||
accum[:] = -_int64_max | ||
elif groupby_t is uint64_t: | ||
accum[:] = 0 | ||
else: | ||
accum[:] = -np.inf | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where are these caught?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This would get caught by one of the `except Exception`s in the groupby code that I'm trying to make more specific.