Skip to content

REF: de-duplicate cython funcs #46735

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion pandas/_libs/algos.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from pandas._libs.dtypes cimport numeric_t
from pandas._libs.dtypes cimport (
numeric_object_t,
numeric_t,
)


cdef numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil
Expand All @@ -10,3 +13,10 @@ cdef enum TiebreakEnumType:
TIEBREAK_FIRST
TIEBREAK_FIRST_DESCENDING
TIEBREAK_DENSE


# Declaration only, so other Cython modules can cimport this helper; the
# implementation is in algos.pyx.
# Returns the value used to represent missing values when sorting, chosen so
# that they end up at the top or the bottom depending on `rank_nans_highest`.
# `val` is a placeholder: its value is not used, it only selects the fused-type
# specialization.
# `is_datetimelike=*` marks an optional argument; its default (False) is given
# in the implementation's signature.
cdef numeric_object_t get_rank_nan_fill_val(
bint rank_nans_highest,
numeric_object_t val,
bint is_datetimelike=*,
)
13 changes: 10 additions & 3 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -822,13 +822,17 @@ def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike):

cdef numeric_object_t get_rank_nan_fill_val(
bint rank_nans_highest,
numeric_object_t[:] _=None
numeric_object_t val,
bint is_datetimelike=False,
):
"""
Return the value we'll use to represent missing values when sorting depending
on if we'd like missing values to end up at the top/bottom. (The second parameter
is unused, but needed for fused type specialization)
"""
if numeric_object_t is int64_t and is_datetimelike and not rank_nans_highest:
return NPY_NAT + 1

if rank_nans_highest:
if numeric_object_t is object:
return Infinity()
Expand All @@ -854,6 +858,9 @@ cdef numeric_object_t get_rank_nan_fill_val(
if numeric_object_t is object:
return NegInfinity()
elif numeric_object_t is int64_t:
# Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN
# instead of NPY_NAT here causes build warnings and failure in
# test_cummax_i8_at_implementation_bound
return NPY_NAT
elif numeric_object_t is int32_t:
return util.INT32_MIN
Expand Down Expand Up @@ -975,7 +982,7 @@ def rank_1d(
# will flip the ordering to still end up with lowest rank.
# Symmetric logic applies to `na_option == 'bottom'`
nans_rank_highest = ascending ^ (na_option == 'top')
nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
if nans_rank_highest:
order = [masked_vals, mask]
else:
Expand Down Expand Up @@ -1335,7 +1342,7 @@ def rank_2d(

nans_rank_highest = ascending ^ (na_option == 'top')
if check_mask:
nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)

if numeric_object_t is object:
mask = missing.isnaobj2d(values).view(np.uint8)
Expand Down
41 changes: 12 additions & 29 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ from numpy.math cimport NAN
cnp.import_array()

from pandas._libs cimport util
from pandas._libs.algos cimport kth_smallest_c
from pandas._libs.algos cimport (
get_rank_nan_fill_val,
kth_smallest_c,
)

from pandas._libs.algos import (
ensure_platform_int,
Expand Down Expand Up @@ -989,36 +992,16 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil:
return False


cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max, bint is_datetimelike):
cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, bint is_datetimelike):
"""
Find either the min or the max supported by numeric_t; 'val' is a placeholder
to effectively make numeric_t an argument.
Find either the min or the max supported by numeric_object_t; 'val' is a
placeholder to effectively make numeric_object_t an argument.
"""
if numeric_t is int64_t:
if compute_max and is_datetimelike:
return -_int64_max
# Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN
# instead of NPY_NAT here causes build warnings and failure in
# test_cummax_i8_at_implementation_bound
return NPY_NAT if compute_max else util.INT64_MAX
elif numeric_t is int32_t:
return util.INT32_MIN if compute_max else util.INT32_MAX
elif numeric_t is int16_t:
return util.INT16_MIN if compute_max else util.INT16_MAX
elif numeric_t is int8_t:
return util.INT8_MIN if compute_max else util.INT8_MAX

elif numeric_t is uint64_t:
return 0 if compute_max else util.UINT64_MAX
elif numeric_t is uint32_t:
return 0 if compute_max else util.UINT32_MAX
elif numeric_t is uint16_t:
return 0 if compute_max else util.UINT16_MAX
elif numeric_t is uint8_t:
return 0 if compute_max else util.UINT8_MAX

else:
return -np.inf if compute_max else np.inf
# Delegates to the helper cimported from pandas._libs.algos instead of
# repeating the per-dtype table. compute_max=True needs the type's lowest
# value, which is what the helper returns for rank_nans_highest=False, hence
# the negation. The datetimelike int64 case (one above NPY_NAT) is handled
# inside the helper via is_datetimelike.
return get_rank_nan_fill_val(
not compute_max,
val=val,
is_datetimelike=is_datetimelike,
)


cdef numeric_t _get_na_val(numeric_t val, bint is_datetimelike):
Expand Down