Skip to content

Commit 58f3fb8

Browse files
jbrockmendel authored and yehoshuadimarsky committed
REF: de-duplicate cython funcs (pandas-dev#46735)
1 parent a535a69 commit 58f3fb8

File tree

3 files changed

+33
-33
lines changed

3 files changed

+33
-33
lines changed

pandas/_libs/algos.pxd

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from pandas._libs.dtypes cimport numeric_t
1+
from pandas._libs.dtypes cimport (
2+
numeric_object_t,
3+
numeric_t,
4+
)
25

36

47
cdef numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil
@@ -10,3 +13,10 @@ cdef enum TiebreakEnumType:
1013
TIEBREAK_FIRST
1114
TIEBREAK_FIRST_DESCENDING
1215
TIEBREAK_DENSE
16+
17+
18+
cdef numeric_object_t get_rank_nan_fill_val(
19+
bint rank_nans_highest,
20+
numeric_object_t val,
21+
bint is_datetimelike=*,
22+
)

pandas/_libs/algos.pyx

+10-3
Original file line numberDiff line numberDiff line change
@@ -822,13 +822,17 @@ def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike):
822822

823823
cdef numeric_object_t get_rank_nan_fill_val(
824824
bint rank_nans_highest,
825-
numeric_object_t[:] _=None
825+
numeric_object_t val,
826+
bint is_datetimelike=False,
826827
):
827828
"""
828829
Return the value we'll use to represent missing values when sorting depending
829830
on if we'd like missing values to end up at the top/bottom. (The second parameter
830831
is unused, but needed for fused type specialization)
831832
"""
833+
if numeric_object_t is int64_t and is_datetimelike and not rank_nans_highest:
834+
return NPY_NAT + 1
835+
832836
if rank_nans_highest:
833837
if numeric_object_t is object:
834838
return Infinity()
@@ -854,6 +858,9 @@ cdef numeric_object_t get_rank_nan_fill_val(
854858
if numeric_object_t is object:
855859
return NegInfinity()
856860
elif numeric_object_t is int64_t:
861+
# Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN
862+
# instead of NPY_NAT here causes build warnings and failure in
863+
# test_cummax_i8_at_implementation_bound
857864
return NPY_NAT
858865
elif numeric_object_t is int32_t:
859866
return util.INT32_MIN
@@ -975,7 +982,7 @@ def rank_1d(
975982
# will flip the ordering to still end up with lowest rank.
976983
# Symmetric logic applies to `na_option == 'bottom'`
977984
nans_rank_highest = ascending ^ (na_option == 'top')
978-
nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
985+
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
979986
if nans_rank_highest:
980987
order = [masked_vals, mask]
981988
else:
@@ -1335,7 +1342,7 @@ def rank_2d(
13351342

13361343
nans_rank_highest = ascending ^ (na_option == 'top')
13371344
if check_mask:
1338-
nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
1345+
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
13391346

13401347
if numeric_object_t is object:
13411348
mask = missing.isnaobj2d(values).view(np.uint8)

pandas/_libs/groupby.pyx

+12-29
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ from numpy.math cimport NAN
3131
cnp.import_array()
3232

3333
from pandas._libs cimport util
34-
from pandas._libs.algos cimport kth_smallest_c
34+
from pandas._libs.algos cimport (
35+
get_rank_nan_fill_val,
36+
kth_smallest_c,
37+
)
3538

3639
from pandas._libs.algos import (
3740
ensure_platform_int,
@@ -989,36 +992,16 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil:
989992
return False
990993

991994

992-
cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max, bint is_datetimelike):
995+
cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, bint is_datetimelike):
993996
"""
994-
Find either the min or the max supported by numeric_t; 'val' is a placeholder
995-
to effectively make numeric_t an argument.
997+
Find either the min or the max supported by numeric_object_t; 'val' is a
998+
placeholder to effectively make numeric_object_t an argument.
996999
"""
997-
if numeric_t is int64_t:
998-
if compute_max and is_datetimelike:
999-
return -_int64_max
1000-
# Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN
1001-
# instead of NPY_NAT here causes build warnings and failure in
1002-
# test_cummax_i8_at_implementation_bound
1003-
return NPY_NAT if compute_max else util.INT64_MAX
1004-
elif numeric_t is int32_t:
1005-
return util.INT32_MIN if compute_max else util.INT32_MAX
1006-
elif numeric_t is int16_t:
1007-
return util.INT16_MIN if compute_max else util.INT16_MAX
1008-
elif numeric_t is int8_t:
1009-
return util.INT8_MIN if compute_max else util.INT8_MAX
1010-
1011-
elif numeric_t is uint64_t:
1012-
return 0 if compute_max else util.UINT64_MAX
1013-
elif numeric_t is uint32_t:
1014-
return 0 if compute_max else util.UINT32_MAX
1015-
elif numeric_t is uint16_t:
1016-
return 0 if compute_max else util.UINT16_MAX
1017-
elif numeric_t is uint8_t:
1018-
return 0 if compute_max else util.UINT8_MAX
1019-
1020-
else:
1021-
return -np.inf if compute_max else np.inf
1000+
return get_rank_nan_fill_val(
1001+
not compute_max,
1002+
val=val,
1003+
is_datetimelike=is_datetimelike,
1004+
)
10221005

10231006

10241007
cdef numeric_t _get_na_val(numeric_t val, bint is_datetimelike):

0 commit comments

Comments
 (0)