@@ -45,6 +45,7 @@ from numpy cimport (
45
45
cnp.import_array()
46
46
47
47
cimport pandas._libs.util as util
48
+ from pandas._libs.dtypes cimport numeric_object_t
48
49
from pandas._libs.khash cimport (
49
50
kh_destroy_int64,
50
51
kh_get_int64,
@@ -860,34 +861,30 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
860
861
# rank_1d, rank_2d
861
862
# ----------------------------------------------------------------------
862
863
863
- ctypedef fused rank_t:
864
- object
865
- float64_t
866
- uint64_t
867
- int64_t
868
-
869
-
870
- cdef rank_t get_rank_nan_fill_val(bint rank_nans_highest, rank_t[:] _ = None ):
864
+ cdef numeric_object_t get_rank_nan_fill_val(
865
+ bint rank_nans_highest,
866
+ numeric_object_t[:] _ = None
867
+ ):
871
868
"""
872
869
Return the value used to stand in for missing values when sorting: the largest
873
870
value of the specialized type when `rank_nans_highest` is true, the smallest
874
871
otherwise, so missing values end up at the top/bottom of the sort order.

Parameters
----------
rank_nans_highest : bint
Whether missing values should compare greater than every real value.
_ : memoryview of the fused type, default None
Unused; present only so the fused type can be specialized.

Returns
-------
Fill value of the specialized type (highest / lowest):
object -> Infinity() / NegInfinity()
int64_t -> util.INT64_MAX / NPY_NAT
uint64_t -> util.UINT64_MAX / 0
any other member -> np.inf / -np.inf
875
872
"""
876
873
if rank_nans_highest:
877
- if rank_t is object :
874
+ if numeric_object_t is object :
878
875
return Infinity()
879
- elif rank_t is int64_t:
876
+ elif numeric_object_t is int64_t:
880
877
return util.INT64_MAX
881
- elif rank_t is uint64_t:
878
+ elif numeric_object_t is uint64_t:
882
879
return util.UINT64_MAX
883
880
else :
884
881
# NOTE(review): this fall-through assumes every remaining member of the fused
# type is floating point; np.inf is not representable in an integer type.
# Confirm against the definition of numeric_object_t in pandas._libs.dtypes.
return np.inf
885
882
else :
886
- if rank_t is object :
883
+ if numeric_object_t is object :
887
884
return NegInfinity()
888
- elif rank_t is int64_t:
885
+ elif numeric_object_t is int64_t:
889
886
return NPY_NAT
890
- elif rank_t is uint64_t:
887
+ elif numeric_object_t is uint64_t:
891
888
return 0
892
889
else :
893
890
# NOTE(review): same floating-point assumption as the np.inf branch above.
return - np.inf
@@ -896,7 +893,7 @@ cdef rank_t get_rank_nan_fill_val(bint rank_nans_highest, rank_t[:] _=None):
896
893
@ cython.wraparound (False )
897
894
@ cython.boundscheck (False )
898
895
def rank_1d (
899
- ndarray[rank_t , ndim = 1 ] values,
896
+ ndarray[numeric_object_t , ndim = 1 ] values,
900
897
const intp_t[:] labels = None ,
901
898
bint is_datetimelike = False ,
902
899
ties_method = " average" ,
@@ -909,7 +906,7 @@ def rank_1d(
909
906
910
907
Parameters
911
908
----------
912
- values : array of rank_t values to be ranked
909
+ values : array of numeric_object_t values to be ranked
913
910
labels : np.ndarray[np.intp] or None
914
911
Array containing unique label for each group, with its ordering
915
912
matching up to the corresponding record in `values`. If not called
@@ -939,11 +936,11 @@ def rank_1d(
939
936
int64_t[::1 ] grp_sizes
940
937
intp_t[:] lexsort_indexer
941
938
float64_t[::1 ] out
942
- ndarray[rank_t , ndim= 1 ] masked_vals
943
- rank_t [:] masked_vals_memview
939
+ ndarray[numeric_object_t , ndim= 1 ] masked_vals
940
+ numeric_object_t [:] masked_vals_memview
944
941
uint8_t[:] mask
945
942
bint keep_na, nans_rank_highest, check_labels, check_mask
946
- rank_t nan_fill_val
943
+ numeric_object_t nan_fill_val
947
944
948
945
tiebreak = tiebreakers[ties_method]
949
946
if tiebreak == TIEBREAK_FIRST:
@@ -964,21 +961,22 @@ def rank_1d(
964
961
check_labels = labels is not None
965
962
966
963
# For cases where a mask is not possible, we can avoid mask checks
967
- check_mask = not (rank_t is uint64_t or (rank_t is int64_t and not is_datetimelike))
964
+ check_mask = not (numeric_object_t is uint64_t or
965
+ (numeric_object_t is int64_t and not is_datetimelike))
968
966
969
967
# Copy values into new array in order to fill missing data
970
968
# with mask, without obfuscating location of missing data
971
969
# in values array
972
- if rank_t is object and values.dtype != np.object_:
970
+ if numeric_object_t is object and values.dtype != np.object_:
973
971
masked_vals = values.astype(' O' )
974
972
else :
975
973
masked_vals = values.copy()
976
974
977
- if rank_t is object :
975
+ if numeric_object_t is object :
978
976
mask = missing.isnaobj(masked_vals)
979
- elif rank_t is int64_t and is_datetimelike:
977
+ elif numeric_object_t is int64_t and is_datetimelike:
980
978
mask = (masked_vals == NPY_NAT).astype(np.uint8)
981
- elif rank_t is float64_t:
979
+ elif numeric_object_t is float64_t:
982
980
mask = np.isnan(masked_vals).astype(np.uint8)
983
981
else :
984
982
mask = np.zeros(shape = len (masked_vals), dtype = np.uint8)
@@ -990,7 +988,7 @@ def rank_1d(
990
988
# will flip the ordering to still end up with lowest rank.
991
989
# Symmetric logic applies to `na_option == 'bottom'`
992
990
nans_rank_highest = ascending ^ (na_option == ' top' )
993
- nan_fill_val = get_rank_nan_fill_val[rank_t ](nans_rank_highest)
991
+ nan_fill_val = get_rank_nan_fill_val[numeric_object_t ](nans_rank_highest)
994
992
if nans_rank_highest:
995
993
order = [masked_vals, mask]
996
994
else :
@@ -1037,7 +1035,7 @@ cdef void rank_sorted_1d(
1037
1035
int64_t[::1 ] grp_sizes,
1038
1036
const intp_t[:] sort_indexer,
1039
1037
# Can make const with cython3 (https://github.com/cython/cython/issues/3222)
1040
- rank_t [:] masked_vals,
1038
+ numeric_object_t [:] masked_vals,
1041
1039
const uint8_t[:] mask,
1042
1040
bint check_mask,
1043
1041
Py_ssize_t N,
@@ -1061,7 +1059,7 @@ cdef void rank_sorted_1d(
1061
1059
if labels is None.
1062
1060
sort_indexer : intp_t[:]
1063
1061
Array of indices which sorts masked_vals
1064
- masked_vals : rank_t [:]
1062
+ masked_vals : numeric_object_t [:]
1065
1063
The values input to rank_1d, with missing values replaced by fill values
1066
1064
mask : uint8_t[:]
1067
1065
Array where entries are True if the value is missing, False otherwise.
@@ -1093,7 +1091,7 @@ cdef void rank_sorted_1d(
1093
1091
# that sorted value for retrieval back from the original
1094
1092
# values / masked_vals arrays
1095
1093
# TODO: de-duplicate once cython supports conditional nogil
1096
- if rank_t is object :
1094
+ if numeric_object_t is object :
1097
1095
with gil:
1098
1096
for i in range (N):
1099
1097
at_end = i == N - 1
@@ -1301,7 +1299,7 @@ cdef void rank_sorted_1d(
1301
1299
1302
1300
1303
1301
def rank_2d (
1304
- ndarray[rank_t , ndim = 2 ] in_arr,
1302
+ ndarray[numeric_object_t , ndim = 2 ] in_arr,
1305
1303
int axis = 0 ,
1306
1304
bint is_datetimelike = False ,
1307
1305
ties_method = " average" ,
@@ -1316,13 +1314,13 @@ def rank_2d(
1316
1314
Py_ssize_t k, n, col
1317
1315
float64_t[::1 , :] out # Column-major so columns are contiguous
1318
1316
int64_t[::1 ] grp_sizes
1319
- ndarray[rank_t , ndim= 2 ] values
1320
- rank_t [:, :] masked_vals
1317
+ ndarray[numeric_object_t , ndim= 2 ] values
1318
+ numeric_object_t [:, :] masked_vals
1321
1319
intp_t[:, :] sort_indexer
1322
1320
uint8_t[:, :] mask
1323
1321
TiebreakEnumType tiebreak
1324
1322
bint check_mask, keep_na, nans_rank_highest
1325
- rank_t nan_fill_val
1323
+ numeric_object_t nan_fill_val
1326
1324
1327
1325
tiebreak = tiebreakers[ties_method]
1328
1326
if tiebreak == TIEBREAK_FIRST:
@@ -1332,24 +1330,25 @@ def rank_2d(
1332
1330
keep_na = na_option == ' keep'
1333
1331
1334
1332
# For cases where a mask is not possible, we can avoid mask checks
1335
- check_mask = not (rank_t is uint64_t or (rank_t is int64_t and not is_datetimelike))
1333
+ check_mask = not (numeric_object_t is uint64_t or
1334
+ (numeric_object_t is int64_t and not is_datetimelike))
1336
1335
1337
1336
if axis == 1 :
1338
1337
values = np.asarray(in_arr).T.copy()
1339
1338
else :
1340
1339
values = np.asarray(in_arr).copy()
1341
1340
1342
- if rank_t is object :
1341
+ if numeric_object_t is object :
1343
1342
if values.dtype != np.object_:
1344
1343
values = values.astype(' O' )
1345
1344
1346
1345
nans_rank_highest = ascending ^ (na_option == ' top' )
1347
1346
if check_mask:
1348
- nan_fill_val = get_rank_nan_fill_val[rank_t ](nans_rank_highest)
1347
+ nan_fill_val = get_rank_nan_fill_val[numeric_object_t ](nans_rank_highest)
1349
1348
1350
- if rank_t is object :
1349
+ if numeric_object_t is object :
1351
1350
mask = missing.isnaobj2d(values).view(np.uint8)
1352
- elif rank_t is float64_t:
1351
+ elif numeric_object_t is float64_t:
1353
1352
mask = np.isnan(values).view(np.uint8)
1354
1353
1355
1354
# int64 and datetimelike
0 commit comments