From 17d8554ae634c3ddbf55f9fd19dbe4169f4dcbe3 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 10 Apr 2022 17:49:03 -0700 Subject: [PATCH] REF: de-duplicate cython funcs --- pandas/_libs/algos.pxd | 12 +++++++++++- pandas/_libs/algos.pyx | 13 ++++++++++--- pandas/_libs/groupby.pyx | 41 ++++++++++++---------------------------- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd index fdeff2ed11805..c3b83b9bd40cb 100644 --- a/pandas/_libs/algos.pxd +++ b/pandas/_libs/algos.pxd @@ -1,4 +1,7 @@ -from pandas._libs.dtypes cimport numeric_t +from pandas._libs.dtypes cimport ( + numeric_object_t, + numeric_t, +) cdef numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil @@ -10,3 +13,10 @@ cdef enum TiebreakEnumType: TIEBREAK_FIRST TIEBREAK_FIRST_DESCENDING TIEBREAK_DENSE + + +cdef numeric_object_t get_rank_nan_fill_val( + bint rank_nans_highest, + numeric_object_t val, + bint is_datetimelike=*, +) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 44234013c7a3c..5a7c6dad311b5 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -822,13 +822,17 @@ def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike): cdef numeric_object_t get_rank_nan_fill_val( bint rank_nans_highest, - numeric_object_t[:] _=None + numeric_object_t val, + bint is_datetimelike=False, ): """ Return the value we'll use to represent missing values when sorting depending on if we'd like missing values to end up at the top/bottom. (The second parameter is unused, but needed for fused type specialization) """ + if numeric_object_t is int64_t and is_datetimelike and not rank_nans_highest: + return NPY_NAT + 1 + if rank_nans_highest: if numeric_object_t is object: return Infinity() @@ -854,6 +858,9 @@ cdef numeric_object_t get_rank_nan_fill_val( if numeric_object_t is object: return NegInfinity() elif numeric_object_t is int64_t: + # Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN + # instead of NPY_NAT here causes build warnings and failure in + # test_cummax_i8_at_implementation_bound return NPY_NAT elif numeric_object_t is int32_t: return util.INT32_MIN @@ -975,7 +982,7 @@ def rank_1d( # will flip the ordering to still end up with lowest rank. # Symmetric logic applies to `na_option == 'bottom'` nans_rank_highest = ascending ^ (na_option == 'top') - nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest) + nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, 0) if nans_rank_highest: order = [masked_vals, mask] else: @@ -1335,7 +1342,7 @@ def rank_2d( nans_rank_highest = ascending ^ (na_option == 'top') if check_mask: - nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest) + nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, 0) if numeric_object_t is object: mask = missing.isnaobj2d(values).view(np.uint8) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 899c56a0a075e..4b033e9c3bb53 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -31,7 +31,10 @@ from numpy.math cimport NAN cnp.import_array() from pandas._libs cimport util -from pandas._libs.algos cimport kth_smallest_c +from pandas._libs.algos cimport ( + get_rank_nan_fill_val, + kth_smallest_c, +) from pandas._libs.algos import ( ensure_platform_int, @@ -989,36 +992,16 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil: return False -cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max, bint is_datetimelike): +cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, bint is_datetimelike): """ - Find either the min or the max supported by numeric_t; 'val' is a placeholder - to effectively make numeric_t an argument. + Find either the min or the max supported by numeric_object_t; 'val' is a + placeholder to effectively make numeric_object_t an argument. """ - if numeric_t is int64_t: - if compute_max and is_datetimelike: - return -_int64_max - # Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN - # instead of NPY_NAT here causes build warnings and failure in - # test_cummax_i8_at_implementation_bound - return NPY_NAT if compute_max else util.INT64_MAX - elif numeric_t is int32_t: - return util.INT32_MIN if compute_max else util.INT32_MAX - elif numeric_t is int16_t: - return util.INT16_MIN if compute_max else util.INT16_MAX - elif numeric_t is int8_t: - return util.INT8_MIN if compute_max else util.INT8_MAX - - elif numeric_t is uint64_t: - return 0 if compute_max else util.UINT64_MAX - elif numeric_t is uint32_t: - return 0 if compute_max else util.UINT32_MAX - elif numeric_t is uint16_t: - return 0 if compute_max else util.UINT16_MAX - elif numeric_t is uint8_t: - return 0 if compute_max else util.UINT8_MAX - - else: - return -np.inf if compute_max else np.inf + return get_rank_nan_fill_val( + not compute_max, + val=val, + is_datetimelike=is_datetimelike, + ) cdef numeric_t _get_na_val(numeric_t val, bint is_datetimelike):