
CLN: assorted #46420

Merged 3 commits on Mar 19, 2022

1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
@@ -466,6 +466,7 @@ Groupby/resample/rolling
- Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`)
- Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`)
- Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`)
- Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`)
-

Reshaping
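Note on the whatsnew entry added above: GH#46408 concerns `GroupBy.max` raising `RuntimeError` when a `uint64` column has empty groups. A minimal sketch of that kind of situation (one way an empty group can arise is an unused Categorical category; the actual reproduction in the issue may differ):

    import numpy as np
    import pandas as pd

    # One way to get an empty group: a Categorical key with an unused category.
    # The "val" column is uint64, the dtype the whatsnew entry refers to.
    df = pd.DataFrame(
        {
            "key": pd.Categorical(["a", "a"], categories=["a", "b"]),
            "val": np.array([1, 2], dtype=np.uint64),
        }
    )

    # Group "b" is empty; per the whatsnew entry this previously raised
    # RuntimeError for uint64 data, and now returns a result with a missing
    # value for the empty group.
    print(df.groupby("key").max())
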
2 changes: 1 addition & 1 deletion pandas/_libs/hashing.pyx
@@ -53,7 +53,7 @@ def hash_object_array(
"""
cdef:
Py_ssize_t i, n
uint64_t[:] result
uint64_t[::1] result
bytes data, k
uint8_t *kb
uint64_t *lens
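A recurring change in this PR (here and in the hashtable/ops/parsers hunks below) tightens memoryview declarations from `uint64_t[:]` to `uint64_t[::1]`. The `::1` form tells Cython the buffer is C-contiguous, so element access can use a plain stride-1 offset. The arrays bound to these views come from `np.empty`, which allocates contiguous memory; a small Python-level illustration (not part of the diff) of why the stricter declaration is safe:

    import numpy as np

    # np.empty returns a C-contiguous array, which is exactly what a Cython
    # `[::1]` (contiguous) memoryview requires.
    result = np.empty(10, dtype=np.uint64)
    print(result.flags["C_CONTIGUOUS"])    # True -> can back a uint64_t[::1] view

    # A strided slice is not contiguous; only the looser `[:]` declaration
    # would accept it, at the cost of a per-element stride lookup.
    strided = np.arange(20, dtype=np.uint64)[::2]
    print(strided.flags["C_CONTIGUOUS"])   # False
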
16 changes: 8 additions & 8 deletions pandas/_libs/hashtable_class_helper.pxi.in
@@ -503,7 +503,7 @@ cdef class {{name}}HashTable(HashTable):
int ret = 0
{{c_type}} val
khiter_t k
intp_t[:] locs = np.empty(n, dtype=np.intp)
intp_t[::1] locs = np.empty(n, dtype=np.intp)

with nogil:
for i in range(n):
@@ -561,7 +561,7 @@ cdef class {{name}}HashTable(HashTable):
"""
cdef:
Py_ssize_t i, idx, count = count_prior, n = len(values)
intp_t[:] labels
intp_t[::1] labels
int ret = 0
{{c_type}} val, na_value2
khiter_t k
@@ -710,7 +710,7 @@ cdef class {{name}}HashTable(HashTable):
# tuple[np.ndarray[np.intp], np.ndarray[{{dtype}}]]
cdef:
Py_ssize_t i, n = len(values)
intp_t[:] labels
intp_t[::1] labels
Py_ssize_t idx, count = 0
int ret = 0
{{c_type}} val
@@ -848,7 +848,7 @@ cdef class StringHashTable(HashTable):
object val
const char *v
khiter_t k
intp_t[:] locs = np.empty(n, dtype=np.intp)
intp_t[::1] locs = np.empty(n, dtype=np.intp)

# these by-definition *must* be strings
vecs = <const char **>malloc(n * sizeof(char *))
@@ -946,8 +946,8 @@ cdef class StringHashTable(HashTable):
"""
cdef:
Py_ssize_t i, idx, count = count_prior, n = len(values)
intp_t[:] labels
int64_t[:] uindexer
intp_t[::1] labels
int64_t[::1] uindexer
int ret = 0
object val
const char *v
@@ -1168,7 +1168,7 @@ cdef class PyObjectHashTable(HashTable):
int ret = 0
object val
khiter_t k
intp_t[:] locs = np.empty(n, dtype=np.intp)
intp_t[::1] locs = np.empty(n, dtype=np.intp)

for i in range(n):
val = values[i]
@@ -1223,7 +1223,7 @@ cdef class PyObjectHashTable(HashTable):
"""
cdef:
Py_ssize_t i, idx, count = count_prior, n = len(values)
intp_t[:] labels
intp_t[::1] labels
int ret = 0
object val
khiter_t k
6 changes: 4 additions & 2 deletions pandas/_libs/hashtable_func_helper.pxi.in
@@ -85,7 +85,9 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
{{endif}}

# collect counts in the order corresponding to result_keys:
cdef int64_t[:] result_counts = np.empty(table.size, dtype=np.int64)
cdef:
int64_t[::1] result_counts = np.empty(table.size, dtype=np.int64)

for i in range(table.size):
{{if dtype == 'object'}}
k = kh_get_{{ttype}}(table, result_keys.data[i])
@@ -366,7 +368,7 @@ def mode(ndarray[htfunc_t] values, bint dropna):
ndarray[htfunc_t] keys
ndarray[htfunc_t] modes

int64_t[:] counts
int64_t[::1] counts
int64_t count, max_count = -1
Py_ssize_t nkeys, k, j = 0

4 changes: 2 additions & 2 deletions pandas/_libs/interval.pyx
@@ -5,11 +5,11 @@ from operator import (
)

from cpython.datetime cimport (
PyDateTime_IMPORT,
PyDelta_Check,
import_datetime,
)

PyDateTime_IMPORT
import_datetime()

from cpython.object cimport (
Py_EQ,
8 changes: 4 additions & 4 deletions pandas/_libs/lib.pyx
@@ -9,9 +9,9 @@ from cython import Py_ssize_t
from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
PyDateTime_IMPORT,
PyDelta_Check,
PyTime_Check,
import_datetime,
)
from cpython.iterator cimport PyIter_Check
from cpython.number cimport PyNumber_Check
@@ -27,7 +27,7 @@ from cpython.tuple cimport (
)
from cython cimport floating

PyDateTime_IMPORT
import_datetime()

import numpy as np

@@ -2470,8 +2470,8 @@ def maybe_convert_objects(ndarray[object] objects,
ndarray[int64_t] ints
ndarray[uint64_t] uints
ndarray[uint8_t] bools
int64_t[:] idatetimes
int64_t[:] itimedeltas
int64_t[::1] idatetimes
int64_t[::1] itimedeltas
Seen seen = Seen()
object val
float64_t fval, fnan = np.nan
4 changes: 2 additions & 2 deletions pandas/_libs/ops.pyx
@@ -194,7 +194,7 @@ def scalar_binop(object[:] values, object val, object op) -> ndarray:
"""
cdef:
Py_ssize_t i, n = len(values)
object[:] result
object[::1] result
object x

result = np.empty(n, dtype=object)
@@ -231,7 +231,7 @@ def vec_binop(object[:] left, object[:] right, object op) -> ndarray:
"""
cdef:
Py_ssize_t i, n = len(left)
object[:] result
object[::1] result

if n != <Py_ssize_t>len(right):
raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')
2 changes: 1 addition & 1 deletion pandas/_libs/parsers.pyx
@@ -1457,7 +1457,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
const char *word = NULL

int64_t NA = -1
int64_t[:] codes
int64_t[::1] codes
int64_t current_category = 0

char *errors = "strict"
5 changes: 2 additions & 3 deletions pandas/_libs/tslib.pyx
@@ -5,13 +5,13 @@ import cython
from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
PyDateTime_IMPORT,
datetime,
import_datetime,
tzinfo,
)

# import datetime C API
PyDateTime_IMPORT
import_datetime()


cimport numpy as cnp
@@ -63,7 +63,6 @@ from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.timestamps import Timestamp

# Note: this is the only non-tslibs intra-pandas dependency here

from pandas._libs.missing cimport checknull_with_nat_and_na
from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single

4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/conversion.pyx
@@ -18,13 +18,13 @@ import pytz
from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
PyDateTime_IMPORT,
datetime,
import_datetime,
time,
tzinfo,
)

PyDateTime_IMPORT
import_datetime()

from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.np_datetime cimport (
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pyx
@@ -165,7 +165,7 @@ class FreqGroup(Enum):
FR_MS = c_FreqGroup.FR_MS
FR_US = c_FreqGroup.FR_US
FR_NS = c_FreqGroup.FR_NS
FR_UND = -c_FreqGroup.FR_UND # undefined
FR_UND = c_FreqGroup.FR_UND # undefined

@staticmethod
def from_period_dtype_code(code: int) -> "FreqGroup":
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/fields.pyi
@@ -22,7 +22,7 @@ def get_date_field(
field: str,
) -> npt.NDArray[np.int32]: ...
def get_timedelta_field(
tdindex: np.ndarray, # const int64_t[:]
tdindex: npt.NDArray[np.int64], # const int64_t[:]
field: str,
) -> npt.NDArray[np.int32]: ...
def isleapyear_arr(
@@ -31,7 +31,7 @@ def isleapyear_arr(
def build_isocalendar_sarray(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
) -> np.ndarray: ...
def get_locale_names(name_type: str, locale: str | None = ...): ...
def _get_locale_names(name_type: str, locale: str | None = ...): ...

class RoundTo:
@property
10 changes: 5 additions & 5 deletions pandas/_libs/tslibs/fields.pyx
@@ -152,7 +152,7 @@ def get_date_name_field(const int64_t[:] dtindex, str field, object locale=None)
if locale is None:
names = np.array(DAYS_FULL, dtype=np.object_)
else:
names = np.array(get_locale_names('f_weekday', locale),
names = np.array(_get_locale_names('f_weekday', locale),
dtype=np.object_)
for i in range(count):
if dtindex[i] == NPY_NAT:
@@ -167,7 +167,7 @@ def get_date_name_field(const int64_t[:] dtindex, str field, object locale=None)
if locale is None:
names = np.array(MONTHS_FULL, dtype=np.object_)
else:
names = np.array(get_locale_names('f_month', locale),
names = np.array(_get_locale_names('f_month', locale),
dtype=np.object_)
for i in range(count):
if dtindex[i] == NPY_NAT:
@@ -574,7 +574,7 @@ def build_isocalendar_sarray(const int64_t[:] dtindex):
return out


def get_locale_names(name_type: str, locale: object = None):
def _get_locale_names(name_type: str, locale: object = None):
"""
Returns an array of localized day or month names.

@@ -650,7 +650,7 @@ class RoundTo:
return 4


cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit):
cdef inline ndarray[int64_t] _floor_int64(const int64_t[:] values, int64_t unit):
cdef:
Py_ssize_t i, n = len(values)
ndarray[int64_t] result = np.empty(n, dtype="i8")
@@ -668,7 +668,7 @@ cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit):
return result


cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit):
cdef inline ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit):
cdef:
Py_ssize_t i, n = len(values)
ndarray[int64_t] result = np.empty(n, dtype="i8")
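The fields.pyi/fields.pyx hunks rename `get_locale_names` to `_get_locale_names`, marking it as internal; the supported surface remains the name-field accessors that call it, as the `get_date_name_field` hunks above show. A short, hedged sketch of that public API (default locale only, so the English names are used here):

    import pandas as pd

    idx = pd.date_range("2022-03-19", periods=3, freq="D")

    # day_name()/month_name() are the public entry points; with locale=None
    # they use the English names, otherwise they go through the (now private)
    # helper that loads localized names, e.g. locale="de_DE" if installed.
    print(idx.day_name())
    print(idx.month_name())
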
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/nattype.pxd
@@ -4,7 +4,6 @@ from numpy cimport int64_t

cdef int64_t NPY_NAT

cdef bint _nat_scalar_rules[6]
cdef set c_nat_strings

cdef class _NaT(datetime):
26 changes: 7 additions & 19 deletions pandas/_libs/tslibs/nattype.pyx
@@ -3,11 +3,13 @@ import warnings
from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
PyDateTime_IMPORT,
PyDelta_Check,
datetime,
import_datetime,
timedelta,
)

import_datetime()
from cpython.object cimport (
Py_EQ,
Py_GE,
@@ -18,10 +20,6 @@ from cpython.object cimport (
PyObject_RichCompare,
)

PyDateTime_IMPORT

from cpython.version cimport PY_MINOR_VERSION

import numpy as np

cimport numpy as cnp
@@ -43,14 +41,6 @@ cdef set c_nat_strings = nat_strings
cdef int64_t NPY_NAT = util.get_nat()
iNaT = NPY_NAT # python-visible constant

cdef bint _nat_scalar_rules[6]
_nat_scalar_rules[Py_EQ] = False
_nat_scalar_rules[Py_NE] = True
_nat_scalar_rules[Py_LT] = False
_nat_scalar_rules[Py_LE] = False
_nat_scalar_rules[Py_GT] = False
_nat_scalar_rules[Py_GE] = False

# ----------------------------------------------------------------------


@@ -107,24 +97,23 @@ def __nat_unpickle(*args):
cdef class _NaT(datetime):
# cdef readonly:
# int64_t value
# object freq

# higher than np.ndarray and np.matrix
__array_priority__ = 100

def __richcmp__(_NaT self, object other, int op):
if util.is_datetime64_object(other) or PyDateTime_Check(other):
# We treat NaT as datetime-like for this comparison
return _nat_scalar_rules[op]
return op == Py_NE

elif util.is_timedelta64_object(other) or PyDelta_Check(other):
# We treat NaT as timedelta-like for this comparison
return _nat_scalar_rules[op]
return op == Py_NE

elif util.is_array(other):
if other.dtype.kind in "mM":
result = np.empty(other.shape, dtype=np.bool_)
result.fill(_nat_scalar_rules[op])
result.fill(op == Py_NE)
elif other.dtype.kind == "O":
result = np.array([PyObject_RichCompare(self, x, op) for x in other])
elif op == Py_EQ:
@@ -510,8 +499,7 @@ class NaTType(_NaT):
utcoffset = _make_error_func("utcoffset", datetime)

# "fromisocalendar" was introduced in 3.8
if PY_MINOR_VERSION >= 8:
fromisocalendar = _make_error_func("fromisocalendar", datetime)
fromisocalendar = _make_error_func("fromisocalendar", datetime)

# ----------------------------------------------------------------------
# The remaining methods have docstrings copy/pasted from the analogous
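The `__richcmp__` change above drops the `_nat_scalar_rules` lookup table in favor of `return op == Py_NE`, which encodes the same rule: against datetime-like or timedelta-like operands, `NaT` is unequal to everything and every other comparison is False. A quick Python-level check of that (pre-existing) behavior:

    import numpy as np
    import pandas as pd

    ts = pd.Timestamp("2022-03-19")

    # Only != is True; ==, <, <=, >, >= are all False, matching `op == Py_NE`.
    print(pd.NaT == ts, pd.NaT != ts)             # False True
    print(pd.NaT < ts, pd.NaT >= ts)              # False False
    print(pd.NaT == pd.NaT, pd.NaT != pd.NaT)     # False True
    print(pd.NaT == np.timedelta64("NaT", "ns"))  # False (timedelta-like path)
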
4 changes: 4 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
@@ -8,6 +8,7 @@ from numpy cimport (
)


# TODO(cython3): most of these can be cimported directly from numpy
cdef extern from "numpy/ndarrayobject.h":
ctypedef int64_t npy_timedelta
ctypedef int64_t npy_datetime
Expand Down Expand Up @@ -59,6 +60,9 @@ cdef extern from "src/datetime/np_datetime.h":
NPY_DATETIMEUNIT fr,
npy_datetimestruct *result) nogil

npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr,
npy_datetimestruct *d) nogil


cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1
