diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0df5a70d87655..fb7321139fc1a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -696,6 +696,7 @@ Indexing - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) - Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`) +- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index a5a3edad63403..0480ee54ffb4e 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -41,7 +41,7 @@ cdef class HashTable: cdef class UInt64HashTable(HashTable): cdef kh_uint64_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, uint64_t val) @@ -51,7 +51,7 @@ cdef class UInt64HashTable(HashTable): cdef class Int64HashTable(HashTable): cdef kh_int64_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, int64_t val) @@ -61,7 +61,7 @@ cdef class Int64HashTable(HashTable): cdef class UInt32HashTable(HashTable): cdef kh_uint32_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, uint32_t val) @@ -71,7 +71,7 @@ cdef class UInt32HashTable(HashTable): cdef class Int32HashTable(HashTable): cdef kh_int32_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, int32_t val) @@ -81,7 +81,7 @@ cdef class Int32HashTable(HashTable): cdef class UInt16HashTable(HashTable): cdef kh_uint16_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, uint16_t val) @@ -91,7 +91,7 @@ cdef class UInt16HashTable(HashTable): cdef class Int16HashTable(HashTable): cdef kh_int16_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, int16_t val) @@ -101,7 +101,7 @@ cdef class Int16HashTable(HashTable): cdef class UInt8HashTable(HashTable): cdef kh_uint8_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, uint8_t val) @@ -111,7 +111,7 @@ cdef class UInt8HashTable(HashTable): cdef class Int8HashTable(HashTable): cdef kh_int8_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, int8_t val) @@ -121,7 +121,7 @@ cdef class Int8HashTable(HashTable): cdef class Float64HashTable(HashTable): cdef kh_float64_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, float64_t val) @@ -131,7 +131,7 @@ cdef class Float64HashTable(HashTable): cdef class Float32HashTable(HashTable): cdef kh_float32_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, float32_t val) @@ -141,7 +141,7 @@ cdef class Float32HashTable(HashTable): cdef class Complex64HashTable(HashTable): cdef kh_complex64_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, complex64_t val) @@ -151,7 +151,7 @@ cdef class Complex64HashTable(HashTable): cdef class Complex128HashTable(HashTable): cdef kh_complex128_t *table - cdef int64_t na_position + cdef Py_ssize_t na_position cdef bint uses_mask cpdef get_item(self, complex128_t val) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 210df09f07db6..eae393f33bfd3 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -535,7 +535,7 @@ cdef class {{name}}HashTable(HashTable): int ret = 0 {{c_type}} val khiter_t k - int8_t na_position = self.na_position + Py_ssize_t na_position = self.na_position if self.uses_mask and mask is None: raise NotImplementedError # pragma: no cover @@ -567,7 +567,7 @@ cdef class {{name}}HashTable(HashTable): Int64Vector self_locs = Int64Vector() Int64VectorData *l Int64VectorData *sl - int8_t na_position = self.na_position + Py_ssize_t na_position = self.na_position l = &locs.data sl = &self_locs.data @@ -609,7 +609,7 @@ cdef class {{name}}HashTable(HashTable): {{c_type}} val khiter_t k intp_t[::1] locs = np.empty(n, dtype=np.intp) - int8_t na_position = self.na_position + Py_ssize_t na_position = self.na_position if self.uses_mask and mask is None: raise NotImplementedError # pragma: no cover diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py index 50b561aefcf49..6a95cfc7355d8 100644 --- a/pandas/tests/libs/test_hashtable.py +++ b/pandas/tests/libs/test_hashtable.py @@ -149,18 +149,19 @@ def test_map_locations(self, table_type, dtype, writable): def test_map_locations_mask(self, table_type, dtype, writable): if table_type == ht.PyObjectHashTable: pytest.skip("Mask not supported for object") - N = 3 + N = 129 # must be > 128 to test GH#58924 table = table_type(uses_mask=True) keys = (np.arange(N) + N).astype(dtype) keys.flags.writeable = writable - table.map_locations(keys, np.array([False, False, True])) + mask = np.concatenate([np.repeat(False, N - 1), [True]], axis=0) + table.map_locations(keys, mask) for i in range(N - 1): assert table.get_item(keys[i]) == i with pytest.raises(KeyError, match=re.escape(str(keys[N - 1]))): table.get_item(keys[N - 1]) - assert table.get_na() == 2 + assert table.get_na() == N - 1 def test_lookup(self, table_type, dtype, writable): N = 3