Skip to content

Commit 7f58d74

Browse files
avm19 and mroeschke authored
BUG: Fix na_position type in IndexEngine (#61062)
* Modify an existing test to cover the issue with na_pos > 128.
* Change na_position type from int8_t and int64_t consistently to Py_ssize_t.
* Add What's New entry.
* Sort whatsnew entries alphabetically
* Improve the whatsnew entry.
* Move whatsnew entry from v2.3.0.rst to v3.0.0.rst.
* Update doc/source/whatsnew/v3.0.0.rst

  Co-authored-by: Matthew Roeschke <[email protected]>
* Undo remove '-'.
* Sort whatsnew entries alphabetically.

---------

Co-authored-by: avm19 <[email protected]>
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 2030d9d commit 7f58d74

File tree

4 files changed

+20
-18
lines changed

4 files changed

+20
-18
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -696,6 +696,7 @@ Indexing
696696
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
697697
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
698698
- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
699+
- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
699700
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
700701
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
701702

pandas/_libs/hashtable.pxd

+12-12
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ cdef class HashTable:
4141

4242
cdef class UInt64HashTable(HashTable):
4343
cdef kh_uint64_t *table
44-
cdef int64_t na_position
44+
cdef Py_ssize_t na_position
4545
cdef bint uses_mask
4646

4747
cpdef get_item(self, uint64_t val)
@@ -51,7 +51,7 @@ cdef class UInt64HashTable(HashTable):
5151

5252
cdef class Int64HashTable(HashTable):
5353
cdef kh_int64_t *table
54-
cdef int64_t na_position
54+
cdef Py_ssize_t na_position
5555
cdef bint uses_mask
5656

5757
cpdef get_item(self, int64_t val)
@@ -61,7 +61,7 @@ cdef class Int64HashTable(HashTable):
6161

6262
cdef class UInt32HashTable(HashTable):
6363
cdef kh_uint32_t *table
64-
cdef int64_t na_position
64+
cdef Py_ssize_t na_position
6565
cdef bint uses_mask
6666

6767
cpdef get_item(self, uint32_t val)
@@ -71,7 +71,7 @@ cdef class UInt32HashTable(HashTable):
7171

7272
cdef class Int32HashTable(HashTable):
7373
cdef kh_int32_t *table
74-
cdef int64_t na_position
74+
cdef Py_ssize_t na_position
7575
cdef bint uses_mask
7676

7777
cpdef get_item(self, int32_t val)
@@ -81,7 +81,7 @@ cdef class Int32HashTable(HashTable):
8181

8282
cdef class UInt16HashTable(HashTable):
8383
cdef kh_uint16_t *table
84-
cdef int64_t na_position
84+
cdef Py_ssize_t na_position
8585
cdef bint uses_mask
8686

8787
cpdef get_item(self, uint16_t val)
@@ -91,7 +91,7 @@ cdef class UInt16HashTable(HashTable):
9191

9292
cdef class Int16HashTable(HashTable):
9393
cdef kh_int16_t *table
94-
cdef int64_t na_position
94+
cdef Py_ssize_t na_position
9595
cdef bint uses_mask
9696

9797
cpdef get_item(self, int16_t val)
@@ -101,7 +101,7 @@ cdef class Int16HashTable(HashTable):
101101

102102
cdef class UInt8HashTable(HashTable):
103103
cdef kh_uint8_t *table
104-
cdef int64_t na_position
104+
cdef Py_ssize_t na_position
105105
cdef bint uses_mask
106106

107107
cpdef get_item(self, uint8_t val)
@@ -111,7 +111,7 @@ cdef class UInt8HashTable(HashTable):
111111

112112
cdef class Int8HashTable(HashTable):
113113
cdef kh_int8_t *table
114-
cdef int64_t na_position
114+
cdef Py_ssize_t na_position
115115
cdef bint uses_mask
116116

117117
cpdef get_item(self, int8_t val)
@@ -121,7 +121,7 @@ cdef class Int8HashTable(HashTable):
121121

122122
cdef class Float64HashTable(HashTable):
123123
cdef kh_float64_t *table
124-
cdef int64_t na_position
124+
cdef Py_ssize_t na_position
125125
cdef bint uses_mask
126126

127127
cpdef get_item(self, float64_t val)
@@ -131,7 +131,7 @@ cdef class Float64HashTable(HashTable):
131131

132132
cdef class Float32HashTable(HashTable):
133133
cdef kh_float32_t *table
134-
cdef int64_t na_position
134+
cdef Py_ssize_t na_position
135135
cdef bint uses_mask
136136

137137
cpdef get_item(self, float32_t val)
@@ -141,7 +141,7 @@ cdef class Float32HashTable(HashTable):
141141

142142
cdef class Complex64HashTable(HashTable):
143143
cdef kh_complex64_t *table
144-
cdef int64_t na_position
144+
cdef Py_ssize_t na_position
145145
cdef bint uses_mask
146146

147147
cpdef get_item(self, complex64_t val)
@@ -151,7 +151,7 @@ cdef class Complex64HashTable(HashTable):
151151

152152
cdef class Complex128HashTable(HashTable):
153153
cdef kh_complex128_t *table
154-
cdef int64_t na_position
154+
cdef Py_ssize_t na_position
155155
cdef bint uses_mask
156156

157157
cpdef get_item(self, complex128_t val)

pandas/_libs/hashtable_class_helper.pxi.in

+3-3
Original file line number | Diff line number | Diff line change
@@ -535,7 +535,7 @@ cdef class {{name}}HashTable(HashTable):
535535
int ret = 0
536536
{{c_type}} val
537537
khiter_t k
538-
int8_t na_position = self.na_position
538+
Py_ssize_t na_position = self.na_position
539539

540540
if self.uses_mask and mask is None:
541541
raise NotImplementedError # pragma: no cover
@@ -567,7 +567,7 @@ cdef class {{name}}HashTable(HashTable):
567567
Int64Vector self_locs = Int64Vector()
568568
Int64VectorData *l
569569
Int64VectorData *sl
570-
int8_t na_position = self.na_position
570+
Py_ssize_t na_position = self.na_position
571571

572572
l = &locs.data
573573
sl = &self_locs.data
@@ -609,7 +609,7 @@ cdef class {{name}}HashTable(HashTable):
609609
{{c_type}} val
610610
khiter_t k
611611
intp_t[::1] locs = np.empty(n, dtype=np.intp)
612-
int8_t na_position = self.na_position
612+
Py_ssize_t na_position = self.na_position
613613

614614
if self.uses_mask and mask is None:
615615
raise NotImplementedError # pragma: no cover

pandas/tests/libs/test_hashtable.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -149,18 +149,19 @@ def test_map_locations(self, table_type, dtype, writable):
149149
def test_map_locations_mask(self, table_type, dtype, writable):
150150
if table_type == ht.PyObjectHashTable:
151151
pytest.skip("Mask not supported for object")
152-
N = 3
152+
N = 129 # must be > 128 to test GH#58924
153153
table = table_type(uses_mask=True)
154154
keys = (np.arange(N) + N).astype(dtype)
155155
keys.flags.writeable = writable
156-
table.map_locations(keys, np.array([False, False, True]))
156+
mask = np.concatenate([np.repeat(False, N - 1), [True]], axis=0)
157+
table.map_locations(keys, mask)
157158
for i in range(N - 1):
158159
assert table.get_item(keys[i]) == i
159160

160161
with pytest.raises(KeyError, match=re.escape(str(keys[N - 1]))):
161162
table.get_item(keys[N - 1])
162163

163-
assert table.get_na() == 2
164+
assert table.get_na() == N - 1
164165

165166
def test_lookup(self, table_type, dtype, writable):
166167
N = 3

0 commit comments

Comments
 (0)