Skip to content

Commit 54f23e8

Browse files
authored
BUG: get_indexer returned dtype (#36431)
1 parent 51ffcdb commit 54f23e8

File tree

5 files changed

+13
-12
lines changed

5 files changed

+13
-12
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ Indexing
295295

296296
- Bug in :meth:`PeriodIndex.get_loc` incorrectly raising ``ValueError`` on non-datelike strings instead of ``KeyError``, causing similar errors in :meth:`Series.__getitem__`, :meth:`Series.__contains__`, and :meth:`Series.loc.__getitem__` (:issue:`34240`)
297297
- Bug in :meth:`Index.sort_values` where, when empty values were passed, the method would break by trying to compare missing values instead of pushing them to the end of the sort order. (:issue:`35584`)
298+
- Bug in :meth:`Index.get_indexer` and :meth:`Index.get_indexer_non_unique` where ``int64`` arrays were returned instead of ``intp`` arrays. (:issue:`36359`)
298299
-
299300

300301
Missing

pandas/_libs/hashtable.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from pandas._libs.khash cimport (
22
kh_int64_t, kh_uint64_t, kh_float64_t, kh_pymap_t, kh_str_t, uint64_t,
33
int64_t, float64_t)
4-
from numpy cimport ndarray
4+
from numpy cimport ndarray, intp_t
55

66
# prototypes for sharing
77

pandas/_libs/hashtable_class_helper.pxi.in

+7-7
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ cdef class {{name}}HashTable(HashTable):
347347
int ret = 0
348348
{{dtype}}_t val
349349
khiter_t k
350-
int64_t[:] locs = np.empty(n, dtype=np.int64)
350+
intp_t[:] locs = np.empty(n, dtype=np.intp)
351351

352352
with nogil:
353353
for i in range(n):
@@ -551,15 +551,15 @@ cdef class {{name}}HashTable(HashTable):
551551
def get_labels_groupby(self, const {{dtype}}_t[:] values):
552552
cdef:
553553
Py_ssize_t i, n = len(values)
554-
int64_t[:] labels
554+
intp_t[:] labels
555555
Py_ssize_t idx, count = 0
556556
int ret = 0
557557
{{dtype}}_t val
558558
khiter_t k
559559
{{name}}Vector uniques = {{name}}Vector()
560560
{{name}}VectorData *ud
561561

562-
labels = np.empty(n, dtype=np.int64)
562+
labels = np.empty(n, dtype=np.intp)
563563
ud = uniques.data
564564

565565
with nogil:
@@ -648,8 +648,8 @@ cdef class StringHashTable(HashTable):
648648
def get_indexer(self, ndarray[object] values):
649649
cdef:
650650
Py_ssize_t i, n = len(values)
651-
ndarray[int64_t] labels = np.empty(n, dtype=np.int64)
652-
int64_t *resbuf = <int64_t*>labels.data
651+
ndarray[intp_t] labels = np.empty(n, dtype=np.intp)
652+
intp_t *resbuf = <intp_t*>labels.data
653653
khiter_t k
654654
kh_str_t *table = self.table
655655
const char *v
@@ -680,7 +680,7 @@ cdef class StringHashTable(HashTable):
680680
object val
681681
const char *v
682682
khiter_t k
683-
int64_t[:] locs = np.empty(n, dtype=np.int64)
683+
intp_t[:] locs = np.empty(n, dtype=np.intp)
684684

685685
# these by-definition *must* be strings
686686
vecs = <const char **>malloc(n * sizeof(char *))
@@ -986,7 +986,7 @@ cdef class PyObjectHashTable(HashTable):
986986
int ret = 0
987987
object val
988988
khiter_t k
989-
int64_t[:] locs = np.empty(n, dtype=np.int64)
989+
intp_t[:] locs = np.empty(n, dtype=np.intp)
990990

991991
for i in range(n):
992992
val = values[i]

pandas/_libs/index.pyx

+3-3
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ cdef class IndexEngine:
266266
"""
267267
cdef:
268268
ndarray values, x
269-
ndarray[int64_t] result, missing
269+
ndarray[intp_t] result, missing
270270
set stargets, remaining_stargets
271271
dict d = {}
272272
object val
@@ -283,8 +283,8 @@ cdef class IndexEngine:
283283
else:
284284
n_alloc = n
285285

286-
result = np.empty(n_alloc, dtype=np.int64)
287-
missing = np.empty(n_t, dtype=np.int64)
286+
result = np.empty(n_alloc, dtype=np.intp)
287+
missing = np.empty(n_t, dtype=np.intp)
288288

289289
# map each starget to its position in the index
290290
if stargets and len(stargets) < 5 and self.is_monotonic_increasing:

pandas/tests/base/test_misc.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -201,4 +201,4 @@ def test_get_indexer_non_unique_dtype_mismatch():
201201
# GH 25459
202202
indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0]))
203203
tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
204-
tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing)
204+
tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing)

0 commit comments

Comments
 (0)