Skip to content

BUG: get_indexer returned dtype #36431

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Sep 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ Indexing

- Bug in :meth:`PeriodIndex.get_loc` incorrectly raising ``ValueError`` on non-datelike strings instead of ``KeyError``, causing similar errors in :meth:`Series.__getitem__`, :meth:`Series.__contains__`, and :meth:`Series.loc.__getitem__` (:issue:`34240`)
- Bug in :meth:`Index.sort_values` where, when empty values were passed, the method would break by trying to compare missing values instead of pushing them to the end of the sort order. (:issue:`35584`)
- Bug in :meth:`Index.get_indexer` and :meth:`Index.get_indexer_non_unique` where ``int64`` arrays were returned instead of ``intp`` arrays. (:issue:`36359`)
-

Missing
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/hashtable.pxd
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pandas._libs.khash cimport (
kh_int64_t, kh_uint64_t, kh_float64_t, kh_pymap_t, kh_str_t, uint64_t,
int64_t, float64_t)
from numpy cimport ndarray
from numpy cimport ndarray, intp_t

# prototypes for sharing

Expand Down
14 changes: 7 additions & 7 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ cdef class {{name}}HashTable(HashTable):
int ret = 0
{{dtype}}_t val
khiter_t k
int64_t[:] locs = np.empty(n, dtype=np.int64)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these all of the int64s in this file (the ones that are indexers)?

intp_t[:] locs = np.empty(n, dtype=np.intp)

with nogil:
for i in range(n):
Expand Down Expand Up @@ -551,15 +551,15 @@ cdef class {{name}}HashTable(HashTable):
def get_labels_groupby(self, const {{dtype}}_t[:] values):
cdef:
Py_ssize_t i, n = len(values)
int64_t[:] labels
intp_t[:] labels
Py_ssize_t idx, count = 0
int ret = 0
{{dtype}}_t val
khiter_t k
{{name}}Vector uniques = {{name}}Vector()
{{name}}VectorData *ud

labels = np.empty(n, dtype=np.int64)
labels = np.empty(n, dtype=np.intp)
ud = uniques.data

with nogil:
Expand Down Expand Up @@ -648,8 +648,8 @@ cdef class StringHashTable(HashTable):
def get_indexer(self, ndarray[object] values):
cdef:
Py_ssize_t i, n = len(values)
ndarray[int64_t] labels = np.empty(n, dtype=np.int64)
int64_t *resbuf = <int64_t*>labels.data
ndarray[intp_t] labels = np.empty(n, dtype=np.intp)
intp_t *resbuf = <intp_t*>labels.data
khiter_t k
kh_str_t *table = self.table
const char *v
Expand Down Expand Up @@ -680,7 +680,7 @@ cdef class StringHashTable(HashTable):
object val
const char *v
khiter_t k
int64_t[:] locs = np.empty(n, dtype=np.int64)
intp_t[:] locs = np.empty(n, dtype=np.intp)

# these by-definition *must* be strings
vecs = <const char **>malloc(n * sizeof(char *))
Expand Down Expand Up @@ -986,7 +986,7 @@ cdef class PyObjectHashTable(HashTable):
int ret = 0
object val
khiter_t k
int64_t[:] locs = np.empty(n, dtype=np.int64)
intp_t[:] locs = np.empty(n, dtype=np.intp)

for i in range(n):
val = values[i]
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ cdef class IndexEngine:
"""
cdef:
ndarray values, x
ndarray[int64_t] result, missing
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all of these indexers changed in this file?

ndarray[intp_t] result, missing
set stargets, remaining_stargets
dict d = {}
object val
Expand All @@ -283,8 +283,8 @@ cdef class IndexEngine:
else:
n_alloc = n

result = np.empty(n_alloc, dtype=np.int64)
missing = np.empty(n_t, dtype=np.int64)
result = np.empty(n_alloc, dtype=np.intp)
missing = np.empty(n_t, dtype=np.intp)

# map each starget to its position in the index
if stargets and len(stargets) < 5 and self.is_monotonic_increasing:
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/base/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,4 @@ def test_get_indexer_non_unique_dtype_mismatch():
# GH 25459
indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0]))
tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing)
tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing)