Skip to content

Commit bd8ee57

Browse files
committed
Modify get_indexer_non_unique to account for nan keys - Bug pandas-dev#52234
1 parent 2e0c8a4 commit bd8ee57

File tree

2 files changed

+30
-2
lines changed

2 files changed

+30
-2
lines changed

pandas/_libs/index.pyx

+4-2
Original file line number | Diff line number | Diff line change
@@ -360,6 +360,8 @@ cdef class IndexEngine:
360360
values = self.values
361361
stargets = set(targets)
362362

363+
na_in_stargets = any([checknull(t) for t in stargets])
364+
363365
n = len(values)
364366
n_t = len(targets)
365367
if n > 10_000:
@@ -374,7 +376,7 @@ cdef class IndexEngine:
374376
if (
375377
stargets and
376378
len(stargets) < 5 and
377-
not any([checknull(t) for t in stargets]) and
379+
not na_in_stargets and
378380
self.is_monotonic_increasing
379381
):
380382
# if there are few enough stargets and the index is monotonically
@@ -396,7 +398,7 @@ cdef class IndexEngine:
396398
# otherwise, map by iterating through all items in the index
397399

398400
# short-circuit na check
399-
if values.dtype == object:
401+
if na_in_stargets:
400402
check_na_values = True
401403
# keep track of nas in values
402404
found_nas = set()

pandas/tests/indexes/base_class/test_indexing.py

+26
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,32 @@ def test_get_indexer_non_unique_dtype_mismatch(self):
3737
tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
3838
tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing)
3939

40+
def test_get_indexer_non_unique_int_index(self):
41+
indexes, missing = Index([np.nan, 100, 200, 100]).get_indexer_non_unique(
42+
Index([np.nan])
43+
)
44+
tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), indexes)
45+
tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)
46+
47+
indexes, missing = Index([np.nan, 100, 200, 100]).get_indexer_non_unique(
48+
Index([np.nan, 100])
49+
)
50+
tm.assert_numpy_array_equal(np.array([0, 1, 3], dtype=np.intp), indexes)
51+
tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)
52+
53+
def test_get_indexer_non_unique_float_index(self):
54+
indexes, missing = Index([np.nan, 100.0, 200.0, 100.0]).get_indexer_non_unique(
55+
Index([np.nan])
56+
)
57+
tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), indexes)
58+
tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)
59+
60+
indexes, missing = Index([np.nan, 100.0, 200.0, 100.0]).get_indexer_non_unique(
61+
Index([np.nan, 100.0])
62+
)
63+
tm.assert_numpy_array_equal(np.array([0, 1, 3], dtype=np.intp), indexes)
64+
tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)
65+
4066

4167
class TestGetLoc:
4268
@pytest.mark.slow # to_flat_index takes a while

0 commit comments

Comments
 (0)