Skip to content

Commit f7bcc10

Browse files
committed
BUG: Index.get_indexer_non_unique misbehaves when index contains multiple nan (pandas-dev#35392)
1 parent 04e9e0a commit f7bcc10

File tree

2 files changed

+28
-1
lines changed

2 files changed

+28
-1
lines changed

pandas/_libs/index.pyx

+9-1
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,15 @@ cdef class IndexEngine:
270270
Py_ssize_t i, j, n, n_t, n_alloc
271271

272272
self._ensure_mapping_populated()
273-
values = np.array(self._get_index_values(), copy=False)
273+
# GH #35392 Index.get_indexer_non_unique misbehaves when index contains multiple nan
274+
if any([checknull(t) for t in targets]):
275+
new_targets = [0 if checknull(t) else t for t in targets]
276+
new_values = [0 if checknull(v) else v for v in self._get_index_values()]
277+
targets = np.array(new_targets, dtype=object)
278+
values = np.array(new_values, dtype=object)
279+
else:
280+
values = np.array(self._get_index_values(), copy=False)
281+
274282
stargets = set(targets)
275283
n = len(values)
276284
n_t = len(targets)

pandas/tests/base/test_misc.py

+19
Original file line number | Diff line number | Diff line change
@@ -201,3 +201,22 @@ def test_get_indexer_non_unique_dtype_mismatch():
201201
indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0]))
202202
tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
203203
tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing)
204+
205+
206+
@pytest.mark.parametrize(
    "idx, target, expected",
    [
        # Target is a single NaN: every NaN position in the index is expected.
        ([np.nan, "var1", np.nan], [np.nan], np.array([0, 2], dtype=np.intp)),
        # Target mixes NaN with a regular label: the NaN positions come
        # first, followed by the position of "var1".
        (
            [np.nan, "var1", np.nan],
            [np.nan, "var1"],
            np.array([0, 2, 1], dtype=np.intp),
        ),
    ],
)
def test_get_indexer_non_unique_multiple_nans(idx, target, expected):
    """Check ``get_indexer_for`` on an index that holds more than one NaN.

    Regression test for GH 35392.

    Parameters
    ----------
    idx : list
        Values used to build the ``pd.Index`` under test; contains
        duplicate NaN entries.
    target : list
        Labels looked up in the index.
    expected : np.ndarray
        Expected positional indexer. Built with ``np.intp`` (rather than a
        fixed ``np.int64``) so the dtype-sensitive comparison agrees with
        the sibling dtype-mismatch test in this file, which also compares
        indexer output against ``np.intp``.
    """
    # GH 35392
    axis = pd.Index(idx)
    actual = axis.get_indexer_for(target)

    tm.assert_numpy_array_equal(actual, expected)

0 commit comments

Comments
 (0)