Skip to content

Commit 7efe61d

Browse files
authored
BUG: Index.get_loc(np.nan) non-unique non-monotonic (#43711)
1 parent 6b75ed6 commit 7efe61d

File tree

4 files changed

+59
-7
lines changed

4 files changed

+59
-7
lines changed

doc/source/whatsnew/v1.4.0.rst

+2 -1
Original file line number | Diff line number | Diff line change
@@ -431,7 +431,8 @@ Indexing
431431
- Bug in :meth:`DataFrame.drop` where the error message did not show missing labels with commas when raising ``KeyError`` (:issue:`42881`)
432432
- Bug in :meth:`DataFrame.query` where method calls in query strings led to errors when the ``numexpr`` package was installed. (:issue:`22435`)
433433
- Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`)
434-
434+
- Bug in indexing on a non-unique object-dtype :class:`Index` with an NA scalar (e.g. ``np.nan``) (:issue:`43711`)
435+
-
435436

436437
Missing
437438
^^^^^^^

pandas/_libs/index.pyx

+33 -2
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,11 @@ from pandas._libs import (
3434
algos,
3535
hashtable as _hash,
3636
)
37-
from pandas._libs.missing import checknull
37+
38+
from pandas._libs.missing cimport (
39+
checknull,
40+
is_matching_na,
41+
)
3842

3943

4044
cdef inline bint is_definitely_invalid_key(object val):
@@ -146,9 +150,17 @@ cdef class IndexEngine:
146150
cdef:
147151
ndarray[uint8_t, ndim=1, cast=True] indexer
148152

149-
indexer = self.values == val
153+
indexer = self._get_bool_indexer(val)
150154
return self._unpack_bool_indexer(indexer, val)
151155

156+
cdef ndarray _get_bool_indexer(self, object val):
157+
"""
158+
Return an ndarray[bool] of locations where val matches self.values.
159+
160+
If val is not NA, this is equivalent to `self.values == val`
161+
"""
162+
raise NotImplementedError("Implemented by subclasses")
163+
152164
cdef _unpack_bool_indexer(self,
153165
ndarray[uint8_t, ndim=1, cast=True] indexer,
154166
object val):
@@ -420,6 +432,25 @@ cdef class ObjectEngine(IndexEngine):
420432
raise KeyError(val) from err
421433
return loc
422434

435+
cdef ndarray _get_bool_indexer(self, object val):
436+
# We need to check for equality and for matching NAs
437+
cdef:
438+
ndarray values = self.values
439+
440+
if not checknull(val):
441+
return values == val
442+
443+
cdef:
444+
ndarray[uint8_t] result = np.empty(len(values), dtype=np.uint8)
445+
Py_ssize_t i
446+
object item
447+
448+
for i in range(len(values)):
449+
item = values[i]
450+
result[i] = is_matching_na(item, val)
451+
452+
return result.view(bool)
453+
423454

424455
cdef class DatetimeEngine(Int64Engine):
425456

pandas/_libs/index_class_helper.pxi.in

+3 -3
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,7 @@ cdef class {{name}}Engine(IndexEngine):
4545
cdef void _call_map_locations(self, ndarray[{{dtype}}_t] values):
4646
self.mapping.map_locations(values)
4747

48-
cdef _maybe_get_bool_indexer(self, object val):
49-
# Returns ndarray[bool] or int
48+
cdef ndarray _get_bool_indexer(self, object val):
5049
cdef:
5150
ndarray[uint8_t, ndim=1, cast=True] indexer
5251
ndarray[{{dtype}}_t, ndim=1] values
@@ -71,6 +70,7 @@ cdef class {{name}}Engine(IndexEngine):
7170
# when trying to cast it to ndarray
7271
raise KeyError(val)
7372

74-
return self._unpack_bool_indexer(indexer, val)
73+
return indexer
74+
7575

7676
{{endfor}}

pandas/tests/indexes/base_class/test_indexing.py

+21 -1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,10 @@
22
import pytest
33

44
import pandas as pd
5-
from pandas import Index
5+
from pandas import (
6+
Index,
7+
NaT,
8+
)
69
import pandas._testing as tm
710

811

@@ -56,3 +59,20 @@ def test_get_loc_tuple_monotonic_above_size_cutoff(self):
5659

5760
res = oidx.get_loc(tup)
5861
assert res == loc
62+
63+
def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self):
64+
# case that goes through _maybe_get_bool_indexer
65+
idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object)
66+
67+
# we don't raise KeyError on nan
68+
res = idx.get_loc(np.nan)
69+
assert res == 1
70+
71+
# we only match on None, not on np.nan
72+
res = idx.get_loc(None)
73+
expected = np.array([False, False, True, False, False, True])
74+
tm.assert_numpy_array_equal(res, expected)
75+
76+
# we don't match at all on mismatched NA
77+
with pytest.raises(KeyError, match="NaT"):
78+
idx.get_loc(NaT)

0 commit comments

Comments
 (0)