Skip to content

Commit ddb76a1

Browse files
Backport PR #41933: BUG: CategoricalIndex.get_loc(np.nan) (#42039)
Co-authored-by: jbrockmendel <[email protected]>
1 parent 82e26fb commit ddb76a1

File tree

4 files changed

+29
-1
lines changed

4 files changed

+29
-1
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1016,6 +1016,7 @@ Indexing
10161016
- Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contained duplicates (:issue:`40096`)
10171017
- Bug in :meth:`DataFrame.loc.__getitem__` with :class:`MultiIndex` casting to float when at least one index column has float dtype and we retrieve a scalar (:issue:`41369`)
10181018
- Bug in :meth:`DataFrame.loc` incorrectly matching non-Boolean index elements (:issue:`20432`)
1019+
- Bug in indexing with ``np.nan`` on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` incorrectly raising ``KeyError`` when ``np.nan`` keys are present (:issue:`41933`)
10191020
- Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`)
10201021
- Bug in :meth:`DataFrame.at` with a :class:`CategoricalIndex` returning incorrect results when passed integer keys (:issue:`41846`)
10211022
- Bug in :meth:`DataFrame.loc` returning a :class:`MultiIndex` in the wrong order if an indexer has duplicates (:issue:`40978`)

pandas/core/indexes/category.py

+8-1
Original file line number | Diff line number | Diff line change
@@ -483,7 +483,14 @@ def _reindex_non_unique( # type: ignore[override]
483483
# Indexing Methods
484484

485485
def _maybe_cast_indexer(self, key) -> int:
486-
return self._data._unbox_scalar(key)
486+
# GH#41933: we have to do this instead of self._data._validate_scalar
487+
# because this will correctly get partial-indexing on Interval categories
488+
try:
489+
return self._data._unbox_scalar(key)
490+
except KeyError:
491+
if is_valid_na_for_dtype(key, self.categories.dtype):
492+
return -1
493+
raise
487494

488495
def _get_indexer(
489496
self,

pandas/tests/indexes/categorical/test_indexing.py

+7
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,13 @@ def test_get_loc_nonmonotonic_nonunique(self):
198198
expected = np.array([False, True, False, True], dtype=bool)
199199
tm.assert_numpy_array_equal(result, expected)
200200

201+
def test_get_loc_nan(self):
202+
# GH#41933
203+
ci = CategoricalIndex(["A", "B", np.nan])
204+
res = ci.get_loc(np.nan)
205+
206+
assert res == 2
207+
201208

202209
class TestGetIndexer:
203210
def test_get_indexer_base(self):

pandas/tests/indexing/test_categorical.py

+13
Original file line number | Diff line number | Diff line change
@@ -540,3 +540,16 @@ def test_loc_getitem_with_non_string_categories(self, idx_values, ordered):
540540
result.loc[sl, "A"] = ["qux", "qux2"]
541541
expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx)
542542
tm.assert_frame_equal(result, expected)
543+
544+
def test_getitem_categorical_with_nan(self):
545+
# GH#41933
546+
ci = CategoricalIndex(["A", "B", np.nan])
547+
548+
ser = Series(range(3), index=ci)
549+
550+
assert ser[np.nan] == 2
551+
assert ser.loc[np.nan] == 2
552+
553+
df = DataFrame(ser)
554+
assert df.loc[np.nan, 0] == 2
555+
assert df.loc[np.nan][0] == 2

0 commit comments

Comments
 (0)