diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 414794dd6a56e..2129025b2c071 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -1014,6 +1014,7 @@ Indexing
 - Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contained duplicates (:issue:`40096`)
 - Bug in :meth:`DataFrame.loc.__getitem__` with :class:`MultiIndex` casting to float when at least one index column has float dtype and we retrieve a scalar (:issue:`41369`)
 - Bug in :meth:`DataFrame.loc` incorrectly matching non-Boolean index elements (:issue:`20432`)
+- Bug in indexing with ``np.nan`` on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` incorrectly raising ``KeyError`` when ``np.nan`` keys are present (:issue:`41933`)
 - Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`)
 - Bug in :meth:`DataFrame.loc` returning a :class:`MultiIndex` in the wrong order if an indexer has duplicates (:issue:`40978`)
 - Bug in :meth:`DataFrame.__setitem__` raising a ``TypeError`` when using a ``str`` subclass as the column name with a :class:`DatetimeIndex` (:issue:`37366`)
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 228f58d47b8ed..1bda05f3ce5df 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -483,7 +483,14 @@ def _reindex_non_unique( # type: ignore[override]
     # Indexing Methods
 
     def _maybe_cast_indexer(self, key) -> int:
-        return self._data._unbox_scalar(key)
+        # GH#41933: we have to do this instead of self._data._validate_scalar
+        # because this will correctly get partial-indexing on Interval categories
+        try:
+            return self._data._unbox_scalar(key)
+        except KeyError:
+            if is_valid_na_for_dtype(key, self.categories.dtype):
+                return -1
+            raise
 
     def _get_indexer(
         self,
diff --git a/pandas/tests/indexes/categorical/test_indexing.py
b/pandas/tests/indexes/categorical/test_indexing.py
index 0e0849fdb8dcf..b4a42cf137495 100644
--- a/pandas/tests/indexes/categorical/test_indexing.py
+++ b/pandas/tests/indexes/categorical/test_indexing.py
@@ -198,6 +198,13 @@ def test_get_loc_nonmonotonic_nonunique(self):
         expected = np.array([False, True, False, True], dtype=bool)
         tm.assert_numpy_array_equal(result, expected)
 
+    def test_get_loc_nan(self):
+        # GH#41933
+        ci = CategoricalIndex(["A", "B", np.nan])
+        res = ci.get_loc(np.nan)
+
+        assert res == 2
+
 
 class TestGetIndexer:
     def test_get_indexer_base(self):
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index cd49620f45fae..23f7192724540 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -540,3 +540,16 @@ def test_loc_getitem_with_non_string_categories(self, idx_values, ordered):
         result.loc[sl, "A"] = ["qux", "qux2"]
         expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx)
         tm.assert_frame_equal(result, expected)
+
+    def test_getitem_categorical_with_nan(self):
+        # GH#41933
+        ci = CategoricalIndex(["A", "B", np.nan])
+
+        ser = Series(range(3), index=ci)
+
+        assert ser[np.nan] == 2
+        assert ser.loc[np.nan] == 2
+
+        df = DataFrame(ser)
+        assert df.loc[np.nan, 0] == 2
+        assert df.loc[np.nan][0] == 2