Skip to content

Commit e6122c6

Browse files
jbrockmendel authored and meeseeksmachine committed
Backport PR pandas-dev#41933: BUG: CategoricalIndex.get_loc(np.nan)
1 parent 0c1e59c commit e6122c6

File tree

4 files changed

+29
-1
lines changed

4 files changed

+29
-1
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1014,6 +1014,7 @@ Indexing
10141014
- Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contained duplicates (:issue:`40096`)
10151015
- Bug in :meth:`DataFrame.loc.__getitem__` with :class:`MultiIndex` casting to float when at least one index column has float dtype and we retrieve a scalar (:issue:`41369`)
10161016
- Bug in :meth:`DataFrame.loc` incorrectly matching non-Boolean index elements (:issue:`20432`)
1017+
- Bug in indexing with ``np.nan`` on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` incorrectly raising ``KeyError`` when ``np.nan`` keys are present (:issue:`41933`)
10171018
- Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`)
10181019
- Bug in :meth:`DataFrame.loc` returning a :class:`MultiIndex` in the wrong order if an indexer has duplicates (:issue:`40978`)
10191020
- Bug in :meth:`DataFrame.__setitem__` raising a ``TypeError`` when using a ``str`` subclass as the column name with a :class:`DatetimeIndex` (:issue:`37366`)

pandas/core/indexes/category.py

+8-1
Original file line number | Diff line number | Diff line change
@@ -483,7 +483,14 @@ def _reindex_non_unique( # type: ignore[override]
483483
# Indexing Methods
484484

485485
def _maybe_cast_indexer(self, key) -> int:
486-
return self._data._unbox_scalar(key)
486+
# GH#41933: we have to do this instead of self._data._validate_scalar
487+
# because this will correctly get partial-indexing on Interval categories
488+
try:
489+
return self._data._unbox_scalar(key)
490+
except KeyError:
491+
if is_valid_na_for_dtype(key, self.categories.dtype):
492+
return -1
493+
raise
487494

488495
def _get_indexer(
489496
self,

pandas/tests/indexes/categorical/test_indexing.py

+7
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,13 @@ def test_get_loc_nonmonotonic_nonunique(self):
198198
expected = np.array([False, True, False, True], dtype=bool)
199199
tm.assert_numpy_array_equal(result, expected)
200200

201+
def test_get_loc_nan(self):
202+
# GH#41933
203+
ci = CategoricalIndex(["A", "B", np.nan])
204+
res = ci.get_loc(np.nan)
205+
206+
assert res == 2
207+
201208

202209
class TestGetIndexer:
203210
def test_get_indexer_base(self):

pandas/tests/indexing/test_categorical.py

+13
Original file line number | Diff line number | Diff line change
@@ -540,3 +540,16 @@ def test_loc_getitem_with_non_string_categories(self, idx_values, ordered):
540540
result.loc[sl, "A"] = ["qux", "qux2"]
541541
expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx)
542542
tm.assert_frame_equal(result, expected)
543+
544+
def test_getitem_categorical_with_nan(self):
545+
# GH#41933
546+
ci = CategoricalIndex(["A", "B", np.nan])
547+
548+
ser = Series(range(3), index=ci)
549+
550+
assert ser[np.nan] == 2
551+
assert ser.loc[np.nan] == 2
552+
553+
df = DataFrame(ser)
554+
assert df.loc[np.nan, 0] == 2
555+
assert df.loc[np.nan][0] == 2

0 commit comments

Comments
 (0)