diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9aebcad1d8cae..b316b4ff2d688 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -269,6 +269,7 @@ Indexing - Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`) - Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`) - Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`) +- Bug in :meth:`CategoricalIndex.get_indexer` when the index contains ``NaN`` values, causing elements that are in the target but not present in the index to be mapped to the position of the ``NaN`` element instead of ``-1`` (:issue:`45361`) - Missing diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 99396c6986043..c37a52ddb5491 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3791,6 +3791,7 @@ def get_indexer( tolerance=None, ) -> npt.NDArray[np.intp]: method = missing.clean_reindex_fill_method(method) + orig_target = target target = self._maybe_cast_listlike_indexer(target) self._check_indexing_method(method, limit, tolerance) @@ -3813,9 +3814,15 @@ def get_indexer( indexer = self._engine.get_indexer(target.codes) if self.hasnans and target.hasnans: + # After _maybe_cast_listlike_indexer, target elements which do not + # belong to some category are changed to NaNs + # Mask to track actual NaN values compared to inserted NaN values + # GH#45361 + target_nans = isna(orig_target) loc = self.get_loc(np.nan) mask = target.isna() - indexer[mask] = loc + indexer[target_nans] = loc + indexer[mask & ~target_nans] = -1 return indexer if is_categorical_dtype(target.dtype): diff --git a/pandas/tests/indexes/categorical/test_indexing.py 
b/pandas/tests/indexes/categorical/test_indexing.py index 2297f8cf87209..588486452fc20 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -298,6 +298,18 @@ def test_get_indexer_same_categories_different_order(self): expected = np.array([1, 1], dtype="intp") tm.assert_numpy_array_equal(result, expected) + def test_get_indexer_nans_in_index_and_target(self): + # GH 45361 + ci = CategoricalIndex([1, 2, np.nan, 3]) + other1 = [2, 3, 4, np.nan] + res1 = ci.get_indexer(other1) + expected1 = np.array([1, 3, -1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(res1, expected1) + other2 = [1, 4, 2, 3] + res2 = ci.get_indexer(other2) + expected2 = np.array([0, -1, 1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(res2, expected2) + class TestWhere: def test_where(self, listlike_box):