diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d11d4952ff1c7..e561e994778e9 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -521,40 +521,33 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): if self.is_unique and self.equals(target): return np.arange(len(self), dtype="intp") - # Note: we use engine.get_indexer_non_unique below because, even if - # `target` is unique, any non-category entries in it will be encoded - # as -1 by _get_codes_for_get_indexer, so `codes` may not be unique. - codes = self._get_codes_for_get_indexer(target._values) - indexer, _ = self._engine.get_indexer_non_unique(codes) - return ensure_platform_int(indexer) + return self._get_indexer_non_unique(target._values)[0] @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) def get_indexer_non_unique(self, target): target = ibase.ensure_index(target) + return self._get_indexer_non_unique(target._values) - codes = self._get_codes_for_get_indexer(target._values) - indexer, missing = self._engine.get_indexer_non_unique(codes) - return ensure_platform_int(indexer), missing - - def _get_codes_for_get_indexer(self, target: ArrayLike) -> np.ndarray: + def _get_indexer_non_unique(self, values: ArrayLike): """ - Extract integer codes we can use for comparison. - - Notes - ----- - If a value in target is not present, it gets coded as -1. + get_indexer_non_unique but after unwrapping the target Index object. """ + # Note: we use engine.get_indexer_non_unique for get_indexer in addition + # to get_indexer_non_unique because, even if `target` is unique, any + # non-category entries in it will be encoded as -1 so `codes` may + # not be unique. - if isinstance(target, Categorical): + if isinstance(values, Categorical): # Indexing on codes is more efficient if categories are the same, # so we can apply some optimizations based on the degree of # dtype-matching. 
- cat = self._data._encode_with_my_categories(target) + cat = self._data._encode_with_my_categories(values) codes = cat._codes else: - codes = self.categories.get_indexer(target) + codes = self.categories.get_indexer(values) - return codes + indexer, missing = self._engine.get_indexer_non_unique(codes) + return ensure_platform_int(indexer), missing @doc(Index._convert_list_indexer) def _convert_list_indexer(self, keyarr): diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index df59d09edd3ef..0fa6abb27cb61 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -84,7 +84,7 @@ def test_large_series(self): tm.assert_series_equal(result1, result3) def test_loc_getitem_frame(self): - + # CategoricalIndex with IntervalIndex categories df = DataFrame({"A": range(10)}) s = pd.cut(df.A, 5) df["B"] = s