REF: de-duplicate CategoricalIndex.get_indexer (#37923)

jbrockmendel · web-flow · commit df32e83f36bf · 2020-11-17T20:10:04.000-05:00
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -526,40 +526,33 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
         if self.is_unique and self.equals(target):
             return np.arange(len(self), dtype="intp")
 
-        # Note: we use engine.get_indexer_non_unique below because, even if
-        #  `target` is unique, any non-category entries in it will be encoded
-        #  as -1 by _get_codes_for_get_indexer, so `codes` may not be unique.
-        codes = self._get_codes_for_get_indexer(target._values)
-        indexer, _ = self._engine.get_indexer_non_unique(codes)
-        return ensure_platform_int(indexer)
+        return self._get_indexer_non_unique(target._values)[0]
 
     @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
     def get_indexer_non_unique(self, target):
         target = ibase.ensure_index(target)
+        return self._get_indexer_non_unique(target._values)
 
-        codes = self._get_codes_for_get_indexer(target._values)
-        indexer, missing = self._engine.get_indexer_non_unique(codes)
-        return ensure_platform_int(indexer), missing
-
-    def _get_codes_for_get_indexer(self, target: ArrayLike) -> np.ndarray:
+    def _get_indexer_non_unique(self, values: ArrayLike):
         """
-        Extract integer codes we can use for comparison.
-
-        Notes
-        -----
-        If a value in target is not present, it gets coded as -1.
+        get_indexer_non_unique but after unwrapping the target Index object.
         """
+        # Note: we use engine.get_indexer_non_unique for get_indexer in addition
+        #  to get_indexer_non_unique because, even if `target` is unique, any
+        #  non-category entries in it will be encoded as -1, so `codes` may
+        #  not be unique.
 
-        if isinstance(target, Categorical):
+        if isinstance(values, Categorical):
             # Indexing on codes is more efficient if categories are the same,
             #  so we can apply some optimizations based on the degree of
             #  dtype-matching.
-            cat = self._data._encode_with_my_categories(target)
+            cat = self._data._encode_with_my_categories(values)
             codes = cat._codes
         else:
-            codes = self.categories.get_indexer(target)
+            codes = self.categories.get_indexer(values)
 
-        return codes
+        indexer, missing = self._engine.get_indexer_non_unique(codes)
+        return ensure_platform_int(indexer), missing
 
     @doc(Index._convert_list_indexer)
     def _convert_list_indexer(self, keyarr):
diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py
@@ -84,7 +84,7 @@ def test_large_series(self):
         tm.assert_series_equal(result1, result3)
 
     def test_loc_getitem_frame(self):
-
+        # CategoricalIndex with IntervalIndex categories
         df = DataFrame({"A": range(10)})
         s = pd.cut(df.A, 5)
         df["B"] = s