From 85f0c7030ec2eecebd060fc385d5857ca9d08eeb Mon Sep 17 00:00:00 2001
From: Pietro Battiston <me@pietrobattiston.it>
Date: Wed, 13 Jun 2018 10:22:59 +0200
Subject: [PATCH] BUG: fix get_indexer_non_unique with CategoricalIndex key

closes #21448
---
 doc/source/whatsnew/v0.23.2.txt           |  2 +-
 pandas/core/indexes/base.py               |  3 +++
 pandas/core/indexes/category.py           |  7 ++++++-
 pandas/tests/categorical/test_indexing.py | 20 +++++++++++++++++++-
 4 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index ec2eddcfd4d41..611e5c4836c6f 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -55,7 +55,7 @@ Conversion
 Indexing
 ^^^^^^^^
 
--
+- Bug in :meth:`Index.get_indexer_non_unique` when the key is a :class:`Categorical` or :class:`CategoricalIndex` (:issue:`21448`)
 -
 
 I/O
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index bf1051332ee19..d9e4ef7db1158 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -31,6 +31,7 @@
     is_dtype_equal,
     is_dtype_union_equal,
     is_object_dtype,
+    is_categorical,
     is_categorical_dtype,
     is_interval_dtype,
     is_period_dtype,
@@ -3300,6 +3301,8 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance):
     @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
     def get_indexer_non_unique(self, target):
         target = _ensure_index(target)
+        if is_categorical(target):
+            target = target.astype(target.dtype.categories.dtype)
         pself, ptarget = self._maybe_promote(target)
         if pself is not self or ptarget is not target:
             return pself.get_indexer_non_unique(ptarget)
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 150eca32e229d..587090fa72def 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -598,7 +598,12 @@ def get_indexer_non_unique(self, target):
         target = ibase._ensure_index(target)
 
         if isinstance(target, CategoricalIndex):
-            target = target.categories
+            # Indexing on codes is more efficient if categories are the same:
+            if target.categories is self.categories:
+                target = target.codes
+                indexer, missing = self._engine.get_indexer_non_unique(target)
+                return _ensure_platform_int(indexer), missing
+            target = target.values
 
         codes = self.categories.get_indexer(target)
         indexer, missing = self._engine.get_indexer_non_unique(codes)
diff --git a/pandas/tests/categorical/test_indexing.py b/pandas/tests/categorical/test_indexing.py
index 9c27b1101e5ca..cf7b5cfa55882 100644
--- a/pandas/tests/categorical/test_indexing.py
+++ b/pandas/tests/categorical/test_indexing.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 import pandas.util.testing as tm
-from pandas import Categorical, Index, PeriodIndex
+from pandas import Categorical, Index, CategoricalIndex, PeriodIndex
 from pandas.tests.categorical.common import TestCategorical
 
 
@@ -103,3 +103,21 @@ def f():
             s.categories = [1, 2]
 
         pytest.raises(ValueError, f)
+
+    # Index values: sorted or unsorted, unique or with duplicates:
+    @pytest.mark.parametrize("idx_values", [[1, 2, 3, 4], [1, 3, 2, 4],
+                                            [1, 3, 3, 4], [1, 2, 2, 4]])
+    # Key values: present or missing, unique or duplicated:
+    @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
+    @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
+    def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
+        # GH 21448
+        key = key_class(key_values, categories=range(1, 5))
+        # Test for flat index and CategoricalIndex with same/different cats:
+        for dtype in None, 'category', key.dtype:
+            idx = Index(idx_values, dtype=dtype)
+            expected, exp_miss = idx.get_indexer_non_unique(key_values)
+            result, res_miss = idx.get_indexer_non_unique(key)
+
+            tm.assert_numpy_array_equal(expected, result)
+            tm.assert_numpy_array_equal(exp_miss, res_miss)