From 8c8f752de522b7df3c8ed5bf5a11167df39b3117 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Tue, 18 Feb 2020 18:11:11 +0000
Subject: [PATCH 1/6] add int8_t and int16_t to join_t, make sure correct index
 is returned for categorical joins

---
 pandas/_libs/join.pyx                    |  2 ++
 pandas/core/indexes/base.py              |  3 +++
 pandas/tests/reshape/merge/test_merge.py | 14 ++++++++++++++
 3 files changed, 19 insertions(+)

diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx
index 093c53790cd35..6a5baa9896f05 100644
--- a/pandas/_libs/join.pyx
+++ b/pandas/_libs/join.pyx
@@ -242,6 +242,8 @@ ctypedef fused join_t:
     float64_t
     float32_t
     object
+    int8_t
+    int16_t
     int32_t
     int64_t
     uint64_t
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 14ee21ea5614c..c7bf9e713d452 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3822,6 +3822,9 @@ def _join_monotonic(self, other, how="left", return_indexers=False):
                 join_index, lidx, ridx = self._outer_indexer(sv, ov)
             join_index = self._wrap_joined_index(join_index, other)
 
+        if self._typ == "categoricalindex":
+            join_index = self._create_from_codes(join_index)
+
         if return_indexers:
             lidx = None if lidx is None else ensure_platform_int(lidx)
             ridx = None if ridx is None else ensure_platform_int(ridx)
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 4f2cd878df613..673437aa45325 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2163,3 +2163,17 @@ def test_merge_datetime_upcast_dtype():
         }
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_categorical_non_unique_monotonic():
+    # GH 28189
+    df = DataFrame(range(4), columns=["value"], index=CategoricalIndex(["1"] * 4))
+    df2 = DataFrame([[6]], columns=["value"], index=CategoricalIndex(["1"]))
+
+    result = merge(df, df2, how="left", left_index=True, right_index=True)
+    expected = DataFrame(
+        [[0, 6], [1, 6], [2, 6], [3, 6]],
+        columns=["value_x", "value_y"],
+        index=CategoricalIndex(["1"] * 4),
+    )
+    tm.assert_frame_equal(expected, result)

From 98f5d9d565a828ddfdd1d0fb81465f6a64d8db82 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Wed, 19 Feb 2020 14:27:43 +0000
Subject: [PATCH 2/6] refactor

---
 doc/source/whatsnew/v1.1.0.rst  | 2 +-
 pandas/core/indexes/base.py     | 3 ---
 pandas/core/indexes/category.py | 5 +++++
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 13827e8fc4c33..c5629d0b03060 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -102,7 +102,7 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 
--
+- Bug where :func:`merge` was unable to join on non-unique categorical indices (:issue:`28189`)
 -
 
 Datetimelike
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index c7bf9e713d452..14ee21ea5614c 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3822,9 +3822,6 @@ def _join_monotonic(self, other, how="left", return_indexers=False):
                 join_index, lidx, ridx = self._outer_indexer(sv, ov)
             join_index = self._wrap_joined_index(join_index, other)
 
-        if self._typ == "categoricalindex":
-            join_index = self._create_from_codes(join_index)
-
         if return_indexers:
             lidx = None if lidx is None else ensure_platform_int(lidx)
             ridx = None if ridx is None else ensure_platform_int(ridx)
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index adb2ed9211bfe..ea282227ddbf7 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -28,6 +28,7 @@
 from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
 from pandas.core.indexes.extension import ExtensionIndex, inherit_names
 import pandas.core.missing as missing
+from pandas.core.ops import get_op_result_name
 
 _index_doc_kwargs = dict(ibase._index_doc_kwargs)
 _index_doc_kwargs.update(dict(target_klass="CategoricalIndex"))
@@ -781,6 +782,10 @@ def _delegate_method(self, name: str, *args, **kwargs):
             return res
         return CategoricalIndex(res, name=self.name)
 
+    def _wrap_joined_index(self, joined, other):
+        name = get_op_result_name(self, other)
+        return self._create_from_codes(joined, name=name)
+
 
 CategoricalIndex._add_numeric_methods_add_sub_disabled()
 CategoricalIndex._add_numeric_methods_disabled()

From fd371c13b374111de9bf7ad57b464a9336e89df9 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Thu, 20 Feb 2020 16:58:42 +0000
Subject: [PATCH 3/6] type

---
 pandas/core/indexes/category.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index ea282227ddbf7..0d57f2fa476e7 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -782,7 +782,9 @@ def _delegate_method(self, name: str, *args, **kwargs):
             return res
         return CategoricalIndex(res, name=self.name)
 
-    def _wrap_joined_index(self, joined, other):
+    def _wrap_joined_index(
+        self, joined: np.ndarray, other: "CategoricalIndex"
+    ) -> "CategoricalIndex":
         name = get_op_result_name(self, other)
         return self._create_from_codes(joined, name=name)
 

From fb58340330657be7d4e5831c92ee61961229055c Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Fri, 21 Feb 2020 17:03:23 +0000
Subject: [PATCH 4/6] test for int8 and int16

---
 pandas/tests/reshape/merge/test_merge.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 673437aa45325..194e3eadea1b5 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2165,15 +2165,21 @@ def test_merge_datetime_upcast_dtype():
     tm.assert_frame_equal(result, expected)
 
 
-def test_categorical_non_unique_monotonic():
+@pytest.mark.parametrize("n_categories", [5, 128])
+def test_categorical_non_unique_monotonic(n_categories):
     # GH 28189
-    df = DataFrame(range(4), columns=["value"], index=CategoricalIndex(["1"] * 4))
-    df2 = DataFrame([[6]], columns=["value"], index=CategoricalIndex(["1"]))
+    left_index = CategoricalIndex([0] + list(range(n_categories)))
+    df = DataFrame(range(n_categories + 1), columns=["value"], index=left_index)
+    df2 = DataFrame(
+        [[6]],
+        columns=["value"],
+        index=CategoricalIndex([0], categories=np.arange(n_categories)),
+    )
 
     result = merge(df, df2, how="left", left_index=True, right_index=True)
     expected = DataFrame(
-        [[0, 6], [1, 6], [2, 6], [3, 6]],
+        [[i, 6.0] if i < 2 else [i, np.nan] for i in range(n_categories + 1)],
         columns=["value_x", "value_y"],
-        index=CategoricalIndex(["1"] * 4),
+        index=left_index,
     )
     tm.assert_frame_equal(expected, result)

From 44673c6dfdb6ab13bb3a4b61f4206f0572f52673 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Tue, 25 Feb 2020 10:49:04 +0000
Subject: [PATCH 5/6] add comment to test

---
 pandas/tests/reshape/merge/test_merge.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 106170e104735..803c662a0569f 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2168,6 +2168,8 @@ def test_merge_datetime_upcast_dtype():
 @pytest.mark.parametrize("n_categories", [5, 128])
 def test_categorical_non_unique_monotonic(n_categories):
     # GH 28189
+    # With n_categories as 5, we test the int8 case,
+    # with n_categories as 128, we test the int16 case.
     left_index = CategoricalIndex([0] + list(range(n_categories)))
     df1 = DataFrame(range(n_categories + 1), columns=["value"], index=left_index)
     df2 = DataFrame(

From 4a994d53d01328bc87fa5348f2a32bbea51d78a5 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.gorelli@ws-1808.seri.co.uk>
Date: Tue, 25 Feb 2020 10:53:50 +0000
Subject: [PATCH 6/6] clarify

---
 pandas/tests/reshape/merge/test_merge.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 803c662a0569f..d80e2e7afceef 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2168,8 +2168,8 @@ def test_merge_datetime_upcast_dtype():
 @pytest.mark.parametrize("n_categories", [5, 128])
 def test_categorical_non_unique_monotonic(n_categories):
     # GH 28189
-    # With n_categories as 5, we test the int8 case,
-    # with n_categories as 128, we test the int16 case.
+    # With n_categories as 5, we test the int8 case is hit in libjoin,
+    # with n_categories as 128 we test the int16 case.
     left_index = CategoricalIndex([0] + list(range(n_categories)))
     df1 = DataFrame(range(n_categories + 1), columns=["value"], index=left_index)
     df2 = DataFrame(