From add19006da9ad2cab63a9efb5ed00d00435de010 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 10 Jun 2021 14:55:29 -0700
Subject: [PATCH 1/4] REF+BUG: IntervalIndex.get_indexer with categorical target
 when both have nans

---
 pandas/core/indexes/base.py                   | 16 +++++++++++++++
 pandas/core/indexes/interval.py               | 13 +-----------
 .../tests/indexes/interval/test_indexing.py   | 20 +++++++++++++++++++
 3 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 73f21f908d55d..f584318b77add 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3403,6 +3403,22 @@ def get_indexer(
             #  matched to Interval scalars
             return self._get_indexer_non_comparable(target, method=method, unique=True)
 
+        if is_categorical_dtype(target.dtype) and not is_categorical_dtype(self.dtype):
+            # TODO: we can remove the self.dtype condition following GH#41933
+            # potential fastpath
+            # get an indexer for unique categories then propagate to codes via take_nd
+            categories_indexer = self.get_indexer(target.categories)
+            indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)
+
+            if (not self._is_multi and self.hasnans) and target.hasnans:
+                # Exclude MultiIndex because hasnans raises NotImplementedError
+                # we should only get here if we are unique, so loc is an integer
+                loc = self.get_loc(np.nan)
+                mask = target.isna()
+                indexer[mask] = loc
+
+            return ensure_platform_int(indexer)
+
         pself, ptarget = self._maybe_promote(target)
         if pself is not self or ptarget is not target:
             return pself.get_indexer(
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 7c96336103212..85ff5f482191f 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -7,10 +7,8 @@
 )
 import textwrap
 from typing import (
-    TYPE_CHECKING,
     Any,
     Hashable,
-    cast,
 )
 
 import numpy as np
@@ -46,7 +44,6 @@
 )
 from pandas.core.dtypes.common import (
     ensure_platform_int,
-    is_categorical_dtype,
     is_datetime64tz_dtype,
     is_datetime_or_timedelta_dtype,
     is_dtype_equal,
@@ -63,7 +60,6 @@
 from pandas.core.dtypes.dtypes import IntervalDtype
 from pandas.core.dtypes.missing import is_valid_na_for_dtype
 
-from pandas.core.algorithms import take_nd
 from pandas.core.arrays.interval import (
     IntervalArray,
     _interval_shared_docs,
@@ -91,9 +87,6 @@
     timedelta_range,
 )
 
-if TYPE_CHECKING:
-    from pandas import CategoricalIndex
-
 _index_doc_kwargs = dict(ibase._index_doc_kwargs)
 
 _index_doc_kwargs.update(
@@ -668,11 +661,7 @@ def _get_indexer(
             left_indexer = self.left.get_indexer(target.left)
             right_indexer = self.right.get_indexer(target.right)
             indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
-        elif is_categorical_dtype(target.dtype):
-            target = cast("CategoricalIndex", target)
-            # get an indexer for unique categories then propagate to codes via take_nd
-            categories_indexer = self.get_indexer(target.categories)
-            indexer = take_nd(categories_indexer, target.codes, fill_value=-1)
+
         elif not is_object_dtype(target):
             # homogeneous scalar index: use IntervalTree
             target = self._maybe_convert_i8(target)
diff --git a/pandas/tests/indexes/interval/test_indexing.py b/pandas/tests/indexes/interval/test_indexing.py
index a5a921f42c3ef..dac044b0eaa95 100644
--- a/pandas/tests/indexes/interval/test_indexing.py
+++ b/pandas/tests/indexes/interval/test_indexing.py
@@ -275,6 +275,26 @@ def test_get_indexer_categorical(self, target, ordered):
         expected = index.get_indexer(target)
         tm.assert_numpy_array_equal(result, expected)
 
+    def test_get_indexer_categorical_with_nans(self):
+        # nans in both index and in target
+        ii = IntervalIndex.from_breaks(range(5))
+        ii2 = ii.append(IntervalIndex([np.nan]))
+        ci2 = CategoricalIndex(ii2)
+
+        result = ii2.get_indexer(ci2)
+        expected = np.arange(5, dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+
+        # not-all-matches: sliced index lacks the first interval, so it maps to -1
+        result = ii2[1:].get_indexer(ci2[::-1])
+        expected = np.array([3, 2, 1, 0, -1], dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+
+        # non-unique target, non-unique nans
+        result = ii2.get_indexer(ci2.append(ci2))
+        expected = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+
     @pytest.mark.parametrize(
         "tuples, closed",
         [

From 819db87cd85d03d9a0b6bf2f34bbc088baee7b05 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 10 Jun 2021 14:57:21 -0700
Subject: [PATCH 2/4] whatsnew

---
 doc/source/whatsnew/v1.3.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index d7a6c2c3f0e1a..603396fb8487e 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -957,6 +957,7 @@ Indexing
 
 - Bug in :meth:`Index.union` and :meth:`MultiIndex.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`40862`)
 - Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
+- Bug in :meth:`IntervalIndex.get_indexer` when ``target`` has ``CategoricalDtype`` and both the index and the target contain NA values (:issue:`??`)
 - Bug in :meth:`Series.loc` raising a ``ValueError`` when input was filtered with a Boolean list and values to set were a list with lower dimension (:issue:`20438`)
 - Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
 - Bug in :meth:`DataFrame.__setitem__` raising a ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`)

From b6251fa9aa1e51d32cfb62575e2be974135258fb Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 10 Jun 2021 14:59:11 -0700
Subject: [PATCH 3/4] GH refs

---
 doc/source/whatsnew/v1.3.0.rst                 | 2 +-
 pandas/core/indexes/base.py                    | 1 +
 pandas/tests/indexes/interval/test_indexing.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 603396fb8487e..afc4875d2ee29 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -957,7 +957,7 @@ Indexing
 
 - Bug in :meth:`Index.union` and :meth:`MultiIndex.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`40862`)
 - Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
-- Bug in :meth:`IntervalIndex.get_indexer` when ``target`` has ``CategoricalDtype`` and both the index and the target contain NA values (:issue:`??`)
+- Bug in :meth:`IntervalIndex.get_indexer` failing to match NA entries when ``target`` has ``CategoricalDtype`` and both the index and the target contain NA values (:issue:`41934`)
 - Bug in :meth:`Series.loc` raising a ``ValueError`` when input was filtered with a Boolean list and values to set were a list with lower dimension (:issue:`20438`)
 - Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
 - Bug in :meth:`DataFrame.__setitem__` raising a ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index f584318b77add..7efce6e836924 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3413,6 +3413,7 @@ def get_indexer(
             if (not self._is_multi and self.hasnans) and target.hasnans:
                 # Exclude MultiIndex because hasnans raises NotImplementedError
                 # we should only get here if we are unique, so loc is an integer
+                # GH#41934
                 loc = self.get_loc(np.nan)
                 mask = target.isna()
                 indexer[mask] = loc
diff --git a/pandas/tests/indexes/interval/test_indexing.py b/pandas/tests/indexes/interval/test_indexing.py
index dac044b0eaa95..aa3359d775c5a 100644
--- a/pandas/tests/indexes/interval/test_indexing.py
+++ b/pandas/tests/indexes/interval/test_indexing.py
@@ -276,7 +276,7 @@ def test_get_indexer_categorical(self, target, ordered):
         tm.assert_numpy_array_equal(result, expected)
 
     def test_get_indexer_categorical_with_nans(self):
-        # nans in both index and in target
+        # GH#41934 nans in both index and in target
         ii = IntervalIndex.from_breaks(range(5))
         ii2 = ii.append(IntervalIndex([np.nan]))
         ci2 = CategoricalIndex(ii2)

From 1b3162c52f0b68719ff46a351a6ab2b519323fb4 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 15 Jun 2021 17:37:32 -0700
Subject: [PATCH 4/4] remove now-unnecessary check

---
 pandas/core/indexes/base.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index f420a3a561bbf..dc0343a6199e7 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3403,8 +3403,7 @@ def get_indexer(
             #  matched to Interval scalars
             return self._get_indexer_non_comparable(target, method=method, unique=True)
 
-        if is_categorical_dtype(target.dtype) and not is_categorical_dtype(self.dtype):
-            # TODO: we can remove the self.dtype condition following GH#41933
+        if is_categorical_dtype(target.dtype):
             # potential fastpath
             # get an indexer for unique categories then propagate to codes via take_nd
             categories_indexer = self.get_indexer(target.categories)