From 291a72599fe4b54edc5a6b65ff5ac67db414f1a8 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 11 Jul 2021 11:47:32 -0700
Subject: [PATCH 1/3] REF: implement get_indexer_strict

---
 pandas/core/frame.py         |  2 +-
 pandas/core/indexes/base.py  | 78 +++++++++++++++++++++++++++++++
 pandas/core/indexes/multi.py | 39 ++++++++--------
 pandas/core/indexing.py      | 90 +-----------------------------------
 4 files changed, 102 insertions(+), 107 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 43adb4df7fcb4..841b0fe4f0195 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3456,7 +3456,7 @@ def __getitem__(self, key):
         else:
             if is_iterator(key):
                 key = list(key)
-            indexer = self.loc._get_listlike_indexer(key, axis=1)[1]
+            indexer = self.columns.get_indexer_strict(key, "columns")[1]
 
         # take() does not accept boolean indexers
         if getattr(indexer, "dtype", None) == bool:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 2f1d85f1340a4..cff022d6db06a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -5390,6 +5390,84 @@ def get_indexer_for(self, target) -> np.ndarray:
         indexer, _ = self.get_indexer_non_unique(target)
         return indexer
 
+    def get_indexer_strict(self, key, axis_name: str) -> np.ndarray:
+        """
+        Analogue to get_indexer that raises if any elements are missing.
+        """
+        keyarr = key
+        if not isinstance(keyarr, Index):
+            keyarr = com.asarray_tuplesafe(keyarr)
+
+        if self._index_as_unique:
+            indexer = self.get_indexer_for(keyarr)
+            keyarr = self.reindex(keyarr)[0]
+        else:
+            keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
+
+        self._raise_if_missing(keyarr, indexer, axis_name)
+
+        if (
+            needs_i8_conversion(self.dtype)
+            or is_categorical_dtype(self.dtype)
+            or is_interval_dtype(self.dtype)
+        ):
+            # For CategoricalIndex take instead of reindex to preserve dtype.
+            #  For IntervalIndex this is to map integers to the Intervals they match to.
+            keyarr = self.take(indexer)
+            if keyarr.dtype.kind in ["m", "M"]:
+                # DTI/TDI.take can infer a freq in some cases when we dont want one
+                if isinstance(key, list) or (
+                    isinstance(key, type(self)) and key.freq is None
+                ):
+                    keyarr = keyarr._with_freq(None)
+
+        return keyarr, indexer
+
+    def _raise_if_missing(self, key, indexer, axis_name: str):
+        """
+        Check that indexer can be used to return a result.
+
+        i.e. every requested key was found,
+        unless the list of keys was actually empty.
+
+        Parameters
+        ----------
+        key : list-like
+            Targeted labels (only used to show correct error message).
+        indexer : array-like of integers
+            Indices corresponding to the key,
+            (with -1 indicating not found).
+        axis_name : str
+
+        Raises
+        ------
+        KeyError
+            If at least one key was requested and any of them was not found.
+        """
+        if len(key) == 0:
+            return
+
+        # Count missing values
+        missing_mask = indexer < 0
+        nmissing = missing_mask.sum()
+
+        if nmissing:
+
+            # TODO: remove special-case; this is just to keep exception
+            #  message tests from raising while debugging
+            use_interval_msg = is_interval_dtype(self.dtype) or (
+                is_categorical_dtype(self.dtype)
+                and is_interval_dtype(self.categories.dtype)
+            )
+
+            if nmissing == len(indexer):
+                if use_interval_msg:
+                    key = list(key)
+                raise KeyError(f"None of [{key}] are in the [{axis_name}]")
+
+            not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
+            raise KeyError(f"{not_found} not in index")
+
     @overload
     def _get_indexer_non_comparable(
         self, target: Index, method, unique: Literal[True] = ...
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 8903d29782610..6b02358ab2dcc 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -2542,29 +2542,32 @@ def _get_values_for_loc(self, series: Series, loc, key):
         new_ser = series._constructor(new_values, index=new_index, name=series.name)
         return new_ser.__finalize__(series)
 
-    def _convert_listlike_indexer(self, keyarr) -> np.ndarray | None:
-        """
-        Analogous to get_indexer when we are partial-indexing on our first level.
-
-        Parameters
-        ----------
-        keyarr : Index, np.ndarray, or ExtensionArray
-            Indexer to convert.
+    def get_indexer_strict(self, key, axis_name) -> np.ndarray:
 
-        Returns
-        -------
-        np.ndarray[intp] or None
-        """
-        indexer = None
+        keyarr = key
+        if not isinstance(keyarr, Index):
+            keyarr = com.asarray_tuplesafe(keyarr)
 
-        # are we indexing a specific level
         if len(keyarr) and not isinstance(keyarr[0], tuple):
             _, indexer = self.reindex(keyarr, level=0)
 
-            # take all
             if indexer is None:
+                # exact match
                 indexer = np.arange(len(self), dtype=np.intp)
-                return indexer
+
+            else:
+                self._raise_if_missing(key, indexer, axis_name)
+            return self[indexer], indexer
+
+        return super().get_indexer_strict(key, axis_name)
+
+    def _raise_if_missing(self, key, indexer, axis_name: str):
+        keyarr = key
+        if not isinstance(key, Index):
+            keyarr = com.asarray_tuplesafe(key)
+
+        if len(keyarr) and not isinstance(keyarr[0], tuple):
+            # i.e. same condition for special case in MultiIndex.get_indexer_strict
 
             check = self.levels[0].get_indexer(keyarr)
             mask = check == -1
@@ -2574,8 +2577,8 @@ def _convert_listlike_indexer(self, keyarr) -> np.ndarray | None:
                 # We get here when levels still contain values which are not
                 # actually in Index anymore
                 raise KeyError(f"{keyarr} not in index")
-
-        return indexer
+        else:
+            return super()._raise_if_missing(key, indexer, axis_name)
 
     def _get_partial_string_timestamp_match_key(self, key):
         """
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 387dcca6897b7..f85f74ff7f3b3 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -30,7 +30,6 @@
     is_object_dtype,
     is_scalar,
     is_sequence,
-    needs_i8_conversion,
 )
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import (
@@ -56,11 +55,8 @@
     length_of_indexer,
 )
 from pandas.core.indexes.api import (
-    CategoricalIndex,
     Index,
-    IntervalIndex,
     MultiIndex,
-    ensure_index,
 )
 
 if TYPE_CHECKING:
@@ -1300,94 +1296,12 @@ def _get_listlike_indexer(self, key, axis: int):
             Indexer for the return object, -1 denotes keys not found.
         """
         ax = self.obj._get_axis(axis)
+        axis_name = self.obj._get_axis_name(axis)
 
-        keyarr = key
-        if not isinstance(keyarr, Index):
-            keyarr = com.asarray_tuplesafe(keyarr)
-
-        if isinstance(ax, MultiIndex):
-            # get_indexer expects a MultiIndex or sequence of tuples, but
-            #  we may be doing partial-indexing, so need an extra check
-
-            # Have the index compute an indexer or return None
-            # if it cannot handle:
-            indexer = ax._convert_listlike_indexer(keyarr)
-            # We only act on all found values:
-            if indexer is not None and (indexer != -1).all():
-                # _validate_read_indexer is a no-op if no -1s, so skip
-                return ax[indexer], indexer
-
-        if ax._index_as_unique:
-            indexer = ax.get_indexer_for(keyarr)
-            keyarr = ax.reindex(keyarr)[0]
-        else:
-            keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
-
-        self._validate_read_indexer(keyarr, indexer, axis)
-
-        if needs_i8_conversion(ax.dtype) or isinstance(
-            ax, (IntervalIndex, CategoricalIndex)
-        ):
-            # For CategoricalIndex take instead of reindex to preserve dtype.
-            #  For IntervalIndex this is to map integers to the Intervals they match to.
-            keyarr = ax.take(indexer)
-            if keyarr.dtype.kind in ["m", "M"]:
-                # DTI/TDI.take can infer a freq in some cases when we dont want one
-                if isinstance(key, list) or (
-                    isinstance(key, type(ax)) and key.freq is None
-                ):
-                    keyarr = keyarr._with_freq(None)
+        keyarr, indexer = ax.get_indexer_strict(key, axis_name)
 
         return keyarr, indexer
 
-    def _validate_read_indexer(self, key, indexer, axis: int):
-        """
-        Check that indexer can be used to return a result.
-
-        e.g. at least one element was found,
-        unless the list of keys was actually empty.
-
-        Parameters
-        ----------
-        key : list-like
-            Targeted labels (only used to show correct error message).
-        indexer: array-like of booleans
-            Indices corresponding to the key,
-            (with -1 indicating not found).
-        axis : int
-            Dimension on which the indexing is being made.
-
-        Raises
-        ------
-        KeyError
-            If at least one key was requested but none was found.
-        """
-        if len(key) == 0:
-            return
-
-        # Count missing values:
-        missing_mask = indexer < 0
-        missing = (missing_mask).sum()
-
-        if missing:
-            ax = self.obj._get_axis(axis)
-
-            # TODO: remove special-case; this is just to keep exception
-            #  message tests from raising while debugging
-            use_interval_msg = isinstance(ax, IntervalIndex) or (
-                isinstance(ax, CategoricalIndex)
-                and isinstance(ax.categories, IntervalIndex)
-            )
-
-            if missing == len(indexer):
-                axis_name = self.obj._get_axis_name(axis)
-                if use_interval_msg:
-                    key = list(key)
-                raise KeyError(f"None of [{key}] are in the [{axis_name}]")
-
-            not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
-            raise KeyError(f"{not_found} not in index")
-
 
 @doc(IndexingMixin.iloc)
 class _iLocIndexer(_LocationIndexer):

From 4e7e62825c8fc3fc181e1755775114129965b85a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 11 Jul 2021 11:49:36 -0700
Subject: [PATCH 2/3] privatize get_indexer_strict->_get_indexer_strict

---
 pandas/core/frame.py         | 2 +-
 pandas/core/indexes/base.py  | 2 +-
 pandas/core/indexes/multi.py | 6 +++---
 pandas/core/indexing.py      | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 841b0fe4f0195..ddb91d9329710 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3456,7 +3456,7 @@ def __getitem__(self, key):
         else:
             if is_iterator(key):
                 key = list(key)
-            indexer = self.columns.get_indexer_strict(key, "columns")[1]
+            indexer = self.columns._get_indexer_strict(key, "columns")[1]
 
         # take() does not accept boolean indexers
         if getattr(indexer, "dtype", None) == bool:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index cff022d6db06a..eb0af1c3802ba 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -5390,7 +5390,7 @@ def get_indexer_for(self, target) -> np.ndarray:
         indexer, _ = self.get_indexer_non_unique(target)
         return indexer
 
-    def get_indexer_strict(self, key, axis_name: str) -> np.ndarray:
+    def _get_indexer_strict(self, key, axis_name: str) -> np.ndarray:
         """
         Analogue to get_indexer that raises if any elements are missing.
         """
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 6b02358ab2dcc..9b89eb10c89a6 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -2542,7 +2542,7 @@ def _get_values_for_loc(self, series: Series, loc, key):
         new_ser = series._constructor(new_values, index=new_index, name=series.name)
         return new_ser.__finalize__(series)
 
-    def get_indexer_strict(self, key, axis_name) -> np.ndarray:
+    def _get_indexer_strict(self, key, axis_name) -> np.ndarray:
 
         keyarr = key
         if not isinstance(keyarr, Index):
@@ -2559,7 +2559,7 @@ def get_indexer_strict(self, key, axis_name) -> np.ndarray:
                 self._raise_if_missing(key, indexer, axis_name)
             return self[indexer], indexer
 
-        return super().get_indexer_strict(key, axis_name)
+        return super()._get_indexer_strict(key, axis_name)
 
     def _raise_if_missing(self, key, indexer, axis_name: str):
         keyarr = key
@@ -2567,7 +2567,7 @@ def _raise_if_missing(self, key, indexer, axis_name: str):
             keyarr = com.asarray_tuplesafe(key)
 
         if len(keyarr) and not isinstance(keyarr[0], tuple):
-            # i.e. same condition for special case in MultiIndex.get_indexer_strict
+            # i.e. same condition for special case in MultiIndex._get_indexer_strict
 
             check = self.levels[0].get_indexer(keyarr)
             mask = check == -1
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index f85f74ff7f3b3..371cf58f593ce 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1298,7 +1298,7 @@ def _get_listlike_indexer(self, key, axis: int):
         ax = self.obj._get_axis(axis)
         axis_name = self.obj._get_axis_name(axis)
 
-        keyarr, indexer = ax.get_indexer_strict(key, axis_name)
+        keyarr, indexer = ax._get_indexer_strict(key, axis_name)
 
         return keyarr, indexer
 

From 79a2e263baf5c47437e4d213b51792518b74accd Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 11 Jul 2021 14:38:10 -0700
Subject: [PATCH 3/3] mypy fixup

---
 pandas/core/indexes/base.py  | 13 +++++++++----
 pandas/core/indexes/multi.py |  2 +-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index eb0af1c3802ba..1d8497545f50a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -5390,7 +5390,7 @@ def get_indexer_for(self, target) -> np.ndarray:
         indexer, _ = self.get_indexer_non_unique(target)
         return indexer
 
-    def _get_indexer_strict(self, key, axis_name: str) -> np.ndarray:
+    def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
         """
         Analogue to get_indexer that raises if any elements are missing.
         """
@@ -5417,13 +5417,15 @@ def _get_indexer_strict(self, key, axis_name: str) -> np.ndarray:
             if keyarr.dtype.kind in ["m", "M"]:
                 # DTI/TDI.take can infer a freq in some cases when we dont want one
                 if isinstance(key, list) or (
-                    isinstance(key, type(self)) and key.freq is None
+                    isinstance(key, type(self))
+                    # "Index" has no attribute "freq"
+                    and key.freq is None  # type: ignore[attr-defined]
                 ):
                     keyarr = keyarr._with_freq(None)
 
         return keyarr, indexer
 
-    def _raise_if_missing(self, key, indexer, axis_name: str):
+    def _raise_if_missing(self, key, indexer, axis_name: str_t):
         """
         Check that indexer can be used to return a result.
 
@@ -5457,7 +5459,10 @@ def _raise_if_missing(self, key, indexer, axis_name: str):
             #  message tests from raising while debugging
             use_interval_msg = is_interval_dtype(self.dtype) or (
                 is_categorical_dtype(self.dtype)
-                and is_interval_dtype(self.categories.dtype)
+                # "Index" has no attribute "categories"  [attr-defined]
+                and is_interval_dtype(
+                    self.categories.dtype  # type: ignore[attr-defined]
+                )
             )
 
             if nmissing == len(indexer):
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 9b89eb10c89a6..87d83dfce052d 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -2542,7 +2542,7 @@ def _get_values_for_loc(self, series: Series, loc, key):
         new_ser = series._constructor(new_values, index=new_index, name=series.name)
         return new_ser.__finalize__(series)
 
-    def _get_indexer_strict(self, key, axis_name) -> np.ndarray:
+    def _get_indexer_strict(self, key, axis_name: str) -> tuple[Index, np.ndarray]:
 
         keyarr = key
         if not isinstance(keyarr, Index):