From 24cc97e61a9edfbd1a42cb4e7d5ed68089dc792b Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 17 Mar 2021 12:18:31 -0700
Subject: [PATCH 1/3] TYP: index.pyi

---
 pandas/_libs/index.pyi              | 88 +++++++++++++++++++++++++++++
 pandas/_libs/index.pyx              | 34 +++++------
 pandas/core/arrays/string_.py       |  7 +--
 pandas/core/groupby/ops.py          | 10 +---
 pandas/core/indexes/base.py         |  6 +-
 pandas/core/indexes/datetimelike.py |  3 +-
 pandas/core/indexes/multi.py        | 12 +++-
 pandas/core/strings/object_array.py |  3 +-
 8 files changed, 127 insertions(+), 36 deletions(-)
 create mode 100644 pandas/_libs/index.pyi

diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi
new file mode 100644
index 0000000000000..0ea635f1af049
--- /dev/null
+++ b/pandas/_libs/index.pyi
@@ -0,0 +1,88 @@
+from typing import Optional
+
+import numpy as np
+
+class IndexEngine:
+    over_size_threshold: bool
+
+    def __init__(self, vgetter, n: int) -> None: ...
+
+    def __contains__(self, val: object) -> bool: ...
+
+    # returns int | slice | np.ndarray[bool]
+    def get_loc(self, val: object) -> int | slice | np.ndarray: ...
+
+    def sizeof(self, deep: bool = False) -> int: ...
+    def __sizeof__(self) -> int: ...
+
+    @property
+    def is_unique(self) -> bool: ...
+
+    @property
+    def is_monotonic_increasing(self) -> bool: ...
+
+    @property
+    def is_monotonic_decreasing(self) -> bool: ...
+
+    def get_backfill_indexer(self, other: np.ndarray, limit: int | None = ...) -> np.ndarray: ...
+    def get_pad_indexer(self, other: np.ndarray, limit: int | None = ...) -> np.ndarray: ...
+
+    @property
+    def is_mapping_populated(self) -> bool: ...
+
+    def clear_mapping(self): ...
+    def get_indexer(self, values: np.ndarray) -> np.ndarray: ...  # np.ndarray[np.intp]
+    def get_indexer_non_unique(
+        self,
+        targets: np.ndarray,
+    ) -> tuple[
+        np.ndarray,  # indexer: np.ndarray[np.intp]
+        np.ndarray,  # missing: np.ndarray[np.intp]
+    ]: ...
+
+
+class Float64Engine(IndexEngine): ...
+class Float32Engine(IndexEngine): ...
+
+class Int64Engine(IndexEngine): ...
+class Int32Engine(IndexEngine): ...
+class Int16Engine(IndexEngine): ...
+class Int8Engine(IndexEngine): ...
+
+class UInt64Engine(IndexEngine): ...
+class UInt32Engine(IndexEngine): ...
+class UInt16Engine(IndexEngine): ...
+class UInt8Engine(IndexEngine): ...
+
+class ObjectEngine(IndexEngine): ...
+
+class DatetimeEngine(Int64Engine): ...
+class TimedeltaEngine(DatetimeEngine): ...
+class PeriodEngine(Int64Engine): ...
+
+
+class BaseMultiIndexCodesEngine:
+    levels: list[np.ndarray]
+    offsets: np.ndarray  # ndarray[uint64_t, ndim=1]
+
+    def __init__(
+        self,
+        levels: list[np.ndarray],  # all entries hashable
+        labels: list[np.ndarray],  # all entries integer-dtyped
+        offsets: np.ndarray,  # np.ndarray[np.uint64, ndim=1]
+    ) -> None: ...
+
+    def get_indexer(
+        self,
+        target: np.ndarray,  # np.ndarray[object]
+    ) -> np.ndarray: ...  # np.ndarray[np.intp]
+
+    def _extract_level_codes(self, target: np.ndarray) -> np.ndarray: ...
+
+    def get_indexer_with_fill(
+        self,
+        target: np.ndarray,  # np.ndarray[object] of tuples
+        values: np.ndarray,  # np.ndarray[object] of tuples
+        method: str,
+        limit: int | None,
+    ) -> np.ndarray: ...  # np.ndarray[np.int64]
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 9159fa03c12c0..2525c4d7e90bc 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -259,7 +259,7 @@ cdef class IndexEngine:
         self.monotonic_inc = 0
         self.monotonic_dec = 0
 
-    def get_indexer(self, ndarray values):
+    def get_indexer(self, ndarray values) -> np.ndarray:
         self._ensure_mapping_populated()
         return self.mapping.lookup(values)
 
@@ -269,6 +269,11 @@ cdef class IndexEngine:
         return the labels in the same order as the target
         and a missing indexer into the targets (which correspond
         to the -1 indices in the results
+
+        Returns
+        -------
+        indexer : np.ndarray[np.intp]
+        missing : np.ndarray[np.intp]
         """
         cdef:
             ndarray values, x
@@ -455,22 +460,22 @@ cdef class DatetimeEngine(Int64Engine):
         # we may get datetime64[ns] or timedelta64[ns], cast these to int64
         return super().get_indexer_non_unique(targets.view("i8"))
 
-    def get_indexer(self, ndarray values):
+    def get_indexer(self, ndarray values) -> np.ndarray:
         self._ensure_mapping_populated()
         if values.dtype != self._get_box_dtype():
-            return np.repeat(-1, len(values)).astype('i4')
+            return np.repeat(np.intp(-1), len(values))
         values = np.asarray(values).view('i8')
         return self.mapping.lookup(values)
 
     def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
         if other.dtype != self._get_box_dtype():
-            return np.repeat(-1, len(other)).astype('i4')
+            return np.repeat(np.intp(-1), len(other))
         other = np.asarray(other).view('i8')
         return algos.pad(self._get_index_values(), other, limit=limit)
 
     def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
         if other.dtype != self._get_box_dtype():
-            return np.repeat(-1, len(other)).astype('i4')
+            return np.repeat(np.intp(-1), len(other))
         other = np.asarray(other).view('i8')
         return algos.backfill(self._get_index_values(), other, limit=limit)
 
@@ -572,17 +577,17 @@ cdef class BaseMultiIndexCodesEngine:
         # integers representing labels: we will use its get_loc and get_indexer
         self._base.__init__(self, lambda: lab_ints, len(lab_ints))
 
-    def _codes_to_ints(self, codes):
+    def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
         raise NotImplementedError("Implemented by subclass")
 
-    def _extract_level_codes(self, object target):
+    def _extract_level_codes(self, ndarray[object] target) -> np.ndarray:
         """
         Map the requested list of (tuple) keys to their integer representations
         for searching in the underlying integer index.
 
         Parameters
         ----------
-        target : list-like of keys
+        target : ndarray[object]
             Each key is a tuple, with a label for each level of the index.
 
         Returns
@@ -607,7 +612,7 @@ cdef class BaseMultiIndexCodesEngine:
 
         Returns
         -------
-        np.ndarray[int64_t, ndim=1] of the indexer of `target` into
+        np.ndarray[intp_t, ndim=1] of the indexer of `target` into
         `self.values`
         """
         lab_ints = self._extract_level_codes(target)
@@ -635,7 +640,7 @@ cdef class BaseMultiIndexCodesEngine:
             the same as the length of all tuples in `values`
         values : ndarray[object] of tuples
             must be sorted and all have the same length.  Should be the set of
-            the MultiIndex's values.  Needed only if `method` is not None
+            the MultiIndex's values.
         method: string
             "backfill" or "pad"
         limit: int or None
@@ -694,9 +699,8 @@ cdef class BaseMultiIndexCodesEngine:
             next_code += 1
 
         # get the indexer, and undo the sorting of `target.values`
-        sorted_indexer = (
-            algos.backfill if method == "backfill" else algos.pad
-        )(new_codes, new_target_codes, limit=limit).astype('int64')
+        algo = algos.backfill if method == "backfill" else algos.pad
+        sorted_indexer = algo(new_codes, new_target_codes, limit=limit).astype("int64")
         return sorted_indexer[np.argsort(target_order)]
 
     def get_loc(self, object key):
@@ -715,9 +719,7 @@ cdef class BaseMultiIndexCodesEngine:
 
         return self._base.get_loc(self, lab_int)
 
-    def get_indexer_non_unique(self, ndarray target):
-        # This needs to be overridden just because the default one works on
-        # target._values, and target can be itself a MultiIndex.
+    def get_indexer_non_unique(self, ndarray[object] target):
 
         lab_ints = self._extract_level_codes(target)
         indexer = self._base.get_indexer_non_unique(self, lab_ints)
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 67cd6c63c1faa..0a2893ac49a49 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -405,12 +405,7 @@ def _cmp_method(self, other, op):
     _str_na_value = StringDtype.na_value
 
     def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
-        from pandas.arrays import (
-            BooleanArray,
-            IntegerArray,
-            StringArray,
-        )
-        from pandas.core.arrays.string_ import StringDtype
+        from pandas.arrays import BooleanArray
 
         if dtype is None:
             dtype = StringDtype()
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 74e96015b4544..e00cf397e474b 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -786,14 +786,10 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
             counts[label] = group.shape[0]
             result[label] = res
 
-        result = lib.maybe_convert_objects(result, try_float=False)
-        # error: Incompatible types in assignment (expression has type
-        # "Union[ExtensionArray, ndarray]", variable has type "ndarray")
-        result = maybe_cast_result(  # type: ignore[assignment]
-            result, obj, numeric_only=True
-        )
+        converted = lib.maybe_convert_objects(result, try_float=False)
+        out = maybe_cast_result(converted, obj, numeric_only=True)
 
-        return result, counts
+        return out, counts
 
 
 class BinGrouper(BaseGrouper):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 3a468758ab3fd..aa71a899984b9 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -319,7 +319,7 @@ def _outer_indexer(
     # would we like our indexing holder to defer to us
     _defer_to_indexing = False
 
-    _engine_type = libindex.ObjectEngine
+    _engine_type: Type[libindex.IndexEngine] = libindex.ObjectEngine
     # whether we support partial string indexing. Overridden
     # in DatetimeIndex and PeriodIndex
     _supports_partial_string_indexing = False
@@ -722,8 +722,8 @@ def _cleanup(self) -> None:
         self._engine.clear_mapping()
 
     @cache_readonly
-    def _engine(self) -> libindex.ObjectEngine:
-        # property, for now, slow to look up
+    def _engine(self) -> libindex.IndexEngine:
+        # For base class (object dtype) we get ObjectEngine
 
         # to avoid a reference cycle, bind `target_values` to a local variable, so
         # `self` is not passed into the lambda.
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 31ad8b7d8a295..e1f0d0fdfacda 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -7,6 +7,7 @@
     Any,
     List,
     Optional,
+    Sequence,
     Tuple,
     TypeVar,
     Union,
@@ -535,7 +536,7 @@ def shift(self: _T, periods: int = 1, freq=None) -> _T:
     # --------------------------------------------------------------------
     # List-like Methods
 
-    def _get_delete_freq(self, loc: int):
+    def _get_delete_freq(self, loc: Union[int, Sequence[int]]):
         """
         Find the `freq` for self.delete(loc).
         """
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 97492f35232e3..99f83a4f286a3 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -559,6 +559,7 @@ def from_tuples(
             if isinstance(tuples, Index):
                 tuples = tuples._values
 
+            tuples = cast(np.ndarray, tuples)
             arrays = list(lib.tuples_to_object_array(tuples).T)
         elif isinstance(tuples, list):
             arrays = list(lib.to_object_array_tuples(tuples).T)
@@ -1111,8 +1112,8 @@ def _engine(self):
         # Check the total number of bits needed for our representation:
         if lev_bits[0] > 64:
             # The levels would overflow a 64 bit uint - use Python integers:
-            return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
-        return MultiIndexUIntEngine(self.levels, self.codes, offsets)
+            return MultiIndexPyIntEngine(list(self.levels), self.codes, offsets)
+        return MultiIndexUIntEngine(list(self.levels), self.codes, offsets)
 
     @property
     def _constructor(self) -> Callable[..., MultiIndex]:
@@ -2698,11 +2699,16 @@ def _get_indexer(
                         target, method=method, limit=limit, tolerance=tolerance
                     )
 
+                # TODO: explicitly raise here?  we only have one test that
+                #  gets here, and it is checking that we raise with method="nearest"
+
         if method == "pad" or method == "backfill":
             if tolerance is not None:
                 raise NotImplementedError(
                     "tolerance not implemented yet for MultiIndex"
                 )
+            # TODO: get_indexer_with_fill docstring says values must be _sorted_
+            #  but that doesn't appear to be enforced
             indexer = self._engine.get_indexer_with_fill(
                 target=target._values, values=self._values, method=method, limit=limit
             )
@@ -2714,6 +2720,8 @@ def _get_indexer(
         else:
             indexer = self._engine.get_indexer(target._values)
 
+        # Note: we only get here (in extant tests at least) with
+        #  target.nlevels == self.nlevels
         return ensure_platform_int(indexer)
 
     def get_slice_bound(
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index edf32bade0657..db924e0b00819 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -73,8 +73,9 @@ def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
             arr = np.asarray(arr, dtype=object)  # type: ignore[assignment]
         mask = isna(arr)
         convert = not np.all(mask)
+        ndarr = np.asarray(arr)
         try:
-            result = lib.map_infer_mask(arr, f, mask.view(np.uint8), convert)
+            result = lib.map_infer_mask(ndarr, f, mask.view(np.uint8), convert)
         except (TypeError, AttributeError) as e:
             # Reraise the exception if callable `f` got wrong number of args.
             # The user may want to be warned by this, instead of getting NaN

From 3d5cf0e1490e35086393f0cf4e3a11db65234cab Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 18 Mar 2021 07:51:36 -0700
Subject: [PATCH 2/3] revert unnecessary changes

---
 pandas/_libs/index.pyi              | 2 --
 pandas/core/indexes/multi.py        | 7 +++----
 pandas/core/strings/object_array.py | 3 +--
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi
index 0ea635f1af049..979619c3d14c4 100644
--- a/pandas/_libs/index.pyi
+++ b/pandas/_libs/index.pyi
@@ -1,5 +1,3 @@
-from typing import Optional
-
 import numpy as np
 
 class IndexEngine:
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 99f83a4f286a3..5816c0231db18 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -557,9 +557,8 @@ def from_tuples(
             arrays = [[]] * len(names)
         elif isinstance(tuples, (np.ndarray, Index)):
             if isinstance(tuples, Index):
-                tuples = tuples._values
+                tuples = np.asarray(tuples._values)
 
-            tuples = cast(np.ndarray, tuples)
             arrays = list(lib.tuples_to_object_array(tuples).T)
         elif isinstance(tuples, list):
             arrays = list(lib.to_object_array_tuples(tuples).T)
@@ -1112,8 +1111,8 @@ def _engine(self):
         # Check the total number of bits needed for our representation:
         if lev_bits[0] > 64:
             # The levels would overflow a 64 bit uint - use Python integers:
-            return MultiIndexPyIntEngine(list(self.levels), self.codes, offsets)
-        return MultiIndexUIntEngine(list(self.levels), self.codes, offsets)
+            return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
+        return MultiIndexUIntEngine(self.levels, self.codes, offsets)
 
     @property
     def _constructor(self) -> Callable[..., MultiIndex]:
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index db924e0b00819..edf32bade0657 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -73,9 +73,8 @@ def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
             arr = np.asarray(arr, dtype=object)  # type: ignore[assignment]
         mask = isna(arr)
         convert = not np.all(mask)
-        ndarr = np.asarray(arr)
         try:
-            result = lib.map_infer_mask(ndarr, f, mask.view(np.uint8), convert)
+            result = lib.map_infer_mask(arr, f, mask.view(np.uint8), convert)
         except (TypeError, AttributeError) as e:
             # Reraise the exception if callable `f` got wrong number of args.
             # The user may want to be warned by this, instead of getting NaN

From 70270f344a27f7fb087b8f58ef819b96fbbb5c9f Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 30 Mar 2021 07:07:07 -0700
Subject: [PATCH 3/3] add slice to _get_delete_freq

---
 pandas/core/indexes/datetimelike.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index e4d379fc9aed0..bac00b2399121 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -537,7 +537,7 @@ def shift(self: _T, periods: int = 1, freq=None) -> _T:
     # --------------------------------------------------------------------
     # List-like Methods
 
-    def _get_delete_freq(self, loc: Union[int, Sequence[int]]):
+    def _get_delete_freq(self, loc: Union[int, slice, Sequence[int]]):
         """
         Find the `freq` for self.delete(loc).
         """