TYP: index.pyi (#40486)

jbrockmendel · web-flow · commit 872fc29b56dc · 2021-03-30T17:21:28.000-04:00
diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi
@@ -0,0 +1,86 @@
+import numpy as np
+
+class IndexEngine:
+    over_size_threshold: bool
+
+    def __init__(self, vgetter, n: int): ...
+
+    def __contains__(self, val: object) -> bool: ...
+
+    # -> int | slice | np.ndarray[bool]
+    def get_loc(self, val: object) -> int | slice | np.ndarray: ...
+
+    def sizeof(self, deep: bool = False) -> int: ...
+    def __sizeof__(self) -> int: ...
+
+    @property
+    def is_unique(self) -> bool: ...
+
+    @property
+    def is_monotonic_increasing(self) -> bool: ...
+
+    @property
+    def is_monotonic_decreasing(self) -> bool: ...
+
+    def get_backfill_indexer(self, other: np.ndarray, limit: int | None =...) -> np.ndarray: ...
+    def get_pad_indexer(self, other: np.ndarray, limit: int | None =...) -> np.ndarray: ...
+
+    @property
+    def is_mapping_populated(self) -> bool: ...
+
+    def clear_mapping(self): ...
+    def get_indexer(self, values: np.ndarray) -> np.ndarray: ...  # np.ndarray[np.intp]
+    def get_indexer_non_unique(
+        self,
+        targets: np.ndarray,
+    ) -> tuple[
+        np.ndarray,  # np.ndarray[np.intp]
+        np.ndarray,  # np.ndarray[np.intp]
+    ]: ...
+
+
+class Float64Engine(IndexEngine): ...
+class Float32Engine(IndexEngine): ...
+
+class Int64Engine(IndexEngine): ...
+class Int32Engine(IndexEngine): ...
+class Int16Engine(IndexEngine): ...
+class Int8Engine(IndexEngine): ...
+
+class UInt64Engine(IndexEngine): ...
+class UInt32Engine(IndexEngine): ...
+class UInt16Engine(IndexEngine): ...
+class UInt8Engine(IndexEngine): ...
+
+class ObjectEngine(IndexEngine): ...
+
+class DatetimeEngine(Int64Engine): ...
+class TimedeltaEngine(DatetimeEngine): ...
+class PeriodEngine(Int64Engine): ...
+
+
+class BaseMultiIndexCodesEngine:
+    levels: list[np.ndarray]
+    offsets: np.ndarray  # ndarray[uint64_t, ndim=1]
+
+    def __init__(
+        self,
+        levels: list[np.ndarray],  # all entries hashable
+        labels: list[np.ndarray],  # all entries integer-dtyped
+        offsets: np.ndarray,  # np.ndarray[np.uint64, ndim=1]
+    ): ...
+
+    def get_indexer(
+        self,
+        target: np.ndarray,  # np.ndarray[object]
+    ) -> np.ndarray: ...    # np.ndarray[np.intp]
+
+    def _extract_level_codes(self, target: object): ...
+
+    def get_indexer_with_fill(
+        self,
+        target: np.ndarray,  # np.ndarray[object] of tuples
+        values: np.ndarray,  # np.ndarray[object] of tuples
+        method: str,
+        limit: int | None,
+    ) -> np.ndarray: ...  # np.ndarray[np.intp]
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -259,7 +259,7 @@ cdef class IndexEngine:
         self.monotonic_inc = 0
         self.monotonic_dec = 0
 
-    def get_indexer(self, ndarray values):
+    def get_indexer(self, ndarray values) -> np.ndarray:
         self._ensure_mapping_populated()
         return self.mapping.lookup(values)
 
@@ -269,6 +269,11 @@ cdef class IndexEngine:
         return the labels in the same order as the target
         and a missing indexer into the targets (which correspond
         to the -1 indices in the results
+
+        Returns
+        -------
+        indexer : np.ndarray[np.intp]
+        missing : np.ndarray[np.intp]
         """
         cdef:
             ndarray values, x
@@ -455,7 +460,7 @@ cdef class DatetimeEngine(Int64Engine):
         # we may get datetime64[ns] or timedelta64[ns], cast these to int64
         return super().get_indexer_non_unique(targets.view("i8"))
 
-    def get_indexer(self, ndarray values):
+    def get_indexer(self, ndarray values) -> np.ndarray:
         self._ensure_mapping_populated()
         if values.dtype != self._get_box_dtype():
             return np.repeat(-1, len(values)).astype(np.intp)
@@ -572,17 +577,17 @@ cdef class BaseMultiIndexCodesEngine:
         # integers representing labels: we will use its get_loc and get_indexer
         self._base.__init__(self, lambda: lab_ints, len(lab_ints))
 
-    def _codes_to_ints(self, codes):
+    def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
         raise NotImplementedError("Implemented by subclass")
 
-    def _extract_level_codes(self, object target):
+    def _extract_level_codes(self, ndarray[object] target) -> np.ndarray:
         """
         Map the requested list of (tuple) keys to their integer representations
         for searching in the underlying integer index.
 
         Parameters
         ----------
-        target : list-like of keys
+        target : ndarray[object]
             Each key is a tuple, with a label for each level of the index.
 
         Returns
@@ -607,7 +612,7 @@ cdef class BaseMultiIndexCodesEngine:
 
         Returns
         -------
-        np.ndarray[int64_t, ndim=1] of the indexer of `target` into
+        np.ndarray[intp_t, ndim=1] of the indexer of `target` into
         `self.values`
         """
         lab_ints = self._extract_level_codes(target)
@@ -635,15 +640,15 @@ cdef class BaseMultiIndexCodesEngine:
             the same as the length of all tuples in `values`
         values : ndarray[object] of tuples
             must be sorted and all have the same length.  Should be the set of
-            the MultiIndex's values.  Needed only if `method` is not None
+            the MultiIndex's values.
         method: string
             "backfill" or "pad"
         limit: int or None
             if provided, limit the number of fills to this value
 
         Returns
         -------
-        np.ndarray[int64_t, ndim=1] of the indexer of `target` into `values`,
+        np.ndarray[intp_t, ndim=1] of the indexer of `target` into `values`,
         filled with the `method` (and optionally `limit`) specified
         """
         assert method in ("backfill", "pad")
@@ -714,9 +719,7 @@ cdef class BaseMultiIndexCodesEngine:
 
         return self._base.get_loc(self, lab_int)
 
-    def get_indexer_non_unique(self, ndarray target):
-        # This needs to be overridden just because the default one works on
-        # target._values, and target can be itself a MultiIndex.
+    def get_indexer_non_unique(self, ndarray[object] target):
 
         lab_ints = self._extract_level_codes(target)
         indexer = self._base.get_indexer_non_unique(self, lab_ints)
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -405,12 +405,7 @@ def _cmp_method(self, other, op):
     _str_na_value = StringDtype.na_value
 
     def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
-        from pandas.arrays import (
-            BooleanArray,
-            IntegerArray,
-            StringArray,
-        )
-        from pandas.core.arrays.string_ import StringDtype
+        from pandas.arrays import BooleanArray
 
         if dtype is None:
             dtype = StringDtype()
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -320,7 +320,7 @@ def _outer_indexer(
     # would we like our indexing holder to defer to us
     _defer_to_indexing = False
 
-    _engine_type = libindex.ObjectEngine
+    _engine_type: Type[libindex.IndexEngine] = libindex.ObjectEngine
     # whether we support partial string indexing. Overridden
     # in DatetimeIndex and PeriodIndex
     _supports_partial_string_indexing = False
@@ -723,8 +723,8 @@ def _cleanup(self) -> None:
         self._engine.clear_mapping()
 
     @cache_readonly
-    def _engine(self) -> libindex.ObjectEngine:
-        # property, for now, slow to look up
+    def _engine(self) -> libindex.IndexEngine:
+        # For base class (object dtype) we get ObjectEngine
 
         # to avoid a reference cycle, bind `target_values` to a local variable, so
         # `self` is not passed into the lambda.
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -7,6 +7,7 @@
     Any,
     List,
     Optional,
+    Sequence,
     Tuple,
     TypeVar,
     Union,
@@ -536,7 +537,7 @@ def shift(self: _T, periods: int = 1, freq=None) -> _T:
     # --------------------------------------------------------------------
     # List-like Methods
 
-    def _get_delete_freq(self, loc: int):
+    def _get_delete_freq(self, loc: Union[int, slice, Sequence[int]]):
         """
         Find the `freq` for self.delete(loc).
         """
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -557,7 +557,7 @@ def from_tuples(
             arrays = [[]] * len(names)
         elif isinstance(tuples, (np.ndarray, Index)):
             if isinstance(tuples, Index):
-                tuples = tuples._values
+                tuples = np.asarray(tuples._values)
 
             arrays = list(lib.tuples_to_object_array(tuples).T)
         elif isinstance(tuples, list):
@@ -2689,11 +2689,16 @@ def _get_indexer(
                         target, method=method, limit=limit, tolerance=tolerance
                     )
 
+                # TODO: explicitly raise here?  we only have one test that
+                #  gets here, and it is checking that we raise with method="nearest"
+
         if method == "pad" or method == "backfill":
             if tolerance is not None:
                 raise NotImplementedError(
                     "tolerance not implemented yet for MultiIndex"
                 )
+            # TODO: get_indexer_with_fill docstring says values must be _sorted_
+            #  but that doesn't appear to be enforced
             indexer = self._engine.get_indexer_with_fill(
                 target=target._values, values=self._values, method=method, limit=limit
             )
@@ -2705,6 +2710,8 @@ def _get_indexer(
         else:
             indexer = self._engine.get_indexer(target._values)
 
+        # Note: we only get here (in extant tests at least) with
+        #  target.nlevels == self.nlevels
         return ensure_platform_int(indexer)
 
     def get_slice_bound(