TYP: npt.NDarray #42488

Merged · 2 commits · Jul 14, 2021
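
The change throughout is mechanical: bare `np.ndarray` annotations become `npt.NDArray[np.intp]`, so the element dtype previously tracked in `# -> np.ndarray[np.intp]` breadcrumb comments is visible to type checkers. A minimal sketch of the pattern, assuming numpy >= 1.21 (where `numpy.typing.NDArray` became available; in the diffs below `npt` is imported from `pandas._typing`, which presumably re-exports `numpy.typing`):

```python
# Sketch of the annotation pattern this PR applies; illustration only,
# not pandas code itself.
import numpy as np
import numpy.typing as npt  # NDArray[...] requires numpy >= 1.21


def argsort_positions(values: npt.NDArray[np.float64]) -> npt.NDArray[np.intp]:
    # np.argsort returns platform-int (np.intp) indices; the return
    # annotation now says so instead of a bare np.ndarray.
    return np.argsort(values)


idx = argsort_positions(np.array([3.0, 1.0, 2.0]))
print(idx, idx.dtype)  # [1 2 0] intp
```
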
3 changes: 2 additions & 1 deletion pandas/core/algorithms.py
@@ -27,6 +27,7 @@
     ArrayLike,
     DtypeObj,
     Scalar,
+    npt,
 )
 from pandas.util._decorators import doc

@@ -528,7 +529,7 @@ def factorize_array(
     size_hint: int | None = None,
     na_value=None,
     mask: np.ndarray | None = None,
-) -> tuple[np.ndarray, np.ndarray]:
+) -> tuple[npt.NDArray[np.intp], np.ndarray]:
     """
     Factorize an array-like to codes and uniques.
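
`factorize_array` is internal, but the same contract shows through the public `pd.factorize` wrapper: codes are platform-int positions, while uniques keep the values' dtype — which is why the second element of the tuple stays a plain `np.ndarray`. A quick illustration:

```python
import numpy as np
import pandas as pd

# codes are intp labels into uniques; uniques preserve the input dtype
codes, uniques = pd.factorize(np.array(["b", "a", "b"]))
print(codes, codes.dtype)  # [0 1 0] intp
print(uniques)             # ['b' 'a']
```
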
3 changes: 2 additions & 1 deletion pandas/core/arrays/categorical.py
@@ -37,6 +37,7 @@
     Ordered,
     Scalar,
     Shape,
+    npt,
     type_t,
 )
 from pandas.compat.numpy import function as nv
@@ -2048,7 +2049,7 @@ def _validate_setitem_value(self, value):
         codes = self.categories.get_indexer(rvalue)
         return codes.astype(self._ndarray.dtype, copy=False)

-    def _reverse_indexer(self) -> dict[Hashable, np.ndarray]:
+    def _reverse_indexer(self) -> dict[Hashable, npt.NDArray[np.intp]]:
         """
         Compute the inverse of a categorical, returning
         a dict of categories -> indexers.
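
For context, `_reverse_indexer` (a private API, shown here only to illustrate what the new value type describes) maps each category to the intp positions at which it occurs:

```python
import pandas as pd

cat = pd.Categorical(["a", "b", "a", "c"])
# private method; each dict value is an intp position array
print(cat._reverse_indexer())
# {'a': array([0, 2]), 'b': array([1]), 'c': array([3])}
```
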
11 changes: 9 additions & 2 deletions pandas/core/groupby/ops.py
@@ -32,6 +32,7 @@
     FrameOrSeries,
     Shape,
     final,
+    npt,
 )
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import cache_readonly
@@ -677,7 +678,7 @@ def __init__(
         sort: bool = True,
         group_keys: bool = True,
         mutated: bool = False,
-        indexer: np.ndarray | None = None,
+        indexer: npt.NDArray[np.intp] | None = None,
         dropna: bool = True,
     ):
         assert isinstance(axis, Index), axis
@@ -1268,7 +1269,13 @@ def _is_indexed_like(obj, axes, axis: int) -> bool:


 class DataSplitter(Generic[FrameOrSeries]):
-    def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0):
+    def __init__(
+        self,
+        data: FrameOrSeries,
+        labels: npt.NDArray[np.intp],
+        ngroups: int,
+        axis: int = 0,
+    ):
         self.data = data
         self.labels = ensure_platform_int(labels)  # _should_ already be np.intp
         self.ngroups = ngroups
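
`np.intp` is the platform's pointer-sized integer (int32 on 32-bit builds, int64 on 64-bit) — the dtype numpy itself uses for indexing, and what `ensure_platform_int` coerces to before the `DataSplitter` labels are used. Roughly:

```python
import numpy as np

labels = np.array([0, 0, 1, 2], dtype=np.int32)
# roughly what ensure_platform_int does: no copy if already platform int
platform_labels = labels.astype(np.intp, copy=False)
print(platform_labels.dtype)  # int64 on a 64-bit platform
```
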
47 changes: 21 additions & 26 deletions pandas/core/indexes/base.py
@@ -45,6 +45,7 @@
     Shape,
     T,
     final,
+    npt,
 )
 from pandas.compat.numpy import function as nv
 from pandas.errors import (
@@ -306,8 +307,7 @@ class Index(IndexOpsMixin, PandasObject):
     # given the dtypes of the passed arguments

     @final
-    def _left_indexer_unique(self: _IndexT, other: _IndexT) -> np.ndarray:
-        # -> np.ndarray[np.intp]
+    def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]:
         # Caller is responsible for ensuring other.dtype == self.dtype
         sv = self._get_join_target()
         ov = other._get_join_target()
@@ -316,7 +316,7 @@ def _left_indexer_unique(self: _IndexT, other: _IndexT) -> np.ndarray:
     @final
     def _left_indexer(
         self: _IndexT, other: _IndexT
-    ) -> tuple[ArrayLike, np.ndarray, np.ndarray]:
+    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
         # Caller is responsible for ensuring other.dtype == self.dtype
         sv = self._get_join_target()
         ov = other._get_join_target()
@@ -327,7 +327,7 @@ def _left_indexer(
     @final
     def _inner_indexer(
         self: _IndexT, other: _IndexT
-    ) -> tuple[ArrayLike, np.ndarray, np.ndarray]:
+    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
         # Caller is responsible for ensuring other.dtype == self.dtype
         sv = self._get_join_target()
         ov = other._get_join_target()
@@ -338,7 +338,7 @@ def _inner_indexer(
     @final
     def _outer_indexer(
         self: _IndexT, other: _IndexT
-    ) -> tuple[ArrayLike, np.ndarray, np.ndarray]:
+    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
         # Caller is responsible for ensuring other.dtype == self.dtype
         sv = self._get_join_target()
         ov = other._get_join_target()
@@ -3460,8 +3460,7 @@ def get_indexer(
         method: str_t | None = None,
         limit: int | None = None,
         tolerance=None,
-    ) -> np.ndarray:
-        # returned ndarray is np.intp
+    ) -> npt.NDArray[np.intp]:
         method = missing.clean_reindex_fill_method(method)
         target = self._maybe_cast_listlike_indexer(target)

@@ -3842,7 +3841,7 @@ def _validate_can_reindex(self, indexer: np.ndarray) -> None:

     def reindex(
         self, target, method=None, level=None, limit=None, tolerance=None
-    ) -> tuple[Index, np.ndarray | None]:
+    ) -> tuple[Index, npt.NDArray[np.intp] | None]:
         """
         Create index with target's values.

@@ -3918,7 +3917,7 @@ def _maybe_preserve_names(self, target: Index, preserve_names: bool):
     @final
     def _reindex_non_unique(
         self, target: Index
-    ) -> tuple[Index, np.ndarray, np.ndarray | None]:
+    ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]:
         """
         Create a new index with target's values (move/add/delete values as
         necessary) use with non-unique Index and a possibly non-unique target.
@@ -4206,8 +4205,7 @@ def _join_multi(self, other: Index, how: str_t):
     @final
     def _join_non_unique(
         self, other: Index, how: str_t = "left"
-    ) -> tuple[Index, np.ndarray, np.ndarray]:
-        # returned ndarrays are np.intp
+    ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
         from pandas.core.reshape.merge import get_join_indexers

         # We only get here if dtypes match
@@ -4235,8 +4233,7 @@ def _join_non_unique(
     @final
     def _join_level(
         self, other: Index, level, how: str_t = "left", keep_order: bool = True
-    ) -> tuple[MultiIndex, np.ndarray | None, np.ndarray | None]:
-        # Any returned ndarrays are np.intp
+    ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
         """
         The join method *only* affects the level of the resulting
         MultiIndex. Otherwise it just exactly aligns the Index data to the
@@ -4248,7 +4245,7 @@ def _join_level(
         """
         from pandas.core.indexes.multi import MultiIndex

-        def _get_leaf_sorter(labels: list[np.ndarray]) -> np.ndarray:
+        def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
             """
             Returns sorter for the inner most level while preserving the
             order of higher levels.
@@ -5000,7 +4997,7 @@ def asof(self, label):

         return self[loc]

-    def asof_locs(self, where: Index, mask: np.ndarray) -> np.ndarray:
+    def asof_locs(self, where: Index, mask: np.ndarray) -> npt.NDArray[np.intp]:
         """
         Return the locations (indices) of labels in the index.

@@ -5191,7 +5188,7 @@ def shift(self, periods=1, freq=None):
             f"TimedeltaIndex; Got type {type(self).__name__}"
         )

-    def argsort(self, *args, **kwargs) -> np.ndarray:
+    def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
         """
         Return the integer indices that would sort the index.

@@ -5342,8 +5339,9 @@ def set_value(self, arr, key, value):
     """

     @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
-    def get_indexer_non_unique(self, target) -> tuple[np.ndarray, np.ndarray]:
-        # both returned ndarrays are np.intp
+    def get_indexer_non_unique(
+        self, target
+    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
         target = ensure_index(target)
         target = self._maybe_cast_listlike_indexer(target)

@@ -5373,7 +5371,7 @@ def get_indexer_non_unique(self, target) -> tuple[np.ndarray, np.ndarray]:
         return ensure_platform_int(indexer), ensure_platform_int(missing)

     @final
-    def get_indexer_for(self, target) -> np.ndarray:
+    def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
         """
         Guaranteed return of an indexer even when non-unique.

@@ -5393,28 +5391,25 @@ def get_indexer_for(self, target) -> np.ndarray:
     @overload
     def _get_indexer_non_comparable(
         self, target: Index, method, unique: Literal[True] = ...
-    ) -> np.ndarray:
-        # returned ndarray is np.intp
+    ) -> npt.NDArray[np.intp]:
         ...

     @overload
     def _get_indexer_non_comparable(
         self, target: Index, method, unique: Literal[False]
-    ) -> tuple[np.ndarray, np.ndarray]:
-        # both returned ndarrays are np.intp
+    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
         ...

     @overload
     def _get_indexer_non_comparable(
         self, target: Index, method, unique: bool = True
-    ) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
-        # any returned ndarrays are np.intp
+    ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
         ...

     @final
     def _get_indexer_non_comparable(
         self, target: Index, method, unique: bool = True
-    ) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
+    ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
         """
         Called from get_indexer or get_indexer_non_unique when the target
         is of a non-comparable dtype.
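
The `get_indexer` family, which accounts for most of the annotations in this file, is public, and the returned arrays are already intp at runtime — only the declared types change. For instance:

```python
import pandas as pd

idx = pd.Index(["a", "b", "c"])
indexer = idx.get_indexer(["b", "z"])
print(indexer, indexer.dtype)  # [ 1 -1] intp  (-1 marks "not found")

dup = pd.Index(["a", "a", "b"])
# non-unique variant returns matched positions plus the target
# positions that had no match
indexer, missing = dup.get_indexer_non_unique(["a", "z"])
print(indexer, missing)  # [ 0  1 -1] [1]
```
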
3 changes: 2 additions & 1 deletion pandas/core/indexes/category.py
@@ -14,6 +14,7 @@
 from pandas._typing import (
     Dtype,
     DtypeObj,
+    npt,
 )
 from pandas.util._decorators import doc

@@ -368,7 +369,7 @@ def fillna(self, value, downcast=None):

     def reindex(
         self, target, method=None, level=None, limit=None, tolerance=None
-    ) -> tuple[Index, np.ndarray | None]:
+    ) -> tuple[Index, npt.NDArray[np.intp] | None]:
         """
         Create index with target's values (move/add/delete values as necessary)

5 changes: 3 additions & 2 deletions pandas/core/indexes/datetimes.py
@@ -33,6 +33,7 @@
 from pandas._typing import (
     Dtype,
     DtypeObj,
+    npt,
 )
 from pandas.util._decorators import (
     cache_readonly,
@@ -807,7 +808,7 @@ def inferred_type(self) -> str:
         # sure we can't have ambiguous indexing
         return "datetime64"

-    def indexer_at_time(self, time, asof: bool = False) -> np.ndarray:
+    def indexer_at_time(self, time, asof: bool = False) -> npt.NDArray[np.intp]:
         """
         Return index locations of values at particular time of day
         (e.g. 9:30AM).
@@ -848,7 +849,7 @@ def indexer_at_time(self, time, asof: bool = False) -> np.ndarray:

     def indexer_between_time(
         self, start_time, end_time, include_start: bool = True, include_end: bool = True
-    ) -> np.ndarray:
+    ) -> npt.NDArray[np.intp]:
         """
         Return index locations of values between particular times of day
         (e.g., 9:00-9:30AM).
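
Both methods return positional (intp) indexers into the DatetimeIndex, e.g.:

```python
import pandas as pd

dti = pd.date_range("2021-07-14 09:00", periods=4, freq="30min")
# positions whose time-of-day matches exactly
print(dti.indexer_at_time("09:30"))                # [1]
# positions within an inclusive time window
print(dti.indexer_between_time("09:00", "10:00"))  # [0 1 2]
```
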
14 changes: 8 additions & 6 deletions pandas/core/indexes/interval.py
@@ -28,6 +28,7 @@
 from pandas._typing import (
     Dtype,
     DtypeObj,
+    npt,
 )
 from pandas.errors import InvalidIndexError
 from pandas.util._decorators import (
@@ -644,8 +645,7 @@ def _get_indexer(
         method: str | None = None,
         limit: int | None = None,
         tolerance: Any | None = None,
-    ) -> np.ndarray:
-        # returned ndarray is np.intp
+    ) -> npt.NDArray[np.intp]:

         if isinstance(target, IntervalIndex):
             # non-overlapping -> at most one match per interval in target
@@ -668,8 +668,9 @@ def _get_indexer(
         return ensure_platform_int(indexer)

     @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
-    def get_indexer_non_unique(self, target: Index) -> tuple[np.ndarray, np.ndarray]:
-        # both returned ndarrays are np.intp
+    def get_indexer_non_unique(
+        self, target: Index
+    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
         target = ensure_index(target)

         if not self._should_compare(target) and not self._should_partial_index(target):
@@ -689,8 +690,9 @@ def get_indexer_non_unique(self, target: Index) -> tuple[np.ndarray, np.ndarray]

         return ensure_platform_int(indexer), ensure_platform_int(missing)

-    def _get_indexer_pointwise(self, target: Index) -> tuple[np.ndarray, np.ndarray]:
-        # both returned ndarrays are np.intp
+    def _get_indexer_pointwise(
+        self, target: Index
+    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
         """
         pointwise implementation for get_indexer and get_indexer_non_unique.
         """
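
On an IntervalIndex, `get_indexer` locates the interval containing each target scalar and returns -1 where nothing matches:

```python
import pandas as pd

ii = pd.IntervalIndex.from_breaks([0, 1, 2, 3])  # (0, 1], (1, 2], (2, 3]
indexer = ii.get_indexer([0.5, 2.5, 9.0])
print(indexer, indexer.dtype)  # [ 0  2 -1] intp
```
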
12 changes: 7 additions & 5 deletions pandas/core/indexes/range.py
@@ -17,7 +17,10 @@

 from pandas._libs import index as libindex
 from pandas._libs.lib import no_default
-from pandas._typing import Dtype
+from pandas._typing import (
+    Dtype,
+    npt,
+)
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import (
     cache_readonly,
@@ -395,8 +398,7 @@ def _get_indexer(
         method: str | None = None,
         limit: int | None = None,
         tolerance=None,
-    ) -> np.ndarray:
-        # -> np.ndarray[np.intp]
+    ) -> npt.NDArray[np.intp]:
         if com.any_not_none(method, tolerance, limit):
             return super()._get_indexer(
                 target, method=method, tolerance=tolerance, limit=limit
@@ -502,7 +504,7 @@ def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
         nv.validate_max(args, kwargs)
         return self._minmax("max")

-    def argsort(self, *args, **kwargs) -> np.ndarray:
+    def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
         """
         Returns the indices that would sort the index and its
         underlying data.
@@ -529,7 +531,7 @@ def argsort(self, *args, **kwargs) -> np.ndarray:

     def factorize(
         self, sort: bool = False, na_sentinel: int | None = -1
-    ) -> tuple[np.ndarray, RangeIndex]:
+    ) -> tuple[npt.NDArray[np.intp], RangeIndex]:
         codes = np.arange(len(self), dtype=np.intp)
         uniques = self
         if sort and self.step < 0:
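
The body above shows why the codes annotation is exact here: they are constructed directly with `dtype=np.intp`. With a decreasing range and `sort=True`, both codes and uniques come back reversed:

```python
import pandas as pd

ri = pd.RangeIndex(5, 0, -1)  # 5, 4, 3, 2, 1
codes, uniques = ri.factorize(sort=True)
print(codes, codes.dtype)  # [4 3 2 1 0] intp
print(uniques)             # RangeIndex(start=1, stop=6, step=1)
```
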
4 changes: 2 additions & 2 deletions pandas/core/resample.py
@@ -28,6 +28,7 @@
     TimedeltaConvertibleTypes,
     TimestampConvertibleTypes,
     final,
+    npt,
 )
 from pandas.compat.numpy import function as nv
 from pandas.errors import AbstractMethodError
@@ -1768,9 +1769,8 @@ def _get_period_bins(self, ax: PeriodIndex):


 def _take_new_index(
-    obj: FrameOrSeries, indexer: np.ndarray, new_index: Index, axis: int = 0
+    obj: FrameOrSeries, indexer: npt.NDArray[np.intp], new_index: Index, axis: int = 0
 ) -> FrameOrSeries:
-    # indexer: np.ndarray[np.intp]

     if isinstance(obj, ABCSeries):
         new_values = algos.take_nd(obj._values, indexer)