diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a467798adf2e4..bf6887ed27005 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -27,6 +27,7 @@ ArrayLike, DtypeObj, Scalar, + npt, ) from pandas.util._decorators import doc @@ -528,7 +529,7 @@ def factorize_array( size_hint: int | None = None, na_value=None, mask: np.ndarray | None = None, -) -> tuple[np.ndarray, np.ndarray]: +) -> tuple[npt.NDArray[np.intp], np.ndarray]: """ Factorize an array-like to codes and uniques. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 3fdb52a73dc3e..4a3ccc34f9723 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -37,6 +37,7 @@ Ordered, Scalar, Shape, + npt, type_t, ) from pandas.compat.numpy import function as nv @@ -2048,7 +2049,7 @@ def _validate_setitem_value(self, value): codes = self.categories.get_indexer(rvalue) return codes.astype(self._ndarray.dtype, copy=False) - def _reverse_indexer(self) -> dict[Hashable, np.ndarray]: + def _reverse_indexer(self) -> dict[Hashable, npt.NDArray[np.intp]]: """ Compute the inverse of a categorical, returning a dict of categories -> indexers. diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 874d7395b1950..51d2a7700be56 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -32,6 +32,7 @@ FrameOrSeries, Shape, final, + npt, ) from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly @@ -677,7 +678,7 @@ def __init__( sort: bool = True, group_keys: bool = True, mutated: bool = False, - indexer: np.ndarray | None = None, + indexer: npt.NDArray[np.intp] | None = None, dropna: bool = True, ): assert isinstance(axis, Index), axis @@ -1268,7 +1269,13 @@ def _is_indexed_like(obj, axes, axis: int) -> bool: class DataSplitter(Generic[FrameOrSeries]): - def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0): + def __init__( + self, + data: FrameOrSeries, + labels: npt.NDArray[np.intp], + ngroups: int, + axis: int = 0, + ): self.data = data self.labels = ensure_platform_int(labels) # _should_ already be np.intp self.ngroups = ngroups diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2f1d85f1340a4..d369624d30cdf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -45,6 +45,7 @@ Shape, T, final, + npt, ) from pandas.compat.numpy import function as nv from pandas.errors import ( @@ -306,8 +307,7 @@ class Index(IndexOpsMixin, PandasObject): # given the dtypes of the passed arguments @final - def _left_indexer_unique(self: _IndexT, other: _IndexT) -> np.ndarray: - # -> np.ndarray[np.intp] + def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]: # Caller is responsible for ensuring other.dtype == self.dtype sv = self._get_join_target() ov = other._get_join_target() @@ -316,7 +316,7 @@ def _left_indexer_unique(self: _IndexT, other: _IndexT) -> np.ndarray: @final def _left_indexer( self: _IndexT, other: _IndexT - ) -> tuple[ArrayLike, np.ndarray, np.ndarray]: + ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: # Caller is responsible for ensuring other.dtype == self.dtype sv = self._get_join_target() ov = other._get_join_target() @@ -327,7 +327,7 @@ def _left_indexer( @final def _inner_indexer( self: _IndexT, other: _IndexT - ) -> tuple[ArrayLike, np.ndarray, np.ndarray]: + ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: # Caller is responsible for ensuring other.dtype == self.dtype sv = self._get_join_target() ov = other._get_join_target() @@ -338,7 +338,7 @@ def _inner_indexer( @final def _outer_indexer( self: _IndexT, other: _IndexT - ) -> tuple[ArrayLike, np.ndarray, np.ndarray]: + ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: # Caller is responsible for ensuring other.dtype == self.dtype sv = self._get_join_target() ov = other._get_join_target() @@ -3460,8 +3460,7 @@ def get_indexer( method: str_t | None = None, limit: int | None = None, tolerance=None, - ) -> np.ndarray: - # returned ndarray is np.intp + ) -> npt.NDArray[np.intp]: method = missing.clean_reindex_fill_method(method) target = self._maybe_cast_listlike_indexer(target) @@ -3842,7 +3841,7 @@ def _validate_can_reindex(self, indexer: np.ndarray) -> None: def reindex( self, target, method=None, level=None, limit=None, tolerance=None - ) -> tuple[Index, np.ndarray | None]: + ) -> tuple[Index, npt.NDArray[np.intp] | None]: """ Create index with target's values. @@ -3918,7 +3917,7 @@ def _maybe_preserve_names(self, target: Index, preserve_names: bool): @final def _reindex_non_unique( self, target: Index - ) -> tuple[Index, np.ndarray, np.ndarray | None]: + ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]: """ Create a new index with target's values (move/add/delete values as necessary) use with non-unique Index and a possibly non-unique target. @@ -4206,8 +4205,7 @@ def _join_multi(self, other: Index, how: str_t): @final def _join_non_unique( self, other: Index, how: str_t = "left" - ) -> tuple[Index, np.ndarray, np.ndarray]: - # returned ndarrays are np.intp + ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]: from pandas.core.reshape.merge import get_join_indexers # We only get here if dtypes match @@ -4235,8 +4233,7 @@ def _join_non_unique( @final def _join_level( self, other: Index, level, how: str_t = "left", keep_order: bool = True - ) -> tuple[MultiIndex, np.ndarray | None, np.ndarray | None]: - # Any returned ndarrays are np.intp + ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: """ The join method *only* affects the level of the resulting MultiIndex. Otherwise it just exactly aligns the Index data to the @@ -4248,7 +4245,7 @@ def _join_level( """ from pandas.core.indexes.multi import MultiIndex - def _get_leaf_sorter(labels: list[np.ndarray]) -> np.ndarray: + def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]: """ Returns sorter for the inner most level while preserving the order of higher levels. @@ -5000,7 +4997,7 @@ def asof(self, label): return self[loc] - def asof_locs(self, where: Index, mask: np.ndarray) -> np.ndarray: + def asof_locs(self, where: Index, mask: np.ndarray) -> npt.NDArray[np.intp]: """ Return the locations (indices) of labels in the index. @@ -5191,7 +5188,7 @@ def shift(self, periods=1, freq=None): f"TimedeltaIndex; Got type {type(self).__name__}" ) - def argsort(self, *args, **kwargs) -> np.ndarray: + def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: """ Return the integer indices that would sort the index. @@ -5342,8 +5339,9 @@ def set_value(self, arr, key, value): """ @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) - def get_indexer_non_unique(self, target) -> tuple[np.ndarray, np.ndarray]: - # both returned ndarrays are np.intp + def get_indexer_non_unique( + self, target + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: target = ensure_index(target) target = self._maybe_cast_listlike_indexer(target) @@ -5373,7 +5371,7 @@ def get_indexer_non_unique(self, target) -> tuple[np.ndarray, np.ndarray]: return ensure_platform_int(indexer), ensure_platform_int(missing) @final - def get_indexer_for(self, target) -> np.ndarray: + def get_indexer_for(self, target) -> npt.NDArray[np.intp]: """ Guaranteed return of an indexer even when non-unique. @@ -5393,28 +5391,25 @@ def get_indexer_for(self, target) -> np.ndarray: @overload def _get_indexer_non_comparable( self, target: Index, method, unique: Literal[True] = ... - ) -> np.ndarray: - # returned ndarray is np.intp + ) -> npt.NDArray[np.intp]: ... @overload def _get_indexer_non_comparable( self, target: Index, method, unique: Literal[False] - ) -> tuple[np.ndarray, np.ndarray]: - # both returned ndarrays are np.intp + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... @overload def _get_indexer_non_comparable( self, target: Index, method, unique: bool = True - ) -> np.ndarray | tuple[np.ndarray, np.ndarray]: - # any returned ndarrays are np.intp + ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... @final def _get_indexer_non_comparable( self, target: Index, method, unique: bool = True - ) -> np.ndarray | tuple[np.ndarray, np.ndarray]: + ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: """ Called from get_indexer or get_indexer_non_unique when the target is of a non-comparable dtype. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 6500a9c8eef61..5e4aecfe83a0a 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -14,6 +14,7 @@ from pandas._typing import ( Dtype, DtypeObj, + npt, ) from pandas.util._decorators import doc @@ -368,7 +369,7 @@ def fillna(self, value, downcast=None): def reindex( self, target, method=None, level=None, limit=None, tolerance=None - ) -> tuple[Index, np.ndarray | None]: + ) -> tuple[Index, npt.NDArray[np.intp] | None]: """ Create index with target's values (move/add/delete values as necessary) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 90ed6a5e0ccfe..9712a5d95a234 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -33,6 +33,7 @@ from pandas._typing import ( Dtype, DtypeObj, + npt, ) from pandas.util._decorators import ( cache_readonly, @@ -807,7 +808,7 @@ def inferred_type(self) -> str: # sure we can't have ambiguous indexing return "datetime64" - def indexer_at_time(self, time, asof: bool = False) -> np.ndarray: + def indexer_at_time(self, time, asof: bool = False) -> npt.NDArray[np.intp]: """ Return index locations of values at particular time of day (e.g. 9:30AM). @@ -848,7 +849,7 @@ def indexer_at_time(self, time, asof: bool = False) -> np.ndarray: def indexer_between_time( self, start_time, end_time, include_start: bool = True, include_end: bool = True - ) -> np.ndarray: + ) -> npt.NDArray[np.intp]: """ Return index locations of values between particular times of day (e.g., 9:00-9:30AM). diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3d83063d94bac..c401ad0c1e0d5 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -28,6 +28,7 @@ from pandas._typing import ( Dtype, DtypeObj, + npt, ) from pandas.errors import InvalidIndexError from pandas.util._decorators import ( @@ -644,8 +645,7 @@ def _get_indexer( method: str | None = None, limit: int | None = None, tolerance: Any | None = None, - ) -> np.ndarray: - # returned ndarray is np.intp + ) -> npt.NDArray[np.intp]: if isinstance(target, IntervalIndex): # non-overlapping -> at most one match per interval in target @@ -668,8 +668,9 @@ def _get_indexer( return ensure_platform_int(indexer) @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) - def get_indexer_non_unique(self, target: Index) -> tuple[np.ndarray, np.ndarray]: - # both returned ndarrays are np.intp + def get_indexer_non_unique( + self, target: Index + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: target = ensure_index(target) if not self._should_compare(target) and not self._should_partial_index(target): @@ -689,8 +690,9 @@ def get_indexer_non_unique(self, target: Index) -> tuple[np.ndarray, np.ndarray] return ensure_platform_int(indexer), ensure_platform_int(missing) - def _get_indexer_pointwise(self, target: Index) -> tuple[np.ndarray, np.ndarray]: - # both returned ndarrays are np.intp + def _get_indexer_pointwise( + self, target: Index + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: """ pointwise implementation for get_indexer and get_indexer_non_unique. """ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 30275ac60ee76..0ce99df44a5f9 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -17,7 +17,10 @@ from pandas._libs import index as libindex from pandas._libs.lib import no_default -from pandas._typing import Dtype +from pandas._typing import ( + Dtype, + npt, +) from pandas.compat.numpy import function as nv from pandas.util._decorators import ( cache_readonly, @@ -395,8 +398,7 @@ def _get_indexer( method: str | None = None, limit: int | None = None, tolerance=None, - ) -> np.ndarray: - # -> np.ndarray[np.intp] + ) -> npt.NDArray[np.intp]: if com.any_not_none(method, tolerance, limit): return super()._get_indexer( target, method=method, tolerance=tolerance, limit=limit @@ -502,7 +504,7 @@ def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: nv.validate_max(args, kwargs) return self._minmax("max") - def argsort(self, *args, **kwargs) -> np.ndarray: + def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: """ Returns the indices that would sort the index and its underlying data. @@ -529,7 +531,7 @@ def argsort(self, *args, **kwargs) -> np.ndarray: def factorize( self, sort: bool = False, na_sentinel: int | None = -1 - ) -> tuple[np.ndarray, RangeIndex]: + ) -> tuple[npt.NDArray[np.intp], RangeIndex]: codes = np.arange(len(self), dtype=np.intp) uniques = self if sort and self.step < 0: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 40a208a24699a..cf71bcfffc264 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -28,6 +28,7 @@ TimedeltaConvertibleTypes, TimestampConvertibleTypes, final, + npt, ) from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -1768,9 +1769,8 @@ def _get_period_bins(self, ax: PeriodIndex): def _take_new_index( - obj: FrameOrSeries, indexer: np.ndarray, new_index: Index, axis: int = 0 + obj: FrameOrSeries, indexer: npt.NDArray[np.intp], new_index: Index, axis: int = 0 ) -> FrameOrSeries: - # indexer: np.ndarray[np.intp] if isinstance(obj, ABCSeries): new_values = algos.take_nd(obj._values, indexer) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 143999a4677b3..f82fcfcf172a9 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -29,6 +29,7 @@ FrameOrSeries, IndexLabel, Suffixes, + npt, ) from pandas.errors import MergeError from pandas.util._decorators import ( @@ -1003,7 +1004,7 @@ def _create_join_index( self, index: Index, other_index: Index, - indexer: np.ndarray, + indexer: npt.NDArray[np.intp], how: str = "left", ) -> Index: """ @@ -1448,7 +1449,7 @@ def _validate(self, validate: str) -> None: def get_join_indexers( left_keys, right_keys, sort: bool = False, how: str = "inner", **kwargs -) -> tuple[np.ndarray, np.ndarray]: +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: """ Parameters @@ -1507,9 +1508,9 @@ def restore_dropped_levels_multijoin( right: MultiIndex, dropped_level_names, join_index: Index, - lindexer: np.ndarray, - rindexer: np.ndarray, -) -> tuple[list[Index], np.ndarray, list[Hashable]]: + lindexer: npt.NDArray[np.intp], + rindexer: npt.NDArray[np.intp], +) -> tuple[list[Index], npt.NDArray[np.intp], list[Hashable]]: """ *this is an internal non-public method* @@ -1539,7 +1540,7 @@ def restore_dropped_levels_multijoin( ------- levels : list of Index levels of combined multiindexes - labels : intp array + labels : np.ndarray[np.intp] labels of combined multiindexes names : List[Hashable] names of combined multiindex levels @@ -2055,7 +2056,7 @@ def _left_join_on_index( def _factorize_keys( lk: ArrayLike, rk: ArrayLike, sort: bool = True, how: str = "inner" -) -> tuple[np.ndarray, np.ndarray, int]: +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]: """ Encode left and right keys as enumerated types. diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 0edb150bdc273..12ab08c4e30a1 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -10,7 +10,10 @@ import pandas._libs.reshape as libreshape from pandas._libs.sparse import IntIndex -from pandas._typing import Dtype +from pandas._typing import ( + Dtype, + npt, +) from pandas.util._decorators import cache_readonly from pandas.core.dtypes.cast import maybe_promote @@ -136,7 +139,7 @@ def __init__(self, index: MultiIndex, level=-1, constructor=None): def _indexer_and_to_sort( self, ) -> tuple[ - np.ndarray, # np.ndarray[np.intp] + npt.NDArray[np.intp], list[np.ndarray], # each has _some_ signed integer dtype ]: v = self.level diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 712e9785f47f7..befa67350e182 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -21,6 +21,7 @@ from pandas._typing import ( IndexKeyFunc, Shape, + npt, ) from pandas.core.dtypes.common import ( @@ -186,7 +187,9 @@ def maybe_lift(lab, size) -> tuple[np.ndarray, int]: return out -def get_compressed_ids(labels, sizes: Shape) -> tuple[np.ndarray, np.ndarray]: +def get_compressed_ids( + labels, sizes: Shape +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64]]: """ Group_index is offsets into cartesian product of all possible labels. This space can be huge, so this function compresses it, by computing offsets @@ -236,7 +239,9 @@ def decons_group_index(comp_labels, shape): return label_list[::-1] -def decons_obs_group_ids(comp_ids: np.ndarray, obs_ids, shape, labels, xnull: bool): +def decons_obs_group_ids( + comp_ids: npt.NDArray[np.intp], obs_ids, shape, labels, xnull: bool +): """ Reconstruct labels from observed group ids. @@ -260,8 +265,9 @@ def decons_obs_group_ids(comp_ids: np.ndarray, obs_ids, shape, labels, xnull: bo return [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels] -def indexer_from_factorized(labels, shape: Shape, compress: bool = True) -> np.ndarray: - # returned ndarray is np.intp +def indexer_from_factorized( + labels, shape: Shape, compress: bool = True +) -> npt.NDArray[np.intp]: ids = get_group_index(labels, shape, sort=True, xnull=False) if not compress: @@ -275,7 +281,7 @@ def indexer_from_factorized(labels, shape: Shape, compress: bool = True) -> np.n def lexsort_indexer( keys, orders=None, na_position: str = "last", key: Callable | None = None -) -> np.ndarray: +) -> npt.NDArray[np.intp]: """ Performs lexical sorting on a set of keys @@ -347,7 +353,7 @@ def nargsort( na_position: str = "last", key: Callable | None = None, mask: np.ndarray | None = None, -): +) -> npt.NDArray[np.intp]: """ Intended to be a drop-in replacement for np.argsort which handles NaNs. @@ -552,7 +558,7 @@ def ensure_key_mapped(values, key: Callable | None, levels=None): def get_flattened_list( - comp_ids: np.ndarray, # np.ndarray[np.intp] + comp_ids: npt.NDArray[np.intp], ngroups: int, levels: Iterable[Index], labels: Iterable[np.ndarray], @@ -602,8 +608,8 @@ def get_indexer_dict( def get_group_index_sorter( - group_index: np.ndarray, ngroups: int | None = None -) -> np.ndarray: + group_index: npt.NDArray[np.intp], ngroups: int | None = None +) -> npt.NDArray[np.intp]: """ algos.groupsort_indexer implements `counting sort` and it is at least O(ngroups), where @@ -646,7 +652,7 @@ def get_group_index_sorter( def compress_group_index( group_index: np.ndarray, sort: bool = True -) -> tuple[np.ndarray, np.ndarray]: +) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: """ Group_index is offsets into cartesian product of all possible labels. This space can be huge, so this function compresses it, by computing offsets @@ -667,8 +673,8 @@ def compress_group_index( def _reorder_by_uniques( - uniques: np.ndarray, labels: np.ndarray -) -> tuple[np.ndarray, np.ndarray]: + uniques: npt.NDArray[np.int64], labels: npt.NDArray[np.intp] +) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.intp]]: """ Parameters ----------