From 88141b7a71672299a389989bc11ab1410130289e Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Mar 2021 20:10:07 -0800 Subject: [PATCH 1/4] TYP: annotate Index.join and cousins --- pandas/core/indexes/base.py | 117 ++++++++++++++++++++++++++-- pandas/core/indexes/datetimelike.py | 7 +- 2 files changed, 116 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b001139bef6c5..203f33c25db02 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -21,6 +21,7 @@ TypeVar, Union, cast, + overload, ) import warnings @@ -165,6 +166,8 @@ ) if TYPE_CHECKING: + from typing import Literal + from pandas import ( CategoricalIndex, DataFrame, @@ -3876,7 +3879,36 @@ def _reindex_non_unique(self, target): # -------------------------------------------------------------------- # Join Methods - def join(self, other, how="left", level=None, return_indexers=False, sort=False): + @overload + def join( + self, + other: Index, + how: str = "left", + level=None, + return_indexers: Literal[True] = ..., + sort: bool = False, + ) -> Tuple[Index, Optional[np.ndarray[np.intp]], Optional[np.ndarray[np.intp]]]: + ... + + @overload + def join( + self, + other: Index, + how: str = "left", + level=None, + return_indexers: Literal[False] = ..., + sort: bool = False, + ) -> Index: + ... + + def join( + self, + other: Index, + how: str = "left", + level=None, + return_indexers: bool = False, + sort: bool = False, + ): """ Compute join_index and indexers to conform data structures to the new index. @@ -3918,7 +3950,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) if len(other) == 0 and how in ("left", "outer"): join_index = self._view() if return_indexers: - rindexer = np.repeat(-1, len(join_index)) + rindexer = -1 * np.ones(len(join_index), dtype=np.intp) return join_index, None, rindexer else: return join_index @@ -3926,7 +3958,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) if len(self) == 0 and how in ("right", "outer"): join_index = other._view() if return_indexers: - lindexer = np.repeat(-1, len(join_index)) + lindexer = -1 * np.ones(len(join_index), dtype=np.intp) return join_index, lindexer, None else: return join_index @@ -4001,18 +4033,34 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) lindexer = None # type: ignore[assignment] else: lindexer = self.get_indexer(join_index) + lindexer = ensure_platform_int(lindexer) if join_index is other: # error: Incompatible types in assignment (expression has type "None", # variable has type "ndarray") rindexer = None # type: ignore[assignment] else: rindexer = other.get_indexer(join_index) + rindexer = ensure_platform_int(rindexer) return join_index, lindexer, rindexer else: return join_index + @overload + def _join_multi( + self, other: Index, how: str = "left", return_indexers: Literal[True] = ... + ) -> Tuple[ + MultiIndex, Optional[np.ndarray[np.intp]], Optional[np.ndarray[np.intp]] + ]: + ... + + @overload + def _join_multi( + self, other: Index, how: str = "left", return_indexers: Literal[False] = ... + ) -> MultiIndex: + ... + @final - def _join_multi(self, other, how, return_indexers=True): + def _join_multi(self, other: Index, how: str, return_indexers: bool = True): from pandas.core.indexes.multi import MultiIndex from pandas.core.reshape.merge import restore_dropped_levels_multijoin @@ -4090,8 +4138,22 @@ def _join_multi(self, other, how, return_indexers=True): return result[0], result[2], result[1] return result + @overload + def _join_non_unique( + self, other: Index, how: str = "left", return_indexers: Literal[True] = ... + ) -> Tuple[Index, Optional[np.ndarray[np.intp]], Optional[np.ndarray[np.intp]]]: + ... + + @overload + def _join_non_unique( + self, other: Index, how: str = "left", return_indexers: Literal[False] = ... + ) -> Index: + ... + @final - def _join_non_unique(self, other, how="left", return_indexers=False): + def _join_non_unique( + self, other: Index, how: str = "left", return_indexers: bool = False + ): from pandas.core.reshape.merge import get_join_indexers # We only get here if dtypes match @@ -4122,9 +4184,38 @@ def _join_non_unique(self, other, how="left", return_indexers=False): else: return join_index + @overload + def _join_level( + self, + other: Index, + level, + how: str = "left", + return_indexers: bool = False, + keep_order: bool = True, + ) -> Tuple[ + MultiIndex, Optional[np.ndarray[np.intp]], Optional[np.ndarray[np.intp]] + ]: + ... + + @overload + def _join_level( + self, + other: Index, + level, + how: str = "left", + return_indexers: bool = False, + keep_order: bool = True, + ) -> MultiIndex: + ... + @final def _join_level( - self, other, level, how="left", return_indexers=False, keep_order=True + self, + other: Index, + level, + how: str = "left", + return_indexers: bool = False, + keep_order: bool = True, ): """ The join method *only* affects the level of the resulting @@ -4264,8 +4355,20 @@ def _get_leaf_sorter(labels): else: return join_index + @overload + def _join_monotonic( + self, other, how: str = "left", return_indexers: Literal[True] = ... + ) -> Tuple[Index, Optional[np.ndarray[np.intp]], Optional[np.ndarray[np.intp]]]: + ... + + @overload + def _join_monotonic( + self, other, how: str = "left", return_indexers: Literal[False] = ... + ) -> Index: + ... + @final - def _join_monotonic(self, other, how="left", return_indexers=False): + def _join_monotonic(self, other, how: str = "left", return_indexers: bool = False): # We only get here with matching dtypes assert other.dtype == self.dtype diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 96459970a9b57..fed1158df5b75 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -827,7 +827,12 @@ def _union(self, other, sort): _join_precedence = 10 def join( - self, other, how: str = "left", level=None, return_indexers=False, sort=False + self, + other: Index, + how: str = "left", + level=None, + return_indexers: bool = False, + sort: bool = False, ): """ See Index.join From ac20b8985ba58862709564999c57547a3de1e9c9 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Mar 2021 20:18:03 -0800 Subject: [PATCH 2/4] TYP: _reindex_indexer --- pandas/core/series.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9feec7acae4c6..ba49db4760b92 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4220,7 +4220,13 @@ def _reduce( with np.errstate(all="ignore"): return op(delegate, skipna=skipna, **kwds) - def _reindex_indexer(self, new_index, indexer, copy): + def _reindex_indexer( + self, + new_index: Optional[Index], + indexer: Optional[np.ndarray[np.intp]], + copy: bool, + ) -> Series: + # Note: new_index is None iff indexer is None if indexer is None: if copy: return self.copy() @@ -4231,7 +4237,7 @@ def _reindex_indexer(self, new_index, indexer, copy): ) return self._constructor(new_values, index=new_index) - def _needs_reindex_multi(self, axes, method, level): + def _needs_reindex_multi(self, axes, method, level) -> bool: """ Check if we do need a multi reindex; this is for compat with higher dims. From b7f7dad1ec886def76b6e4f2cc860bb35914c671 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Mar 2021 20:32:34 -0800 Subject: [PATCH 3/4] TYP: get_indexer returns ndarray[intp] --- pandas/core/indexes/base.py | 16 ++++++++++++++-- pandas/core/indexes/category.py | 13 +++++++++---- pandas/core/indexes/interval.py | 10 +++++++--- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/range.py | 4 +++- 5 files changed, 34 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 203f33c25db02..92691d57de1a5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3396,7 +3396,7 @@ def get_loc(self, key, method=None, tolerance=None): @final def get_indexer( self, target, method=None, limit=None, tolerance=None - ) -> np.ndarray: + ) -> np.ndarray[np.intp]: method = missing.clean_reindex_fill_method(method) target = ensure_index(target) @@ -3421,7 +3421,7 @@ def get_indexer( def _get_indexer( self, target: Index, method=None, limit=None, tolerance=None - ) -> np.ndarray: + ) -> np.ndarray[np.intp]: if tolerance is not None: tolerance = self._convert_tolerance(tolerance, target) @@ -5384,6 +5384,18 @@ def get_indexer_for(self, target, **kwargs): indexer, _ = self.get_indexer_non_unique(target) return indexer + @overload + def _get_indexer_non_comparable( + self, target: Index, method, unique: Literal[True] = ... + ) -> np.ndarray[np.intp]: + ... + + @overload + def _get_indexer_non_comparable( + self, target: Index, method, unique: Literal[False] = ... + ) -> Tuple[np.ndarray[np.intp], np.ndarray[np.intp]]: + ... + @final def _get_indexer_non_comparable(self, target: Index, method, unique: bool = True): """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 62941a23c6459..a1d6f3f283023 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -3,6 +3,7 @@ Hashable, List, Optional, + Tuple, ) import warnings @@ -484,7 +485,7 @@ def _maybe_cast_indexer(self, key) -> int: def _get_indexer( self, target: Index, method=None, limit=None, tolerance=None - ) -> np.ndarray: + ) -> np.ndarray[np.intp]: if self.equals(target): return np.arange(len(self), dtype="intp") @@ -492,11 +493,15 @@ def _get_indexer( return self._get_indexer_non_unique(target._values)[0] @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) - def get_indexer_non_unique(self, target): + def get_indexer_non_unique( + self, target + ) -> Tuple[np.ndarray[np.intp], np.ndarray[np.intp]]: target = ibase.ensure_index(target) return self._get_indexer_non_unique(target._values) - def _get_indexer_non_unique(self, values: ArrayLike): + def _get_indexer_non_unique( + self, values: ArrayLike + ) -> Tuple[np.ndarray[np.intp], np.ndarray[np.intp]]: """ get_indexer_non_unique but after unrapping the target Index object. """ @@ -515,7 +520,7 @@ def _get_indexer_non_unique(self, values: ArrayLike): codes = self.categories.get_indexer(values) indexer, missing = self._engine.get_indexer_non_unique(codes) - return ensure_platform_int(indexer), missing + return ensure_platform_int(indexer), ensure_platform_int(missing) @doc(Index._convert_list_indexer) def _convert_list_indexer(self, keyarr): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 86ff95a588217..82bc2b8ec3bc0 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -700,7 +700,7 @@ def _get_indexer( method: Optional[str] = None, limit: Optional[int] = None, tolerance: Optional[Any] = None, - ) -> np.ndarray: + ) -> np.ndarray[np.intp]: if isinstance(target, IntervalIndex): # equal indexes -> 1:1 positional match @@ -732,7 +732,9 @@ def _get_indexer( return ensure_platform_int(indexer) @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) - def get_indexer_non_unique(self, target: Index) -> Tuple[np.ndarray, np.ndarray]: + def get_indexer_non_unique( + self, target: Index + ) -> Tuple[np.ndarray[np.intp], np.ndarray[np.intp]]: target = ensure_index(target) if isinstance(target, IntervalIndex) and not self._should_compare(target): @@ -751,7 +753,9 @@ def get_indexer_non_unique(self, target: Index) -> Tuple[np.ndarray, np.ndarray] return ensure_platform_int(indexer), ensure_platform_int(missing) - def _get_indexer_pointwise(self, target: Index) -> Tuple[np.ndarray, np.ndarray]: + def _get_indexer_pointwise( + self, target: Index + ) -> Tuple[np.ndarray[np.intp], np.ndarray[np.intp]]: """ pointwise implementation for get_indexer and get_indexer_non_unique. """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7bb3dc5ab4545..fbaa46e697f51 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2672,7 +2672,7 @@ def _get_partial_string_timestamp_match_key(self, key): def _get_indexer( self, target: Index, method=None, limit=None, tolerance=None - ) -> np.ndarray: + ) -> np.ndarray[np.intp]: # empty indexer if not len(target): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 05bb32dad6cab..5797390b66f4e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -395,7 +395,9 @@ def get_loc(self, key, method=None, tolerance=None): raise KeyError(key) return super().get_loc(key, method=method, tolerance=tolerance) - def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None): + def _get_indexer( + self, target: Index, method=None, limit=None, tolerance=None + ) -> np.ndarray[np.intp]: if com.any_not_none(method, tolerance, limit): return super()._get_indexer( target, method=method, tolerance=tolerance, limit=limit From f57f2cf60cb6d04d5ef52beebdf9a9ebf5d2db6d Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Mar 2021 20:37:24 -0800 Subject: [PATCH 4/4] TYP: Index.reindex return types --- pandas/core/indexes/base.py | 4 +++- pandas/core/indexes/category.py | 6 ++++-- pandas/core/indexes/multi.py | 6 ++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 92691d57de1a5..3d95a7affe38e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3751,7 +3751,9 @@ def _validate_can_reindex(self, indexer: np.ndarray) -> None: if not self._index_as_unique and len(indexer): raise ValueError("cannot reindex from a duplicate axis") - def reindex(self, target, method=None, level=None, limit=None, tolerance=None): + def reindex( + self, target, method=None, level=None, limit=None, tolerance=None + ) -> Tuple[Index, Optional[np.ndarray[np.intp]]]: """ Create index with target's values. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index a1d6f3f283023..81766576a8edb 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -387,7 +387,9 @@ def unique(self, level=None): # of result, not self. return type(self)._simple_new(result, name=self.name) - def reindex(self, target, method=None, level=None, limit=None, tolerance=None): + def reindex( + self, target, method=None, level=None, limit=None, tolerance=None + ) -> Tuple[Index, Optional[np.ndarray[np.intp]]]: """ Create index with target's values (move/add/delete values as necessary) @@ -395,7 +397,7 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): ------- new_index : pd.Index Resulting index - indexer : np.ndarray or None + indexer : np.ndarray[np.intp] or None Indices of output values in original index """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index fbaa46e697f51..565773993084c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2509,7 +2509,9 @@ def sortlevel( return new_index, indexer - def reindex(self, target, method=None, level=None, limit=None, tolerance=None): + def reindex( + self, target, method=None, level=None, limit=None, tolerance=None + ) -> Tuple[Index, Optional[np.ndarray[np.intp]]]: """ Create index with target's values (move/add/delete values as necessary) @@ -2517,7 +2519,7 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): ------- new_index : pd.MultiIndex Resulting index - indexer : np.ndarray or None + indexer : np.ndarray[np.intp] or None Indices of output values in original index. """