From f47c4b26af8c12d5fc4823c4aaf34fc8d45b0e28 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Mar 2021 08:40:37 -0700 Subject: [PATCH 1/4] TYP: get_indexer --- pandas/core/indexes/base.py | 30 ++++++++++++++++++++++++------ pandas/core/indexes/category.py | 16 +++++++++++----- pandas/core/indexes/interval.py | 3 +++ pandas/io/formats/style.py | 5 ++++- 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 094f4a67d2e61..3e90bf5673d70 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -21,6 +21,7 @@ TypeVar, Union, cast, + overload, ) import warnings @@ -166,6 +167,8 @@ ) if TYPE_CHECKING: + from typing import Literal + from pandas import ( CategoricalIndex, DataFrame, @@ -5201,17 +5204,18 @@ def set_value(self, arr, key, value): Returns ------- - indexer : ndarray of int + indexer : np.ndarray[np.intp] Integers from 0 to n - 1 indicating that the index at these positions matches the corresponding target values. Missing values in the target are marked by -1. - missing : ndarray of int + missing : np.ndarray[np.intp] An indexer into the target of the values not found. These correspond to the -1 in the indexer array. """ @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) - def get_indexer_non_unique(self, target): + def get_indexer_non_unique(self, target) -> tuple[np.ndarray, np.ndarray]: + # both returned ndarrays are np.intp target = ensure_index(target) if not self._should_compare(target) and not is_interval_dtype(self.dtype): @@ -5235,10 +5239,10 @@ def get_indexer_non_unique(self, target): tgt_values = target._get_engine_target() indexer, missing = self._engine.get_indexer_non_unique(tgt_values) - return ensure_platform_int(indexer), missing + return ensure_platform_int(indexer), ensure_platform_int(missing) @final - def get_indexer_for(self, target, **kwargs): + def get_indexer_for(self, target, **kwargs) -> np.ndarray: """ Guaranteed return of an indexer even when non-unique. @@ -5247,7 +5251,7 @@ def get_indexer_for(self, target, **kwargs): Returns ------- - numpy.ndarray + numpy.ndarray[np.intp] List of indices. """ if self._index_as_unique: @@ -5255,6 +5259,20 @@ def get_indexer_for(self, target, **kwargs): indexer, _ = self.get_indexer_non_unique(target) return indexer + @overload + def _get_indexer_non_comparable( + self, target: Index, method, unique: Literal[True] = ... + ) -> np.ndarray: + # returned ndarray is np.intp + ... + + @overload + def _get_indexer_non_comparable( + self, target: Index, method, unique: Literal[False] = ... + ) -> tuple[np.ndarray, np.ndarray]: + # both returned ndarrays are np.intp + ... + @final def _get_indexer_non_comparable(self, target: Index, method, unique: bool = True): """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index c9c39fde1da46..b7370e09c7d2c 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import ( Any, Hashable, @@ -422,10 +424,9 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): target = ibase.ensure_index(target) - missing: List[int] if self.equals(target): indexer = None - missing = [] + missing = np.array([], dtype=np.intp) else: indexer, missing = self.get_indexer_non_unique(np.array(target)) @@ -498,6 +499,7 @@ def _get_indexer( limit: Optional[int] = None, tolerance=None, ) -> np.ndarray: + # returned ndarray is np.intp if self.equals(target): return np.arange(len(self), dtype="intp") @@ -505,11 +507,15 @@ def _get_indexer( return self._get_indexer_non_unique(target._values)[0] @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) - def get_indexer_non_unique(self, target): + def get_indexer_non_unique(self, target) -> tuple[np.ndarray, np.ndarray]: + # both returned ndarrays are np.intp target = ibase.ensure_index(target) return self._get_indexer_non_unique(target._values) - def _get_indexer_non_unique(self, values: ArrayLike): + def _get_indexer_non_unique( + self, values: ArrayLike + ) -> tuple[np.ndarray, np.ndarray]: + # both returned ndarrays are np.intp """ get_indexer_non_unique but after unrapping the target Index object. """ @@ -528,7 +534,7 @@ def _get_indexer_non_unique(self, values: ArrayLike): codes = self.categories.get_indexer(values) indexer, missing = self._engine.get_indexer_non_unique(codes) - return ensure_platform_int(indexer), missing + return ensure_platform_int(indexer), ensure_platform_int(missing) @doc(Index._convert_list_indexer) def _convert_list_indexer(self, keyarr): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 1b286f258d72c..934d14782a052 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -701,6 +701,7 @@ def _get_indexer( limit: Optional[int] = None, tolerance: Optional[Any] = None, ) -> np.ndarray: + # returned ndarray is np.intp if isinstance(target, IntervalIndex): # equal indexes -> 1:1 positional match @@ -733,6 +734,7 @@ def _get_indexer( @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) def get_indexer_non_unique(self, target: Index) -> Tuple[np.ndarray, np.ndarray]: + # both returned ndarrays are np.intp target = ensure_index(target) if isinstance(target, IntervalIndex) and not self._should_compare(target): @@ -752,6 +754,7 @@ def get_indexer_non_unique(self, target: Index) -> Tuple[np.ndarray, np.ndarray] return ensure_platform_int(indexer), ensure_platform_int(missing) def _get_indexer_pointwise(self, target: Index) -> Tuple[np.ndarray, np.ndarray]: + # both returned ndarrays are np.intp """ pointwise implementation for get_indexer and get_indexer_non_unique. """ diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 9250d861740fc..5f86639155b65 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1414,7 +1414,10 @@ def hide_columns(self, subset) -> Styler: """ subset = _non_reducing_slice(subset) hidden_df = self.data.loc[subset] - self.hidden_columns = self.columns.get_indexer_for(hidden_df.columns) + hcols = self.columns.get_indexer_for(hidden_df.columns) + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Sequence[int]") + self.hidden_columns = hcols # type: ignore[assignment] return self # ----------------------------------------------------------------------- From a115d0dc3fe316efab199e9930e3627f712416d2 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 5 Apr 2021 14:38:35 -0700 Subject: [PATCH 2/4] update per discussion in #40612 --- pandas/core/indexes/base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 47cfbfcc49516..6a845b72f3da4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5264,13 +5264,15 @@ def _get_indexer_non_comparable( @overload def _get_indexer_non_comparable( - self, target: Index, method, unique: Literal[False] = ... + self, target: Index, method, unique: Literal[False] ) -> tuple[np.ndarray, np.ndarray]: # both returned ndarrays are np.intp ... @final - def _get_indexer_non_comparable(self, target: Index, method, unique: bool = True): + def _get_indexer_non_comparable( + self, target: Index, method, unique: bool = True + ) -> np.ndarray | tuple[np.ndarray, np.ndarray]: """ Called from get_indexer or get_indexer_non_unique when the target is of a non-comparable dtype. From 0e7a16879bee7952c9dc70d494f1440519399512 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 6 Apr 2021 15:29:46 -0700 Subject: [PATCH 3/4] one more overload --- pandas/core/indexes/base.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 95452153e8b33..9c8ff6c5c5e30 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5259,6 +5259,13 @@ def _get_indexer_non_comparable( # both returned ndarrays are np.intp ... + @overload + def _get_indexer_non_comparable( + self, target: Index, method, unique: bool = True + ) -> np.ndarray | tuple[np.ndarray, np.ndarray]: + # any returned ndarrays are np.intp + ... + @final def _get_indexer_non_comparable( self, target: Index, method, unique: bool = True From 407aeb3b13d0eb93e57bc1cd798017c9c537072d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 6 Apr 2021 16:11:15 -0700 Subject: [PATCH 4/4] pre-commit fixup --- pandas/core/indexes/category.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index b7370e09c7d2c..7a5f9640c51cc 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -3,8 +3,6 @@ from typing import ( Any, Hashable, - List, - Optional, ) import warnings @@ -218,7 +216,7 @@ def __new__( data=None, categories=None, ordered=None, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy=False, name=None, ): @@ -351,7 +349,7 @@ def _format_attrs(self): attrs.append(("length", len(self))) return attrs - def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: + def _format_with_header(self, header: list[str], na_rep: str = "NaN") -> list[str]: from pandas.io.formats.printing import pprint_thing result = [ @@ -495,8 +493,8 @@ def _maybe_cast_indexer(self, key) -> int: def _get_indexer( self, target: Index, - method: Optional[str] = None, - limit: Optional[int] = None, + method: str | None = None, + limit: int | None = None, tolerance=None, ) -> np.ndarray: # returned ndarray is np.intp @@ -632,7 +630,7 @@ def map(self, mapper): mapped = self._values.map(mapper) return Index(mapped, name=self.name) - def _concat(self, to_concat: List[Index], name: Hashable) -> Index: + def _concat(self, to_concat: list[Index], name: Hashable) -> Index: # if calling index is category, don't check dtype of others try: codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])