From 6c835114ee3d923b727aca15d37046fc8da6e654 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 1 May 2021 19:59:39 -0400 Subject: [PATCH 01/12] TYP: ExtensionArray.__getitem__ --- pandas/core/arrays/_mixins.py | 15 +++++++++++++-- pandas/core/arrays/base.py | 10 +++++++++- pandas/core/arrays/datetimelike.py | 20 +++++++++++++------- pandas/core/indexes/base.py | 6 +----- pandas/core/missing.py | 6 +----- 5 files changed, 37 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index e97687de34273..976e0e73bbbfd 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -6,6 +6,7 @@ Sequence, TypeVar, cast, + overload, ) import numpy as np @@ -13,7 +14,7 @@ from pandas._libs import lib from pandas._typing import ( F, - PositionalIndexer2D, + PositionalIndexer, Shape, type_t, ) @@ -276,9 +277,19 @@ def __setitem__(self, key, value): def _validate_setitem_value(self, value): return value + @overload + def __getitem__(self, item: int | np.integer) -> Any: + ... + + @overload + def __getitem__( + self: NDArrayBackedExtensionArrayT, item: slice | np.ndarray | Sequence[int] + ) -> NDArrayBackedExtensionArrayT: + ... + def __getitem__( self: NDArrayBackedExtensionArrayT, - key: PositionalIndexer2D, + key: PositionalIndexer, ) -> NDArrayBackedExtensionArrayT | Any: if lib.is_integer(key): # fast-path diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 5a2643dd531ed..94ce7cfccd02a 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -16,6 +16,7 @@ Sequence, TypeVar, cast, + overload, ) import numpy as np @@ -288,6 +289,13 @@ def _from_factorized(cls, values, original): # ------------------------------------------------------------------------ # Must be a Sequence # ------------------------------------------------------------------------ + @overload + def __getitem__(self, item: int | np.integer) -> Any: + ... 
+ + @overload + def __getitem__(self, item: slice | np.ndarray | Sequence[int]) -> ExtensionArray: + ... def __getitem__(self, item: PositionalIndexer) -> ExtensionArray | Any: """ @@ -729,7 +737,7 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values - def dropna(self): + def dropna(self: ExtensionArrayT) -> ExtensionArrayT: """ Return ExtensionArray without NA values. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 93df88aba2cba..814e96d5d2c49 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -48,7 +48,7 @@ Dtype, DtypeObj, NpDtype, - PositionalIndexer2D, + PositionalIndexer, ) from pandas.compat.numpy import function as nv from pandas.errors import ( @@ -309,8 +309,18 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: return np.array(list(self), dtype=object) return self._ndarray + @overload + def __getitem__(self, item: int | np.integer) -> DTScalarOrNaT: + ... + + @overload + def __getitem__( + self, item: slice | np.ndarray | Sequence[int] + ) -> DatetimeLikeArrayMixin: + ... + def __getitem__( - self, key: PositionalIndexer2D + self, key: PositionalIndexer ) -> DatetimeLikeArrayMixin | DTScalarOrNaT: """ This getitem defers to the underlying array, which by-definition can @@ -1778,11 +1788,7 @@ def factorize(self, na_sentinel=-1, sort: bool = False): uniques = self.copy() # TODO: copy or view? 
if sort and self.freq.n < 0: codes = codes[::-1] - # TODO: overload __getitem__, a slice indexer returns same type as self - # error: Incompatible types in assignment (expression has type - # "Union[DatetimeLikeArrayMixin, Union[Any, Any]]", variable - # has type "TimelikeOps") - uniques = uniques[::-1] # type: ignore[assignment] + uniques = uniques[::-1] return codes, uniques # FIXME: shouldn't get here; we are ignoring sort return super().factorize(na_sentinel=na_sentinel) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1bae9947bd875..55168bd8d90b8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4595,11 +4595,7 @@ def __getitem__(self, key): result = getitem(key) if not is_scalar(result): - # error: Argument 1 to "ndim" has incompatible type "Union[ExtensionArray, - # Any]"; expected "Union[Union[int, float, complex, str, bytes, generic], - # Sequence[Union[int, float, complex, str, bytes, generic]], - # Sequence[Sequence[Any]], _SupportsArray]" - if np.ndim(result) > 1: # type: ignore[arg-type] + if np.ndim(result) > 1: deprecate_ndim_indexing(result) return result # NB: Using _constructor._simple_new would break if MultiIndex diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 8849eb0670faa..d6c0dcce63603 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -421,11 +421,7 @@ def interpolate_1d( if method in NP_METHODS: # np.interp requires sorted X values, #21037 - # error: Argument 1 to "argsort" has incompatible type "Union[ExtensionArray, - # Any]"; expected "Union[Union[int, float, complex, str, bytes, generic], - # Sequence[Union[int, float, complex, str, bytes, generic]], - # Sequence[Sequence[Any]], _SupportsArray]" - indexer = np.argsort(inds[valid]) # type: ignore[arg-type] + indexer = np.argsort(inds[valid]) result[invalid] = np.interp( inds[invalid], inds[valid][indexer], yvalues[valid][indexer] ) From 0c8d648821e7c60f8306e8ae7de97aebab122d55 Mon Sep 17 00:00:00 
2001 From: Irv Lustig Date: Sun, 2 May 2021 14:18:42 -0400 Subject: [PATCH 02/12] make base class use PositionalIndexer2D --- pandas/core/arrays/_mixins.py | 12 ++++++------ pandas/core/arrays/base.py | 4 ++-- pandas/core/arrays/datetimelike.py | 10 +++++++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 976e0e73bbbfd..c6d8e4106dd1b 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -14,7 +14,7 @@ from pandas._libs import lib from pandas._typing import ( F, - PositionalIndexer, + PositionalIndexer2D, Shape, type_t, ) @@ -283,14 +283,14 @@ def __getitem__(self, item: int | np.integer) -> Any: @overload def __getitem__( - self: NDArrayBackedExtensionArrayT, item: slice | np.ndarray | Sequence[int] - ) -> NDArrayBackedExtensionArrayT: + self: NDArrayBackedExtensionArray, item: slice | np.ndarray | Sequence[int] + ) -> NDArrayBackedExtensionArray: ... def __getitem__( - self: NDArrayBackedExtensionArrayT, - key: PositionalIndexer, - ) -> NDArrayBackedExtensionArrayT | Any: + self: NDArrayBackedExtensionArray, + key: PositionalIndexer2D, + ) -> NDArrayBackedExtensionArray | Any: if lib.is_integer(key): # fast-path result = self._ndarray[key] diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 94ce7cfccd02a..a83f9cd104326 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -25,7 +25,7 @@ from pandas._typing import ( ArrayLike, Dtype, - PositionalIndexer, + PositionalIndexer2D, Shape, ) from pandas.compat import set_function_name @@ -297,7 +297,7 @@ def __getitem__(self, item: int | np.integer) -> Any: def __getitem__(self, item: slice | np.ndarray | Sequence[int]) -> ExtensionArray: ... - def __getitem__(self, item: PositionalIndexer) -> ExtensionArray | Any: + def __getitem__(self, item: PositionalIndexer2D) -> ExtensionArray | Any: """ Select a subset of self. 
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 814e96d5d2c49..da45895f3627b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -48,7 +48,7 @@ Dtype, DtypeObj, NpDtype, - PositionalIndexer, + PositionalIndexer2D, ) from pandas.compat.numpy import function as nv from pandas.errors import ( @@ -320,13 +320,17 @@ def __getitem__( ... def __getitem__( - self, key: PositionalIndexer + self, key: PositionalIndexer2D ) -> DatetimeLikeArrayMixin | DTScalarOrNaT: """ This getitem defers to the underlying array, which by-definition can only handle list-likes, slices, and integer scalars """ - result = super().__getitem__(key) + # error: Invalid index type "Union[Union[int, integer[Any], slice, + # Sequence[int], ndarray], Tuple[Union[int, integer[Any], slice, Sequence[int], + # ndarray], Union[int, integer[Any], slice, Sequence[int], ndarray]]]" for + # "DatetimeLikeArrayMixin"; expected type "Union[int, integer[Any]]" + result = super().__getitem__(key) # type: ignore[index] if lib.is_scalar(result): return result From 94ced76e082879454b60c7097de6507c27b5453c Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 16 May 2021 15:08:19 -0400 Subject: [PATCH 03/12] fix up getitem typing for DateTimeOps --- pandas/core/arrays/datetimelike.py | 8 ++++---- pandas/core/arrays/datetimes.py | 7 ++----- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index de86455d248b9..e7f85a7cf0902 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -314,13 +314,13 @@ def __getitem__(self, item: int | np.integer) -> DTScalarOrNaT: @overload def __getitem__( - self, item: slice | np.ndarray | Sequence[int] - ) -> DatetimeLikeArrayMixin: + self: DatetimeLikeArrayT, item: slice | np.ndarray | Sequence[int] + ) -> DatetimeLikeArrayT: ... 
def __getitem__( - self, key: PositionalIndexer2D - ) -> DatetimeLikeArrayMixin | DTScalarOrNaT: + self: DatetimeLikeArrayT, key: PositionalIndexer2D + ) -> DatetimeLikeArrayT | DTScalarOrNaT: """ This getitem defers to the underlying array, which by-definition can only handle list-likes, slices, and integer scalars diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f07a04b8087e0..e9e2f0e24a817 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -8,7 +8,6 @@ ) from typing import ( TYPE_CHECKING, - cast, overload, ) import warnings @@ -476,11 +475,9 @@ def _generate_range( index = cls._simple_new(arr, freq=None, dtype=dtype) if not left_closed and len(index) and index[0] == start: - # TODO: overload DatetimeLikeArrayMixin.__getitem__ - index = cast(DatetimeArray, index[1:]) + index = index[1:] if not right_closed and len(index) and index[-1] == end: - # TODO: overload DatetimeLikeArrayMixin.__getitem__ - index = cast(DatetimeArray, index[:-1]) + index = index[:-1] dtype = tz_to_dtype(tz) return cls._simple_new(index._ndarray, freq=freq, dtype=dtype) From 2773c8b39abca4156312b4c74b28cbe2fff6f9e2 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 23 May 2021 14:33:30 -0400 Subject: [PATCH 04/12] Make getitem on EA accept 1D, and change declaration for 2D arrays --- pandas/_typing.py | 7 +++--- pandas/core/arrays/_mixins.py | 10 +++++--- pandas/core/arrays/base.py | 12 ++++++--- pandas/core/arrays/categorical.py | 18 ++++++++++++- pandas/core/arrays/datetimelike.py | 11 ++++---- pandas/core/arrays/interval.py | 16 +++++++++++- pandas/core/arrays/masked.py | 15 ++++++++++- pandas/core/arrays/sparse/array.py | 35 ++++++++++++++++++++++++-- pandas/core/arrays/string_arrow.py | 15 ++++++++++- pandas/core/indexes/base.py | 12 ++++++--- pandas/core/internals/array_manager.py | 21 +++++++--------- pandas/core/internals/blocks.py | 6 ++--- pandas/core/internals/concat.py | 6 ++--- 
pandas/core/internals/ops.py | 24 +++++++++--------- 14 files changed, 152 insertions(+), 56 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 7763b0ceb610a..2698ed696191b 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -211,7 +211,6 @@ # TODO: add Ellipsis, see # https://github.com/python/typing/issues/684#issuecomment-548203158 # https://bugs.python.org/issue41810 -PositionalIndexer = Union[int, np.integer, slice, Sequence[int], np.ndarray] -PositionalIndexer2D = Union[ - PositionalIndexer, Tuple[PositionalIndexer, PositionalIndexer] -] +PositionalIndexer = Union[int, np.integer, slice, List[int], np.ndarray] +PositionalIndexerTuple = Tuple[PositionalIndexer, PositionalIndexer] +PositionalIndexer2D = Union[PositionalIndexer, PositionalIndexerTuple] diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index db99fdacf3ccc..8ae32445be8b3 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -16,6 +16,7 @@ from pandas._typing import ( F, PositionalIndexer2D, + PositionalIndexerTuple, Shape, type_t, ) @@ -206,19 +207,20 @@ def _validate_setitem_value(self, value): return value @overload - def __getitem__(self, item: int | np.integer) -> Any: + def __getitem__(self, key: int | np.integer) -> Any: ... @overload def __getitem__( - self: NDArrayBackedExtensionArray, item: slice | np.ndarray | Sequence[int] + self: NDArrayBackedExtensionArray, + key: slice | np.ndarray | list[int] | PositionalIndexerTuple, ) -> NDArrayBackedExtensionArray: ... 
def __getitem__( - self: NDArrayBackedExtensionArray, + self: NDArrayBackedExtensionArrayT, key: PositionalIndexer2D, - ) -> NDArrayBackedExtensionArray | Any: + ) -> NDArrayBackedExtensionArrayT | Any: if lib.is_integer(key): # fast-path result = self._ndarray[key] diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 666cc073cd3b5..c64ecff3a74f8 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -27,7 +27,7 @@ ArrayLike, Dtype, FillnaOptions, - PositionalIndexer2D, + PositionalIndexer, Shape, ) from pandas.compat import set_function_name @@ -297,10 +297,14 @@ def __getitem__(self, item: int | np.integer) -> Any: ... @overload - def __getitem__(self, item: slice | np.ndarray | Sequence[int]) -> ExtensionArray: + def __getitem__( + self: ExtensionArrayT, item: slice | np.ndarray | list[int] + ) -> ExtensionArrayT: ... - def __getitem__(self, item: PositionalIndexer2D) -> ExtensionArray | Any: + def __getitem__( + self: ExtensionArrayT, item: PositionalIndexer + ) -> ExtensionArrayT | Any: """ Select a subset of self. @@ -314,6 +318,8 @@ def __getitem__(self, item: PositionalIndexer2D) -> ExtensionArray | Any: * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' + * list[int]: A list of int + Returns ------- item : scalar or ExtensionArray diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index cb8a08f5668ac..c85ff07ee1617 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -11,6 +11,7 @@ TypeVar, Union, cast, + overload, ) from warnings import ( catch_warnings, @@ -34,6 +35,8 @@ Dtype, NpDtype, Ordered, + PositionalIndexer2D, + PositionalIndexerTuple, Scalar, Shape, type_t, @@ -2015,7 +2018,20 @@ def __repr__(self) -> str: # ------------------------------------------------------------------ - def __getitem__(self, key): + @overload + def __getitem__(self, key: int | np.integer) -> object: + ... 
+ + @overload + def __getitem__( + self: CategoricalT, + key: slice | np.ndarray | list[int] | PositionalIndexerTuple, + ) -> CategoricalT: + ... + + def __getitem__( + self: CategoricalT, key: PositionalIndexer2D + ) -> CategoricalT | object: """ Return an item. """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4ae63068edbcb..6b371a3531ad1 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -48,6 +48,7 @@ DtypeObj, NpDtype, PositionalIndexer2D, + PositionalIndexerTuple, ) from pandas.compat.numpy import function as nv from pandas.errors import ( @@ -314,7 +315,8 @@ def __getitem__(self, item: int | np.integer) -> DTScalarOrNaT: @overload def __getitem__( - self: DatetimeLikeArrayT, item: slice | np.ndarray | Sequence[int] + self: DatetimeLikeArrayT, + item: slice | np.ndarray | list[int] | PositionalIndexerTuple, ) -> DatetimeLikeArrayT: ... @@ -325,11 +327,8 @@ def __getitem__( This getitem defers to the underlying array, which by-definition can only handle list-likes, slices, and integer scalars """ - # error: Invalid index type "Union[Union[int, integer[Any], slice, - # Sequence[int], ndarray], Tuple[Union[int, integer[Any], slice, Sequence[int], - # ndarray], Union[int, integer[Any], slice, Sequence[int], ndarray]]]" for - # "DatetimeLikeArrayMixin"; expected type "Union[int, integer[Any]]" - result = super().__getitem__(key) # type: ignore[index] + # Use cast as we know we will get back a DatetimeLikeArray + result = cast(DatetimeLikeArrayT, super().__getitem__(key)) if lib.is_scalar(result): return result diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index a99bf245a6073..2830e90ceaad7 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -10,6 +10,7 @@ Sequence, TypeVar, cast, + overload, ) import numpy as np @@ -28,6 +29,7 @@ ArrayLike, Dtype, NpDtype, + PositionalIndexer, ) from pandas.compat.numpy import 
function as nv from pandas.util._decorators import Appender @@ -629,7 +631,19 @@ def __iter__(self): def __len__(self) -> int: return len(self._left) - def __getitem__(self, key): + @overload + def __getitem__(self, key: int | np.integer) -> Interval: + ... + + @overload + def __getitem__( + self: IntervalArrayT, key: slice | np.ndarray | list[int] + ) -> IntervalArrayT: + ... + + def __getitem__( + self: IntervalArrayT, key: PositionalIndexer + ) -> IntervalArrayT | Interval: key = check_array_indexer(self, key) left = self._left[key] right = self._right[key] diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 11f9f645920ec..9890543b9c05d 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -5,6 +5,7 @@ Any, Sequence, TypeVar, + overload, ) import numpy as np @@ -135,7 +136,19 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): def dtype(self) -> BaseMaskedDtype: raise AbstractMethodError(self) - def __getitem__(self, item: PositionalIndexer) -> BaseMaskedArray | Any: + @overload + def __getitem__(self, item: int | np.integer) -> Any: + ... + + @overload + def __getitem__( + self: BaseMaskedArrayT, item: slice | np.ndarray | list[int] + ) -> BaseMaskedArrayT: + ... 
+ + def __getitem__( + self: BaseMaskedArrayT, item: PositionalIndexer + ) -> BaseMaskedArrayT | Any: if is_integer(item): if self._mask[item]: return self.dtype.na_value diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 4847372f18239..fe17245fc9fb6 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -7,10 +7,13 @@ import numbers import operator from typing import ( + TYPE_CHECKING, Any, Callable, Sequence, TypeVar, + cast, + overload, ) import warnings @@ -27,6 +30,7 @@ from pandas._typing import ( Dtype, NpDtype, + PositionalIndexer, Scalar, ) from pandas.compat.numpy import function as nv @@ -77,6 +81,17 @@ import pandas.io.formats.printing as printing +# See https://github.com/python/typing/issues/684 +if TYPE_CHECKING: + from enum import Enum + + class ellipsis(Enum): + Ellipsis = "..." + + Ellipsis = ellipsis.Ellipsis +else: + ellipsis = type(Ellipsis) + # ---------------------------------------------------------------------------- # Array @@ -810,8 +825,21 @@ def value_counts(self, dropna: bool = True): # -------- # Indexing # -------- + @overload + def __getitem__(self, key: int | np.integer) -> Any: + ... + + @overload + def __getitem__( + self: SparseArrayT, + key: slice | np.ndarray | list[int] | tuple[int | ellipsis, ...], + ) -> SparseArrayT: + ... 
- def __getitem__(self, key): + def __getitem__( + self: SparseArrayT, + key: PositionalIndexer | tuple[int | ellipsis, ...], + ) -> SparseArrayT | Any: if isinstance(key, tuple): if len(key) > 1: @@ -821,6 +849,8 @@ def __getitem__(self, key): key = key[:-1] if len(key) > 1: raise IndexError("too many indices for array.") + if key[0] is Ellipsis: + raise ValueError("Cannot slice with Ellipsis") key = key[0] if is_integer(key): @@ -849,7 +879,8 @@ def __getitem__(self, key): key = check_array_indexer(self, key) if com.is_bool_indexer(key): - + # mypy doesn't know we have an array here + key = cast(np.ndarray, key) return self.take(np.arange(len(key), dtype=np.int32)[key]) elif hasattr(key, "__len__"): return self.take(key) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index d5ee28eb7017e..86e177cbd6ab1 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -6,6 +6,7 @@ Any, Sequence, cast, + overload, ) import numpy as np @@ -341,7 +342,19 @@ def _concat_same_type(cls, to_concat) -> ArrowStringArray: ) ) - def __getitem__(self, item: PositionalIndexer) -> Any: + @overload + def __getitem__(self, item: int | np.integer) -> str: + ... + + @overload + def __getitem__( + self: ArrowStringArray, item: slice | np.ndarray | list[int] + ) -> ArrowStringArray: + ... + + def __getitem__( + self: ArrowStringArray, item: PositionalIndexer + ) -> ArrowStringArray | str: """Select a subset of self. Parameters diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9f18721ee575a..b263d47510b80 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4915,13 +4915,19 @@ def asof_locs(self, where: Index, mask: np.ndarray) -> np.ndarray: which correspond to the return values of the `asof` function for every element in `where`. 
""" - locs = self._values[mask].searchsorted(where._values, side="right") + # error: Argument 1 to "searchsorted" of "ndarray" has incompatible type + # "Union[ExtensionArray, ndarray]"; expected "Union[Union[int, float, complex, + # str, bytes, generic], Sequence[Union[int, float, complex, str, bytes, + # generic]], Sequence[Sequence[Any]], _SupportsArray]" + # TODO: will be fixed when ExtensionArray.searchsorted() is fixed + locs = self._values[mask].searchsorted( + where._values, side="right" # type: ignore[arg-type] + ) locs = np.where(locs > 0, locs - 1, 0) result = np.arange(len(self), dtype=np.intp)[mask].take(locs) - # TODO: overload return type of ExtensionArray.__getitem__ - first_value = cast(Any, self._values[mask.argmax()]) + first_value = self._values[mask.argmax()] result[(locs == 0) & (where._values < first_value)] = -1 return result diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 31e32b053367b..d984531872aa1 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -324,10 +324,9 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T if self.ndim == 2 and arr.ndim == 2: # 2D for np.ndarray or DatetimeArray/TimedeltaArray assert len(arr) == 1 - # error: Invalid index type "Tuple[int, slice]" for - # "Union[ndarray, ExtensionArray]"; expected type - # "Union[int, slice, ndarray]" - arr = arr[0, :] # type: ignore[index] + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[int, slice]" + arr = arr[0, :] # type: ignore[call-overload] result_arrays.append(arr) return type(self)(result_arrays, self._axes) @@ -843,10 +842,9 @@ def iset(self, loc: int | slice | np.ndarray, value: ArrayLike): assert value.shape[0] == len(self._axes[0]) for value_idx, mgr_idx in enumerate(indices): - # error: Invalid index type "Tuple[slice, int]" for - # "Union[ExtensionArray, ndarray]"; expected type - # 
"Union[int, slice, ndarray]" - value_arr = value[:, value_idx] # type: ignore[index] + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[slice, int]" + value_arr = value[:, value_idx] # type: ignore[call-overload] self.arrays[mgr_idx] = value_arr return @@ -866,10 +864,9 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: value = extract_array(value, extract_numpy=True) if value.ndim == 2: if value.shape[0] == 1: - # error: Invalid index type "Tuple[int, slice]" for - # "Union[Any, ExtensionArray, ndarray]"; expected type - # "Union[int, slice, ndarray]" - value = value[0, :] # type: ignore[index] + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[int, slice]" + value = value[0, :] # type: ignore[call-overload] else: raise ValueError( f"Expected a 1D array, got an array with shape {value.shape}" diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4f1b16e747394..4e1a3f19ad56d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -316,9 +316,9 @@ def getitem_block_index(self, slicer: slice) -> Block: Assumes self.ndim == 2 """ - # error: Invalid index type "Tuple[ellipsis, slice]" for - # "Union[ndarray, ExtensionArray]"; expected type "Union[int, slice, ndarray]" - new_values = self.values[..., slicer] # type: ignore[index] + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[int, slice]" + new_values = self.values[..., slicer] # type: ignore[call-overload] return type(self)(new_values, self._mgr_locs, ndim=self.ndim) @final diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 9642b30ab91ca..842184e28ec8c 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -506,10 +506,10 @@ def _concatenate_join_units( # concatting with at least one EA means we are concatting a single 
column # the non-EA values are 2D arrays with shape (1, n) - # error: Invalid index type "Tuple[int, slice]" for - # "Union[ExtensionArray, ndarray]"; expected type "Union[int, slice, ndarray]" + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[int, slice]" to_concat = [ - t if is_1d_only_ea_obj(t) else t[0, :] # type: ignore[index] + t if is_1d_only_ea_obj(t) else t[0, :] # type: ignore[call-overload] for t in to_concat ] concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True) diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py index 5f03d6709dfa4..35caeea9b9067 100644 --- a/pandas/core/internals/ops.py +++ b/pandas/core/internals/ops.py @@ -106,28 +106,28 @@ def _get_same_shape_values( # TODO(EA2D): with 2D EAs only this first clause would be needed if not (left_ea or right_ea): - # error: Invalid index type "Tuple[Any, slice]" for "Union[ndarray, - # ExtensionArray]"; expected type "Union[int, slice, ndarray]" - lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[index] + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[Union[ndarray, slice], slice]" + lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[call-overload] assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) elif left_ea and right_ea: assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) elif right_ea: # lvals are 2D, rvals are 1D - # error: Invalid index type "Tuple[Any, slice]" for "Union[ndarray, - # ExtensionArray]"; expected type "Union[int, slice, ndarray]" - lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[index] + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[Union[ndarray, slice], slice]" + lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[call-overload] assert lvals.shape[0] == 1, lvals.shape - # error: Invalid index type "Tuple[int, slice]" for "Union[Any, - # 
ExtensionArray]"; expected type "Union[int, slice, ndarray]" - lvals = lvals[0, :] # type: ignore[index] + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[int, slice]" + lvals = lvals[0, :] # type: ignore[call-overload] else: # lvals are 1D, rvals are 2D assert rvals.shape[0] == 1, rvals.shape - # error: Invalid index type "Tuple[int, slice]" for "Union[ndarray, - # ExtensionArray]"; expected type "Union[int, slice, ndarray]" - rvals = rvals[0, :] # type: ignore[index] + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[int, slice]" + rvals = rvals[0, :] # type: ignore[call-overload] return lvals, rvals From 01c0cf521a3d4da378415b0a42507ad9c958bcf8 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 31 May 2021 11:47:23 -0400 Subject: [PATCH 05/12] casting in datetimelike, allow NA in string arrow --- pandas/_typing.py | 1 + pandas/core/arrays/datetimelike.py | 10 +++++++--- pandas/core/arrays/string_arrow.py | 7 +++++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 2698ed696191b..904c503be9907 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -211,6 +211,7 @@ # TODO: add Ellipsis, see # https://github.com/python/typing/issues/684#issuecomment-548203158 # https://bugs.python.org/issue41810 +# Using List[int] here rather than Sequence[int] to disallow tuples. 
PositionalIndexer = Union[int, np.integer, slice, List[int], np.ndarray] PositionalIndexerTuple = Tuple[PositionalIndexer, PositionalIndexer] PositionalIndexer2D = Union[PositionalIndexer, PositionalIndexerTuple] diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 6b371a3531ad1..e14e368ee7e1e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -327,11 +327,15 @@ def __getitem__( This getitem defers to the underlying array, which by-definition can only handle list-likes, slices, and integer scalars """ - # Use cast as we know we will get back a DatetimeLikeArray - result = cast(DatetimeLikeArrayT, super().__getitem__(key)) + # Use cast as we know we will get back a DatetimeLikeArray or DTScalar + result = cast( + Union[DatetimeLikeArrayT, DTScalarOrNaT], super().__getitem__(key) + ) if lib.is_scalar(result): return result - + else: + # At this point we know the result is an array. + result = cast(DatetimeLikeArrayT, result) result._freq = self._get_getitem_freq(key) return result diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index a014dff23e462..076cfd08e1d04 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -6,6 +6,7 @@ TYPE_CHECKING, Any, Sequence, + Union, cast, overload, ) @@ -80,6 +81,8 @@ if TYPE_CHECKING: from pandas import Series +ArrowStringScalarOrNAT = Union[str, libmissing.NA] + @register_extension_dtype class ArrowStringDtype(StringDtype): @@ -344,7 +347,7 @@ def _concat_same_type(cls, to_concat) -> ArrowStringArray: ) @overload - def __getitem__(self, item: int | np.integer) -> str: + def __getitem__(self, item: int | np.integer) -> ArrowStringScalarOrNAT: ... @overload @@ -355,7 +358,7 @@ def __getitem__( def __getitem__( self: ArrowStringArray, item: PositionalIndexer - ) -> ArrowStringArray | str: + ) -> ArrowStringArray | ArrowStringScalarOrNAT: """Select a subset of self. 
Parameters From 3b38de218d499f6b64579bbabe018144554ad93d Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 31 May 2021 13:32:12 -0400 Subject: [PATCH 06/12] fix string arrow NA type --- pandas/core/arrays/string_arrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 076cfd08e1d04..7fcf063ca0453 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -81,7 +81,7 @@ if TYPE_CHECKING: from pandas import Series -ArrowStringScalarOrNAT = Union[str, libmissing.NA] +ArrowStringScalarOrNAT = Union[str, libmissing.NAType] @register_extension_dtype From d7c545daf0637743a76660be012f8c8f770adb48 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 13 Jun 2021 14:50:39 -0400 Subject: [PATCH 07/12] change an overload in mixins to use NDArrayBackedExtensionArrayT --- pandas/core/arrays/_mixins.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 5605aacd41424..61c5dd1a08371 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -193,9 +193,9 @@ def __getitem__(self, key: int | np.integer) -> Any: @overload def __getitem__( - self: NDArrayBackedExtensionArray, + self: NDArrayBackedExtensionArrayT, key: slice | np.ndarray | list[int] | PositionalIndexerTuple, - ) -> NDArrayBackedExtensionArray: + ) -> NDArrayBackedExtensionArrayT: ... 
def __getitem__( From 076e4342eea96545632d4f4ec439382baaf3575c Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 14 Jun 2021 08:03:20 -0400 Subject: [PATCH 08/12] categorical returns Any, interval for NA, put back libmissing in string_arrow --- pandas/core/arrays/categorical.py | 7 +++---- pandas/core/arrays/interval.py | 6 ++++-- pandas/core/arrays/string_arrow.py | 5 ++++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f8d6ad082afce..6b5416f1893ea 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -6,6 +6,7 @@ from shutil import get_terminal_size from typing import ( TYPE_CHECKING, + Any, Hashable, Sequence, TypeVar, @@ -2005,7 +2006,7 @@ def __repr__(self) -> str: # ------------------------------------------------------------------ @overload - def __getitem__(self, key: int | np.integer) -> object: + def __getitem__(self, key: int | np.integer) -> Any: ... @overload @@ -2015,9 +2016,7 @@ def __getitem__( ) -> CategoricalT: ... - def __getitem__( - self: CategoricalT, key: PositionalIndexer2D - ) -> CategoricalT | object: + def __getitem__(self: CategoricalT, key: PositionalIndexer2D) -> CategoricalT | Any: """ Return an item. """ diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 1a3e601d17496..273dc1520682e 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -9,6 +9,7 @@ from typing import ( Sequence, TypeVar, + Union, cast, overload, ) @@ -87,6 +88,7 @@ ) IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray") +IntervalOrNA = Union[Interval, float] _interval_shared_docs: dict[str, str] = {} @@ -631,7 +633,7 @@ def __len__(self) -> int: return len(self._left) @overload - def __getitem__(self, key: int | np.integer) -> Interval: + def __getitem__(self, key: int | np.integer) -> IntervalOrNA: ... 
@overload @@ -642,7 +644,7 @@ def __getitem__( def __getitem__( self: IntervalArrayT, key: PositionalIndexer - ) -> IntervalArrayT | Interval: + ) -> IntervalArrayT | IntervalOrNA: key = check_array_indexer(self, key) left = self._left[key] right = self._right[key] diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 3c8d85fd0a18b..e3338abd12ade 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -13,7 +13,10 @@ import numpy as np -from pandas._libs import lib +from pandas._libs import ( + lib, + missing as libmissing, +) from pandas._typing import ( Dtype, NpDtype, From 1dbb668a2a7fda370d69b0bd890ee2aeab352f21 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Thu, 8 Jul 2021 12:22:48 -0400 Subject: [PATCH 09/12] change ignore messages --- pandas/core/arrays/interval.py | 9 +++++---- pandas/core/indexes/base.py | 8 +++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 80eafa43a295a..ca0d2395ef9f1 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1643,10 +1643,11 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: return self._shallow_copy(left=new_left, right=new_right) def unique(self) -> IntervalArray: - # Invalid index type "Tuple[slice, int]" for "Union[ExtensionArray, - # ndarray[Any, Any]]"; expected type "Union[int, integer[Any], slice, - # Sequence[int], ndarray[Any, Any]]" - nc = unique(self._combined.view("complex128")[:, 0]) # type: ignore[index] + # No overload variant of "__getitem__" of "ExtensionArray" matches argument + # type "Tuple[slice, int]" + nc = unique( + self._combined.view("complex128")[:, 0] # type: ignore[call-overload] + ) nc = nc[:, None] return self._from_combined(nc) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 42a94b47017eb..581c081e43230 100644 --- a/pandas/core/indexes/base.py +++ 
b/pandas/core/indexes/base.py @@ -5024,13 +5024,11 @@ def asof_locs(self, where: Index, mask: np.ndarray) -> np.ndarray: which correspond to the return values of the `asof` function for every element in `where`. """ - # error: Argument 1 to "searchsorted" of "ndarray" has incompatible type - # "Union[ExtensionArray, ndarray]"; expected "Union[Union[int, float, complex, - # str, bytes, generic], Sequence[Union[int, float, complex, str, bytes, - # generic]], Sequence[Sequence[Any]], _SupportsArray]" + # error: No overload variant of "searchsorted" of "ndarray" matches argument + # types "Union[ExtensionArray, ndarray[Any, Any]]", "str" # TODO: will be fixed when ExtensionArray.searchsorted() is fixed locs = self._values[mask].searchsorted( - where._values, side="right" # type: ignore[arg-type] + where._values, side="right" # type: ignore[call-overload] ) locs = np.where(locs > 0, locs - 1, 0) From adf3a7373ccf0ee3ac9b6fe5cd9ac137803e0020 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 26 Jul 2021 07:54:00 -0400 Subject: [PATCH 10/12] resolve conflicts in core/internals/blocks.py --- pandas/core/internals/blocks.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index dca7dd96a469d..4d7b96100d9d8 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -312,17 +312,6 @@ def getitem_block(self, slicer) -> Block: return type(self)(new_values, new_mgr_locs, self.ndim) - def getitem_block_index(self, slicer: slice) -> Block: - """ - Perform __getitem__-like specialized to slicing along index. 
- - Assumes self.ndim == 2 - """ - # error: No overload variant of "__getitem__" of "ExtensionArray" matches - # argument type "Tuple[int, slice]" - new_values = self.values[..., slicer] # type: ignore[call-overload] - return type(self)(new_values, self._mgr_locs, ndim=self.ndim) - @final def getitem_block_columns(self, slicer, new_mgr_locs: BlockPlacement) -> Block: """ @@ -1564,9 +1553,9 @@ def getitem_block_index(self, slicer: slice) -> ExtensionBlock: Assumes self.ndim == 2 """ - # error: Invalid index type "Tuple[ellipsis, slice]" for - # "Union[ndarray, ExtensionArray]"; expected type "Union[int, slice, ndarray]" - new_values = self.values[..., slicer] # type: ignore[index] + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[ellipsis, slice]" + new_values = self.values[..., slicer] # type: ignore[call-overload] return type(self)(new_values, self._mgr_locs, ndim=self.ndim) def fillna( From 9a8550dbb4b4acd6a0136cc2796c1bef950bab32 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 6 Sep 2021 13:05:28 -0400 Subject: [PATCH 11/12] create types for split of getitem arguments --- pandas/_typing.py | 4 +++- pandas/core/arrays/_mixins.py | 6 ++++-- pandas/core/arrays/base.py | 8 ++++---- pandas/core/arrays/categorical.py | 6 ++++-- pandas/core/arrays/datetimelike.py | 6 ++++-- pandas/core/arrays/interval.py | 8 ++++---- pandas/core/arrays/masked.py | 8 ++++---- pandas/core/arrays/sparse/array.py | 6 ++++-- pandas/core/arrays/string_arrow.py | 8 ++++---- pandas/core/groupby/groupby.py | 15 ++++++--------- 10 files changed, 41 insertions(+), 34 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index cea420a548b89..6e3706263e634 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -205,6 +205,8 @@ # https://github.com/python/typing/issues/684#issuecomment-548203158 # https://bugs.python.org/issue41810 # Using List[int] here rather than Sequence[int] to disallow tuples. 
-PositionalIndexer = Union[int, np.integer, slice, List[int], np.ndarray] +ScalarIndexer = Union[int, np.integer] +SequenceIndexer = Union[slice, List[int], np.ndarray] +PositionalIndexer = Union[ScalarIndexer, SequenceIndexer] PositionalIndexerTuple = Tuple[PositionalIndexer, PositionalIndexer] PositionalIndexer2D = Union[PositionalIndexer, PositionalIndexerTuple] diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 61c5dd1a08371..32533a04e2b5f 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -17,6 +17,8 @@ F, PositionalIndexer2D, PositionalIndexerTuple, + ScalarIndexer, + SequenceIndexer, Shape, type_t, ) @@ -188,13 +190,13 @@ def _validate_setitem_value(self, value): return value @overload - def __getitem__(self, key: int | np.integer) -> Any: + def __getitem__(self, key: ScalarIndexer) -> Any: ... @overload def __getitem__( self: NDArrayBackedExtensionArrayT, - key: slice | np.ndarray | list[int] | PositionalIndexerTuple, + key: SequenceIndexer | PositionalIndexerTuple, ) -> NDArrayBackedExtensionArrayT: ... diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9a03f79d8e802..dff0a1a38adf5 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -29,6 +29,8 @@ Dtype, FillnaOptions, PositionalIndexer, + ScalarIndexer, + SequenceIndexer, Shape, ) from pandas.compat import set_function_name @@ -292,13 +294,11 @@ def _from_factorized(cls, values, original): # Must be a Sequence # ------------------------------------------------------------------------ @overload - def __getitem__(self, item: int | np.integer) -> Any: + def __getitem__(self, item: ScalarIndexer) -> Any: ... @overload - def __getitem__( - self: ExtensionArrayT, item: slice | np.ndarray | list[int] - ) -> ExtensionArrayT: + def __getitem__(self: ExtensionArrayT, item: SequenceIndexer) -> ExtensionArrayT: ... 
def __getitem__( diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 25ce3437ae550..a1e322a9a78bb 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -40,6 +40,8 @@ PositionalIndexer2D, PositionalIndexerTuple, Scalar, + ScalarIndexer, + SequenceIndexer, Shape, npt, type_t, @@ -2008,13 +2010,13 @@ def __repr__(self) -> str: # ------------------------------------------------------------------ @overload - def __getitem__(self, key: int | np.integer) -> Any: + def __getitem__(self, key: ScalarIndexer) -> Any: ... @overload def __getitem__( self: CategoricalT, - key: slice | np.ndarray | list[int] | PositionalIndexerTuple, + key: SequenceIndexer | PositionalIndexerTuple, ) -> CategoricalT: ... diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 559fd1813d66d..63ba9fdd59fc6 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -50,6 +50,8 @@ NpDtype, PositionalIndexer2D, PositionalIndexerTuple, + ScalarIndexer, + SequenceIndexer, npt, ) from pandas.compat.numpy import function as nv @@ -315,13 +317,13 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: return self._ndarray @overload - def __getitem__(self, item: int | np.integer) -> DTScalarOrNaT: + def __getitem__(self, item: ScalarIndexer) -> DTScalarOrNaT: ... @overload def __getitem__( self: DatetimeLikeArrayT, - item: slice | np.ndarray | list[int] | PositionalIndexerTuple, + item: SequenceIndexer | PositionalIndexerTuple, ) -> DatetimeLikeArrayT: ... 
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 185bb7788b991..732bdb112b8c3 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -34,6 +34,8 @@ Dtype, NpDtype, PositionalIndexer, + ScalarIndexer, + SequenceIndexer, ) from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender @@ -640,13 +642,11 @@ def __len__(self) -> int: return len(self._left) @overload - def __getitem__(self, key: int | np.integer) -> IntervalOrNA: + def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA: ... @overload - def __getitem__( - self: IntervalArrayT, key: slice | np.ndarray | list[int] - ) -> IntervalArrayT: + def __getitem__(self: IntervalArrayT, key: SequenceIndexer) -> IntervalArrayT: ... def __getitem__( diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 90930526ce4d2..dc2dbb85611ab 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -20,6 +20,8 @@ NpDtype, PositionalIndexer, Scalar, + ScalarIndexer, + SequenceIndexer, type_t, ) from pandas.errors import AbstractMethodError @@ -139,13 +141,11 @@ def dtype(self) -> BaseMaskedDtype: raise AbstractMethodError(self) @overload - def __getitem__(self, item: int | np.integer) -> Any: + def __getitem__(self, item: ScalarIndexer) -> Any: ... @overload - def __getitem__( - self: BaseMaskedArrayT, item: slice | np.ndarray | list[int] - ) -> BaseMaskedArrayT: + def __getitem__(self: BaseMaskedArrayT, item: SequenceIndexer) -> BaseMaskedArrayT: ... 
def __getitem__( diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index b2e739f83bc05..36688d79ed0dd 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -32,6 +32,8 @@ NpDtype, PositionalIndexer, Scalar, + ScalarIndexer, + SequenceIndexer, ) from pandas.compat.numpy import function as nv from pandas.errors import PerformanceWarning @@ -822,13 +824,13 @@ def value_counts(self, dropna: bool = True): # Indexing # -------- @overload - def __getitem__(self, key: int | np.integer) -> Any: + def __getitem__(self, key: ScalarIndexer) -> Any: ... @overload def __getitem__( self: SparseArrayT, - key: slice | np.ndarray | list[int] | tuple[int | ellipsis, ...], + key: SequenceIndexer | tuple[int | ellipsis, ...], ) -> SparseArrayT: ... diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index e3338abd12ade..4be7f4eb0c521 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -22,6 +22,8 @@ NpDtype, PositionalIndexer, Scalar, + ScalarIndexer, + SequenceIndexer, ) from pandas.compat import ( pa_version_under1p0, @@ -268,13 +270,11 @@ def _concat_same_type(cls, to_concat) -> ArrowStringArray: ) @overload - def __getitem__(self, item: int | np.integer) -> ArrowStringScalarOrNAT: + def __getitem__(self, item: ScalarIndexer) -> ArrowStringScalarOrNAT: ... @overload - def __getitem__( - self: ArrowStringArray, item: slice | np.ndarray | list[int] - ) -> ArrowStringArray: + def __getitem__(self: ArrowStringArray, item: SequenceIndexer) -> ArrowStringArray: ... 
def __getitem__( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 0e358e611f418..ec8775cf78571 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2984,10 +2984,9 @@ def blk_func(values: ArrayLike) -> ArrayLike: if real_2d and values.ndim == 1: assert result.shape[1] == 1, result.shape - # error: Invalid index type "Tuple[slice, int]" for - # "Union[ExtensionArray, ndarray[Any, Any]]"; expected type - # "Union[int, integer[Any], slice, Sequence[int], ndarray[Any, Any]]" - result = result[:, 0] # type: ignore[index] + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[slice, int]" + result = result[:, 0] # type: ignore[call-overload] if needs_mask: mask = mask[:, 0] @@ -3001,11 +3000,9 @@ def blk_func(values: ArrayLike) -> ArrayLike: if needs_2d and not real_2d: if result.ndim == 2: assert result.shape[1] == 1 - # error: Invalid index type "Tuple[slice, int]" for - # "Union[ExtensionArray, Any, ndarray[Any, Any]]"; expected - # type "Union[int, integer[Any], slice, Sequence[int], - # ndarray[Any, Any]]" - result = result[:, 0] # type: ignore[index] + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[slice, int]" + result = result[:, 0] # type: ignore[call-overload] return result.T From b9417327db6d65649ea03c47daa6d4cfbe43bfc9 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 6 Sep 2021 16:41:04 -0400 Subject: [PATCH 12/12] comments on various indexers in _typing.py --- pandas/_typing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/_typing.py b/pandas/_typing.py index 6defb0aabcedc..5077e659410e3 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -206,6 +206,10 @@ # indexing # PositionalIndexer -> valid 1D positional indexer, e.g. 
can pass # to ndarray.__getitem__ +# ScalarIndexer is for a single value as the index +# SequenceIndexer is for list-likes or slices (but not tuples) +# PositionalIndexerTuple extends PositionalIndexer for 2D arrays +# These are used in various __getitem__ overloads # TODO: add Ellipsis, see # https://github.com/python/typing/issues/684#issuecomment-548203158 # https://bugs.python.org/issue41810