From f2c52a4acbcf6d895c7d54ecfaf2f2b76ade8925 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 23 Jan 2021 11:33:23 -0500 Subject: [PATCH 01/19] small typing fixes --- pandas/_typing.py | 2 +- pandas/core/dtypes/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 9b957ab4d0686..f03b3c9eaf65a 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -91,7 +91,7 @@ Suffixes = Tuple[str, str] Ordered = Optional[bool] JSONSerializable = Optional[Union[PythonScalar, List, Dict]] -Axes = Collection +Axes = Collection[Any] # dtypes NpDtype = Union[str, np.dtype] diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 6adb4984d156e..227c1139c2967 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -139,7 +139,7 @@ def na_value(self) -> object: return np.nan @property - def type(self) -> Type: + def type(self) -> Type[Any]: """ The scalar type for the array, e.g. ``int`` From d7ff8d34ccee8bf85f54c7a9d8e08798cd59d76b Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 23 Jan 2021 18:56:38 -0500 Subject: [PATCH 02/19] fix ExtensionArray and EXtensionDtype --- pandas/_typing.py | 7 ++++ pandas/core/arrays/base.py | 56 +++++++++++++++++++++--------- pandas/core/arrays/sparse/array.py | 4 +-- pandas/core/dtypes/base.py | 12 +++---- 4 files changed, 54 insertions(+), 25 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index f03b3c9eaf65a..40f0a371e7dea 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -94,6 +94,13 @@ Axes = Collection[Any] # dtypes +if TYPE_CHECKING: + + class _dummy: + ... + + np.dtype = _dummy + NpDtype = Union[str, np.dtype] Dtype = Union[ "ExtensionDtype", NpDtype, Type[Union[str, float, int, complex, bool, object]] diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9a8b37e0785e0..175968a37ca33 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -10,9 +10,12 @@ import operator from typing import ( + TYPE_CHECKING, Any, Callable, Dict, + Iterator, + Literal, Optional, Sequence, Tuple, @@ -49,6 +52,13 @@ from pandas.core.missing import get_fill_func from pandas.core.sorting import nargminmax, nargsort +if TYPE_CHECKING: + + class _dummy: + ... + + np.ndarray = _dummy + _extension_array_shared_docs: Dict[str, str] = {} ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray") @@ -347,7 +357,7 @@ def __len__(self) -> int: """ raise AbstractMethodError(self) - def __iter__(self): + def __iter__(self) -> Iterator[Any]: """ Iterate over elements of the array. """ @@ -357,7 +367,7 @@ def __iter__(self): for i in range(len(self)): yield self[i] - def __contains__(self, item) -> bool: + def __contains__(self, item: Any) -> bool: """ Return for `item in self`. """ @@ -396,7 +406,7 @@ def to_numpy( self, dtype: Optional[Dtype] = None, copy: bool = False, - na_value=lib.no_default, + na_value: Optional[Any] = lib.no_default, ) -> np.ndarray: """ Convert to a NumPy ndarray. @@ -475,7 +485,7 @@ def nbytes(self) -> int: # Additional Methods # ------------------------------------------------------------------------ - def astype(self, dtype, copy=True): + def astype(self, dtype: Dtype, copy: bool = True) -> np.ndarray: """ Cast to a NumPy array with 'dtype'. @@ -555,8 +565,8 @@ def argsort( ascending: bool = True, kind: str = "quicksort", na_position: str = "last", - *args, - **kwargs, + *args: Any, + **kwargs: Any, ) -> np.ndarray: """ Return the indices that would sort this array. 
@@ -596,7 +606,7 @@ def argsort( mask=np.asarray(self.isna()), ) - def argmin(self): + def argmin(self) -> int: """ Return the index of minimum value. @@ -613,7 +623,7 @@ def argmin(self): """ return nargminmax(self, "argmin") - def argmax(self): + def argmax(self) -> int: """ Return the index of maximum value. @@ -630,7 +640,12 @@ def argmax(self): """ return nargminmax(self, "argmax") - def fillna(self, value=None, method=None, limit=None): + def fillna( + self, + value: Optional[Union[Any, ArrayLike]] = None, + method: Optional[Literal["backfill", "bfill", "ffill", "pad"]] = None, + limit: Optional[int] = None, + ) -> ExtensionArray: """ Fill NA/NaN values using the specified method. @@ -682,7 +697,7 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values - def dropna(self): + def dropna(self) -> ExtensionArray: """ Return ExtensionArray without NA values. @@ -746,7 +761,7 @@ def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray: b = empty return self._concat_same_type([a, b]) - def unique(self): + def unique(self) -> "ExtensionArray": """ Compute the ExtensionArray of unique values. @@ -757,7 +772,12 @@ def unique(self): uniques = unique(self.astype(object)) return self._from_sequence(uniques, dtype=self.dtype) - def searchsorted(self, value, side="left", sorter=None): + def searchsorted( + self, + value: ArrayLike, + side: Optional[Literal["left", "right"]] = "left", + sorter: Optional[ArrayLike] = None, + ) -> np.ndarray: """ Find indices where elements should be inserted to maintain order. @@ -956,7 +976,9 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]: @Substitution(klass="ExtensionArray") @Appender(_extension_array_shared_docs["repeat"]) - def repeat(self, repeats, axis=None): + def repeat( + self, repeats: Union[int, Sequence[int]], axis: Literal[None] = None + ) -> ExtensionArray: nv.validate_repeat((), {"axis": axis}) ind = np.arange(len(self)).repeat(repeats) return self.take(ind) @@ -1140,7 +1162,7 @@ def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: # Reshaping # ------------------------------------------------------------------------ - def transpose(self, *axes) -> ExtensionArray: + def transpose(self, *axes: int) -> ExtensionArray: """ Return a transposed view on this array. @@ -1153,7 +1175,9 @@ def transpose(self, *axes) -> ExtensionArray: def T(self) -> ExtensionArray: return self.transpose() - def ravel(self, order="C") -> ExtensionArray: + def ravel( + self, order: Optional[Literal["C", "F", "A", "K"]] = "C" + ) -> ExtensionArray: """ Return a flattened view on this array. 
@@ -1227,7 +1251,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): """ raise TypeError(f"cannot perform {name} with type {self.dtype}") - def __hash__(self): + def __hash__(self) -> int: raise TypeError(f"unhashable type: {repr(type(self).__name__)}") diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 4cae2e48c84c8..6818364eef040 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -6,7 +6,7 @@ from collections import abc import numbers import operator -from typing import Any, Callable, Optional, Sequence, Type, TypeVar, Union +from typing import Any, Callable, Optional, Sequence, Type, TypeVar, Union, cast import warnings import numpy as np @@ -1172,7 +1172,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): if skipna: arr = self else: - arr = self.dropna() + arr = cast(SparseArray, self.dropna()) # we don't support these kwargs. # They should only be present when called via pandas, so do it here. diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 227c1139c2967..c72d79f1f6167 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -2,7 +2,7 @@ Extend pandas with custom array types. """ -from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type, Union +from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type, Union, cast import numpy as np @@ -197,7 +197,7 @@ def construct_array_type(cls) -> Type["ExtensionArray"]: raise NotImplementedError @classmethod - def construct_from_string(cls, string: str): + def construct_from_string(cls, string: str) -> "ExtensionDtype": r""" Construct this type from a string. @@ -408,9 +408,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None: self.dtypes.append(dtype) - def find( - self, dtype: Union[Type[ExtensionDtype], str] - ) -> Optional[Type[ExtensionDtype]]: + def find(self, dtype: Union[Type[ExtensionDtype], str]) -> Optional[ExtensionDtype]: """ Parameters ---------- @@ -425,7 +423,7 @@ def find( if not isinstance(dtype, type): dtype_type = type(dtype) if issubclass(dtype_type, ExtensionDtype): - return dtype + return cast(ExtensionDtype, dtype) return None @@ -438,4 +436,4 @@ def find( return None -registry = Registry() +registry: Registry = Registry() From 03b2c4a09acb3484cfc55b5253f255d4ed26e0c1 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 31 Jan 2021 12:23:25 -0500 Subject: [PATCH 03/19] fixes for delete, isin, unique --- pandas/core/arrays/base.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 0ae265e037904..87c3d48d9c9b7 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -776,7 +776,7 @@ def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray: b = empty return self._concat_same_type([a, b]) - def unique(self) -> "ExtensionArray": + def unique(self) -> ExtensionArray: """ Compute the ExtensionArray of unique values. @@ -873,7 +873,7 @@ def equals(self, other: object) -> bool: equal_na = self.isna() & other.isna() return bool((equal_values | equal_na).all()) - def isin(self, values) -> np.ndarray: + def isin(self, values: Union[ExtensionArray, Sequence[Any]]) -> np.ndarray: """ Pointwise comparison for set containment in the given values. 
@@ -1288,7 +1288,9 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------ # Non-Optimized Default Methods - def delete(self: ExtensionArrayT, loc) -> ExtensionArrayT: + def delete( + self: ExtensionArrayT, loc: Union[int, Sequence[int]] + ) -> ExtensionArrayT: indexer = np.delete(np.arange(len(self)), loc) return self.take(indexer) From 3e19958fb1dcfaf147c9a2088a158891e68e1196 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 31 Jan 2021 12:29:57 -0500 Subject: [PATCH 04/19] fix import of Literal --- pandas/core/arrays/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 87c3d48d9c9b7..3438be96568eb 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -15,7 +15,6 @@ Callable, Dict, Iterator, - Literal, Optional, Sequence, Tuple, @@ -54,6 +53,8 @@ if TYPE_CHECKING: + from typing import Literal + class _dummy: ... From 6861901c6d7dfe33a067c1b238d87dfb68ed6743 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 31 Jan 2021 13:26:01 -0500 Subject: [PATCH 05/19] remove quotes on ExtensionDType.construct_from_string --- pandas/core/dtypes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 3b7b012faba9d..fd64bd821e4e6 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -199,7 +199,7 @@ def construct_array_type(cls) -> Type[ExtensionArray]: raise NotImplementedError @classmethod - def construct_from_string(cls, string: str) -> "ExtensionDtype": + def construct_from_string(cls, string: str) -> ExtensionDtype: r""" Construct this type from a string. From 9be6486b2b6088d108b038443db45d6c13498273 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 1 Feb 2021 13:28:50 -0500 Subject: [PATCH 06/19] move numpy workaround to _typing.py --- pandas/_typing.py | 1 + pandas/core/arrays/base.py | 9 --------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 40f0a371e7dea..852a640d17716 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -100,6 +100,7 @@ class _dummy: ... np.dtype = _dummy + np.ndarray = _dummy NpDtype = Union[str, np.dtype] Dtype = Union[ diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 3438be96568eb..dc3bf609dc48c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -51,15 +51,6 @@ from pandas.core.missing import get_fill_func from pandas.core.sorting import nargminmax, nargsort -if TYPE_CHECKING: - - from typing import Literal - - class _dummy: - ... - - np.ndarray = _dummy - _extension_array_shared_docs: Dict[str, str] = {} ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray") From 260b367291f4ae254a927aa7073092f2401212aa Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 2 Feb 2021 11:26:34 -0500 Subject: [PATCH 07/19] remove numpy dummy --- pandas/_typing.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 852a640d17716..7922ec2681261 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -94,13 +94,6 @@ Axes = Collection[Any] # dtypes -if TYPE_CHECKING: - - class _dummy: - ... 
- - np.dtype = _dummy - np.ndarray = _dummy NpDtype = Union[str, np.dtype] Dtype = Union[ From 6276725a5a78489c404f1f1a0e3b170f47346f76 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 2 Feb 2021 11:27:18 -0500 Subject: [PATCH 08/19] remove extra line in _typing --- pandas/_typing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 7922ec2681261..f03b3c9eaf65a 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -94,7 +94,6 @@ Axes = Collection[Any] # dtypes - NpDtype = Union[str, np.dtype] Dtype = Union[ "ExtensionDtype", NpDtype, Type[Union[str, float, int, complex, bool, object]] From 8b2cee2b3e18117cc8a37f4bdbdc80226e67c2f6 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 3 Feb 2021 09:15:46 -0500 Subject: [PATCH 09/19] import Literal --- pandas/core/arrays/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index dc3bf609dc48c..62cef2cec2ee1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -51,6 +51,9 @@ from pandas.core.missing import get_fill_func from pandas.core.sorting import nargminmax, nargsort +if TYPE_CHECKING: + from typing import Literal + _extension_array_shared_docs: Dict[str, str] = {} ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray") From 8cd6b76f7312733728d54a1e74f821f0bb233398 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 8 Mar 2021 10:58:08 -0500 Subject: [PATCH 10/19] isort precommit fix --- pandas/core/arrays/sparse/array.py | 11 ++++++++++- pandas/core/dtypes/base.py | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 824b2493a5de0..4b93e92971daf 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -6,7 +6,16 @@ from collections import abc import numbers import operator -from typing import Any, Callable, Optional, Sequence, Type, TypeVar, Union, cast +from typing import ( + Any, + Callable, + Optional, + Sequence, + Type, + TypeVar, + Union, + cast, +) import warnings import numpy as np diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index af5621bf2020d..11469fac2be32 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -4,7 +4,16 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type, Union, cast +from typing import ( + TYPE_CHECKING, + Any, + List, + Optional, + Tuple, + Type, + Union, + cast, +) import numpy as np From e0e0131c80e013239bff4ec6f058191aadf752d7 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 8 Mar 2021 12:29:43 -0500 Subject: [PATCH 11/19] fix interval.repeat() typing --- pandas/core/arrays/interval.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index f192a34514390..44fc9529c6441 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -11,6 +11,7 @@ Sequence, Type, TypeVar, + Union, cast, ) @@ -1493,7 +1494,9 @@ def delete(self: IntervalArrayT, loc) -> IntervalArrayT: return self._shallow_copy(left=new_left, right=new_right) @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs) - def repeat(self: IntervalArrayT, repeats: int, axis=None) -> IntervalArrayT: + def repeat( + self: IntervalArrayT, repeats: Union[int, Sequence[int]], axis=None + ) -> IntervalArrayT: nv.validate_repeat((), {"axis": axis}) 
left_repeat = self.left.repeat(repeats) right_repeat = self.right.repeat(repeats) From 6a6a21f0a88b570e23249abcf51b05e7bc6c96b3 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 9 Mar 2021 18:01:43 -0500 Subject: [PATCH 12/19] overload for __getitem__ and use pattern with ExtensionArrayT as self and return type --- pandas/core/arrays/_mixins.py | 17 +++++++++++ pandas/core/arrays/base.py | 45 ++++++++++++++++++++---------- pandas/core/arrays/sparse/array.py | 3 +- 3 files changed, 49 insertions(+), 16 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 4615cb4ec7abd..6ab951dc4e7a4 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -8,6 +8,7 @@ Type, TypeVar, Union, + overload, ) import numpy as np @@ -248,6 +249,22 @@ def __setitem__(self, key, value): def _validate_setitem_value(self, value): return value + @overload + def __getitem__(self: NDArrayBackedExtensionArrayT, key: int) -> Any: + ... + + @overload + def __getitem__( + self: NDArrayBackedExtensionArrayT, key: slice + ) -> NDArrayBackedExtensionArrayT: + ... + + @overload + def __getitem__( + self: NDArrayBackedExtensionArrayT, key: np.ndarray + ) -> NDArrayBackedExtensionArrayT: + ... + def __getitem__( self: NDArrayBackedExtensionArrayT, key: Union[int, slice, np.ndarray] ) -> Union[NDArrayBackedExtensionArrayT, Any]: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index ce517f549f507..541331fa9f7ce 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -22,6 +22,7 @@ TypeVar, Union, cast, + overload, ) import numpy as np @@ -287,8 +288,20 @@ def _from_factorized(cls, values, original): # Must be a Sequence # ------------------------------------------------------------------------ + @overload + def __getitem__(self: ExtensionArrayT, item: int) -> Any: + ... + + @overload + def __getitem__(self: ExtensionArrayT, item: slice) -> ExtensionArrayT: + ... + + @overload + def __getitem__(self: ExtensionArrayT, item: np.ndarray) -> ExtensionArrayT: + ... + def __getitem__( - self, item: Union[int, slice, np.ndarray] + self: ExtensionArrayT, item: Union[int, slice, np.ndarray] ) -> Union[ExtensionArray, Any]: """ Select a subset of self. @@ -673,11 +686,11 @@ def argmax(self, skipna: bool = True) -> int: return nargminmax(self, "argmax") def fillna( - self, + self: ExtensionArrayT, value: Optional[Union[Any, ArrayLike]] = None, method: Optional[Literal["backfill", "bfill", "ffill", "pad"]] = None, limit: Optional[int] = None, - ) -> ExtensionArray: + ) -> ExtensionArrayT: """ Fill NA/NaN values using the specified method. @@ -722,7 +735,7 @@ def fillna( new_values = self.copy() return new_values - def dropna(self) -> ExtensionArray: + def dropna(self) -> ExtensionArrayT: """ Return ExtensionArray without NA values. @@ -732,7 +745,9 @@ def dropna(self) -> ExtensionArray: """ return self[~self.isna()] - def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray: + def shift( + self: ExtensionArrayT, periods: int = 1, fill_value: object = None + ) -> ExtensionArrayT: """ Shift values by desired number. 
@@ -780,13 +795,13 @@ def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray: ) if periods > 0: a = empty - b = self[:-periods] + b: ExtensionArrayT = self[:-periods] else: a = self[abs(periods) :] b = empty return self._concat_same_type([a, b]) - def unique(self) -> ExtensionArray: + def unique(self: ExtensionArrayT) -> ExtensionArrayT: """ Compute the ExtensionArray of unique values. @@ -1018,8 +1033,10 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]: @Substitution(klass="ExtensionArray") @Appender(_extension_array_shared_docs["repeat"]) def repeat( - self, repeats: Union[int, Sequence[int]], axis: Literal[None] = None - ) -> ExtensionArray: + self: ExtensionArrayT, + repeats: Union[int, Sequence[int]], + axis: Literal[None] = None, + ) -> ExtensionArrayT: nv.validate_repeat((), {"axis": axis}) ind = np.arange(len(self)).repeat(repeats) return self.take(ind) @@ -1203,7 +1220,7 @@ def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: # Reshaping # ------------------------------------------------------------------------ - def transpose(self, *axes: int) -> ExtensionArray: + def transpose(self: ExtensionArrayT, *axes: int) -> ExtensionArrayT: """ Return a transposed view on this array. @@ -1213,12 +1230,12 @@ def transpose(self, *axes: int) -> ExtensionArray: return self[:] @property - def T(self) -> ExtensionArray: + def T(self: ExtensionArrayT) -> ExtensionArrayT: return self.transpose() def ravel( - self, order: Optional[Literal["C", "F", "A", "K"]] = "C" - ) -> ExtensionArray: + self: ExtensionArrayT, order: Optional[Literal["C", "F", "A", "K"]] = "C" + ) -> ExtensionArrayT: """ Return a flattened view on this array. @@ -1240,7 +1257,7 @@ def ravel( @classmethod def _concat_same_type( cls: Type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT] - ) -> ExtensionArrayT: + ): """ Concatenate multiple array of this dtype. diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 4b93e92971daf..a209037f9a9a6 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -14,7 +14,6 @@ Type, TypeVar, Union, - cast, ) import warnings @@ -1204,7 +1203,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): if skipna: arr = self else: - arr = cast(SparseArray, self.dropna()) + arr = self.dropna() # we don't support these kwargs. # They should only be present when called via pandas, so do it here. From bf753e645be8275461192b2175edf990358bd41d Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 10 Mar 2021 13:54:56 -0500 Subject: [PATCH 13/19] lose less ExtensionArrayT. Make registry private. consolidate overload --- pandas/core/arrays/_mixins.py | 8 +--- pandas/core/arrays/base.py | 48 +++++++++------------ pandas/core/construction.py | 2 +- pandas/core/dtypes/base.py | 4 +- pandas/core/dtypes/common.py | 2 +- pandas/tests/arrays/test_array.py | 2 +- pandas/tests/arrays/test_period.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 2 +- 9 files changed, 30 insertions(+), 42 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 6ab951dc4e7a4..6512c18c2eaf9 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -255,13 +255,7 @@ def __getitem__(self: NDArrayBackedExtensionArrayT, key: int) -> Any: @overload def __getitem__( - self: NDArrayBackedExtensionArrayT, key: slice - ) -> NDArrayBackedExtensionArrayT: - ... 
- - @overload - def __getitem__( - self: NDArrayBackedExtensionArrayT, key: np.ndarray + self: NDArrayBackedExtensionArrayT, key: Union[slice, np.ndarray] ) -> NDArrayBackedExtensionArrayT: ... diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 541331fa9f7ce..890ed938efce6 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -289,19 +289,15 @@ def _from_factorized(cls, values, original): # ------------------------------------------------------------------------ @overload - def __getitem__(self: ExtensionArrayT, item: int) -> Any: + def __getitem__(self, item: int) -> Any: ... @overload - def __getitem__(self: ExtensionArrayT, item: slice) -> ExtensionArrayT: - ... - - @overload - def __getitem__(self: ExtensionArrayT, item: np.ndarray) -> ExtensionArrayT: + def __getitem__(self, item: Union[slice, np.ndarray]) -> ExtensionArray: ... def __getitem__( - self: ExtensionArrayT, item: Union[int, slice, np.ndarray] + self, item: Union[int, slice, np.ndarray] ) -> Union[ExtensionArray, Any]: """ Select a subset of self. @@ -686,11 +682,11 @@ def argmax(self, skipna: bool = True) -> int: return nargminmax(self, "argmax") def fillna( - self: ExtensionArrayT, + self: ExtensionArray, value: Optional[Union[Any, ArrayLike]] = None, method: Optional[Literal["backfill", "bfill", "ffill", "pad"]] = None, limit: Optional[int] = None, - ) -> ExtensionArrayT: + ) -> ExtensionArray: """ Fill NA/NaN values using the specified method. @@ -746,8 +742,8 @@ def dropna(self) -> ExtensionArrayT: return self[~self.isna()] def shift( - self: ExtensionArrayT, periods: int = 1, fill_value: object = None - ) -> ExtensionArrayT: + self: ExtensionArray, periods: int = 1, fill_value: object = None + ) -> ExtensionArray: """ Shift values by desired number. @@ -795,13 +791,13 @@ def shift( ) if periods > 0: a = empty - b: ExtensionArrayT = self[:-periods] + b = self[:-periods] else: a = self[abs(periods) :] b = empty return self._concat_same_type([a, b]) - def unique(self: ExtensionArrayT) -> ExtensionArrayT: + def unique(self: ExtensionArray) -> ExtensionArray: """ Compute the ExtensionArray of unique values. @@ -815,7 +811,7 @@ def unique(self: ExtensionArrayT) -> ExtensionArrayT: def searchsorted( self, value: ArrayLike, - side: Optional[Literal["left", "right"]] = "left", + side: Literal["left", "right"] = "left", sorter: Optional[ArrayLike] = None, ) -> np.ndarray: """ @@ -1033,10 +1029,10 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]: @Substitution(klass="ExtensionArray") @Appender(_extension_array_shared_docs["repeat"]) def repeat( - self: ExtensionArrayT, + self: ExtensionArray, repeats: Union[int, Sequence[int]], axis: Literal[None] = None, - ) -> ExtensionArrayT: + ) -> ExtensionArray: nv.validate_repeat((), {"axis": axis}) ind = np.arange(len(self)).repeat(repeats) return self.take(ind) @@ -1046,12 +1042,12 @@ def repeat( # ------------------------------------------------------------------------ def take( - self: ExtensionArrayT, + self: ExtensionArray, indices: Sequence[int], *, allow_fill: bool = False, fill_value: Any = None, - ) -> ExtensionArrayT: + ) -> ExtensionArray: """ Take elements from an array. @@ -1140,7 +1136,7 @@ def take(self, indices, allow_fill=False, fill_value=None): # pandas.api.extensions.take raise AbstractMethodError(self) - def copy(self: ExtensionArrayT) -> ExtensionArrayT: + def copy(self: ExtensionArray) -> ExtensionArray: """ Return a copy of the array. 
@@ -1220,7 +1216,7 @@ def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: # Reshaping # ------------------------------------------------------------------------ - def transpose(self: ExtensionArrayT, *axes: int) -> ExtensionArrayT: + def transpose(self: ExtensionArray, *axes: int) -> ExtensionArray: """ Return a transposed view on this array. @@ -1230,12 +1226,12 @@ def transpose(self: ExtensionArrayT, *axes: int) -> ExtensionArrayT: return self[:] @property - def T(self: ExtensionArrayT) -> ExtensionArrayT: + def T(self: ExtensionArray) -> ExtensionArray: return self.transpose() def ravel( - self: ExtensionArrayT, order: Optional[Literal["C", "F", "A", "K"]] = "C" - ) -> ExtensionArrayT: + self: ExtensionArray, order: Optional[Literal["C", "F", "A", "K"]] = "C" + ) -> ExtensionArray: """ Return a flattened view on this array. @@ -1257,7 +1253,7 @@ def ravel( @classmethod def _concat_same_type( cls: Type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT] - ): + ) -> ExtensionArrayT: """ Concatenate multiple array of this dtype. @@ -1315,9 +1311,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------ # Non-Optimized Default Methods - def delete( - self: ExtensionArrayT, loc: Union[int, Sequence[int]] - ) -> ExtensionArrayT: + def delete(self: ExtensionArray, loc: Union[int, Sequence[int]]) -> ExtensionArray: indexer = np.delete(np.arange(len(self)), loc) return self.take(indexer) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 43900709ad11f..3fb351be5f486 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -33,7 +33,7 @@ from pandas.core.dtypes.base import ( ExtensionDtype, - registry, + _registry as registry, ) from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 11469fac2be32..d7b1b05f5d5a9 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -389,7 +389,7 @@ def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]: ... class MyExtensionDtype(ExtensionDtype): ... 
name = "myextension" """ - registry.register(cls) + _registry.register(cls) return cls @@ -451,4 +451,4 @@ def find(self, dtype: Union[Type[ExtensionDtype], str]) -> Optional[ExtensionDty return None -registry: Registry = Registry() +_registry = Registry() diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 0966d0b93cc25..a6b8bc34fd523 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -23,7 +23,7 @@ Optional, ) -from pandas.core.dtypes.base import registry +from pandas.core.dtypes.base import _registry as registry from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 5d2b7c43f6765..bfe588883d9f3 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -5,7 +5,7 @@ import pytest import pytz -from pandas.core.dtypes.base import registry +from pandas.core.dtypes.base import _registry as registry import pandas as pd import pandas._testing as tm diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index e7f3e8c659316..2592a0263c585 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -4,7 +4,7 @@ from pandas._libs.tslibs import iNaT from pandas._libs.tslibs.period import IncompatibleFrequency -from pandas.core.dtypes.base import registry +from pandas.core.dtypes.base import _registry as registry from pandas.core.dtypes.dtypes import PeriodDtype import pandas as pd diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index ca311768dc2d9..51a7969162abf 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -4,7 +4,7 @@ import pytest import pytz -from pandas.core.dtypes.base import registry +from pandas.core.dtypes.base import _registry as registry from pandas.core.dtypes.common import ( is_bool_dtype, is_categorical, diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 9d61be5887b7e..259558afb968b 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas.core.dtypes.base import registry as ea_registry +from pandas.core.dtypes.base import _registry as ea_registry from pandas.core.dtypes.common import ( is_categorical_dtype, is_interval_dtype, From c9795a5ae5e3f4f3f15673cc524c67a2a35fb627 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 10 Mar 2021 15:50:09 -0500 Subject: [PATCH 14/19] remove ExtensionArray typing of self --- pandas/core/arrays/base.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 890ed938efce6..c7439ce386022 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -682,7 +682,7 @@ def argmax(self, skipna: bool = True) -> int: return nargminmax(self, "argmax") def fillna( - self: ExtensionArray, + self, value: Optional[Union[Any, ArrayLike]] = None, method: Optional[Literal["backfill", "bfill", "ffill", "pad"]] = None, limit: Optional[int] = None, @@ -741,9 +741,7 @@ def dropna(self) -> ExtensionArrayT: """ return self[~self.isna()] - def shift( - self: ExtensionArray, periods: int = 1, fill_value: object = None - ) -> ExtensionArray: + def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray: """ Shift values by desired number. 
@@ -797,7 +795,7 @@ def shift( b = empty return self._concat_same_type([a, b]) - def unique(self: ExtensionArray) -> ExtensionArray: + def unique(self) -> ExtensionArray: """ Compute the ExtensionArray of unique values. @@ -1029,7 +1027,7 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]: @Substitution(klass="ExtensionArray") @Appender(_extension_array_shared_docs["repeat"]) def repeat( - self: ExtensionArray, + self, repeats: Union[int, Sequence[int]], axis: Literal[None] = None, ) -> ExtensionArray: @@ -1042,7 +1040,7 @@ def repeat( # ------------------------------------------------------------------------ def take( - self: ExtensionArray, + self, indices: Sequence[int], *, allow_fill: bool = False, @@ -1136,7 +1134,7 @@ def take(self, indices, allow_fill=False, fill_value=None): # pandas.api.extensions.take raise AbstractMethodError(self) - def copy(self: ExtensionArray) -> ExtensionArray: + def copy(self) -> ExtensionArray: """ Return a copy of the array. @@ -1216,7 +1214,7 @@ def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: # Reshaping # ------------------------------------------------------------------------ - def transpose(self: ExtensionArray, *axes: int) -> ExtensionArray: + def transpose(self, *axes: int) -> ExtensionArray: """ Return a transposed view on this array. @@ -1226,11 +1224,11 @@ def transpose(self: ExtensionArray, *axes: int) -> ExtensionArray: return self[:] @property - def T(self: ExtensionArray) -> ExtensionArray: + def T(self) -> ExtensionArray: return self.transpose() def ravel( - self: ExtensionArray, order: Optional[Literal["C", "F", "A", "K"]] = "C" + self, order: Optional[Literal["C", "F", "A", "K"]] = "C" ) -> ExtensionArray: """ Return a flattened view on this array. 
@@ -1311,7 +1309,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------ # Non-Optimized Default Methods - def delete(self: ExtensionArray, loc: Union[int, Sequence[int]]) -> ExtensionArray: + def delete(self, loc: Union[int, Sequence[int]]) -> ExtensionArray: indexer = np.delete(np.arange(len(self)), loc) return self.take(indexer) From 548c198015be1d0040f0ae6061acf8a0b246e5dd Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 12 Mar 2021 15:43:36 -0500 Subject: [PATCH 15/19] make extension arrays work with new typing, fixing astype and to_numpy --- pandas/_testing/asserters.py | 10 ++++-- pandas/_typing.py | 10 ++++-- pandas/core/algorithms.py | 45 +++++++++++++------------ pandas/core/array_algos/putmask.py | 2 +- pandas/core/arrays/_mixins.py | 6 ++-- pandas/core/arrays/base.py | 39 +++++++++++---------- pandas/core/arrays/boolean.py | 14 ++++++-- pandas/core/arrays/categorical.py | 15 ++++++--- pandas/core/arrays/datetimelike.py | 19 +++++------ pandas/core/arrays/floating.py | 12 ++++++- pandas/core/arrays/integer.py | 9 +++++ pandas/core/arrays/masked.py | 18 +++++++--- pandas/core/arrays/numpy_.py | 7 ++-- pandas/core/arrays/period.py | 6 +--- pandas/core/arrays/string_arrow.py | 12 +++---- pandas/core/base.py | 19 ++++------- pandas/core/dtypes/common.py | 27 +++++++++------ pandas/core/indexes/base.py | 6 +--- pandas/core/internals/blocks.py | 8 ++--- pandas/core/internals/concat.py | 1 + pandas/core/internals/managers.py | 31 ++++++----------- pandas/core/internals/ops.py | 24 ++++++------- pandas/core/missing.py | 6 +--- pandas/core/tools/datetimes.py | 4 +-- pandas/io/formats/format.py | 6 +--- pandas/io/parsers/base_parser.py | 10 ++---- pandas/tests/extension/decimal/array.py | 4 ++- 27 files changed, 197 insertions(+), 173 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 731b55464c11b..dad1522aada41 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -382,9 +382,13 @@ def _get_ilevel_values(index, level): # skip exact index checking when `check_categorical` is False if check_exact and check_categorical: if not left.equals(right): - diff = ( - np.sum((left._values != right._values).astype(int)) * 100.0 / len(left) - ) + # error: Value of type variable "_Number" of "sum" cannot be + # "Union[ExtensionArray, ndarray, Any]" + thesum = np.sum( + (left._values != right._values).astype(int) + ) # type: ignore[type-var] + # error: Unsupported operand types for * ("ExtensionArray" and "float") + diff = thesum * 100.0 / len(left) # type: ignore[operator] msg = f"{obj} values are different ({np.round(diff, 5)} %)" raise_assert_detail(obj, msg, left, right) else: diff --git a/pandas/_typing.py b/pandas/_typing.py index 3e584774e539a..9ce3adda926cf 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -76,11 +76,17 @@ ArrayLike = Union["ExtensionArray", np.ndarray] AnyArrayLike = Union[ArrayLike, "Index", "Series"] - +AnySequenceLike = Union[ + "ExtensionArray", + "Index", + "Series", + Sequence[Any], + np.ndarray, +] # scalars PythonScalar = Union[str, int, float, bool] -DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", "Period", "Timestamp", "Timedelta") +DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"] PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] Scalar = Union[PythonScalar, PandasScalar] diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c3705fada724a..02152446c414f 100644 --- 
a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -30,7 +30,9 @@ ) from pandas._typing import ( AnyArrayLike, + AnySequenceLike, ArrayLike, + Dtype, DtypeObj, FrameOrSeriesUnion, ) @@ -216,7 +218,7 @@ def _ensure_data(values: ArrayLike) -> Tuple[np.ndarray, DtypeObj]: def _reconstruct_data( - values: ArrayLike, dtype: DtypeObj, original: AnyArrayLike + values: ArrayLike, dtype: Dtype, original: AnyArrayLike ) -> ArrayLike: """ reverse of _ensure_data @@ -244,11 +246,12 @@ def _reconstruct_data( values = cls._from_sequence(values) elif is_bool_dtype(dtype): - # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has - # incompatible type "Union[dtype, ExtensionDtype]"; expected - # "Union[dtype, None, type, _SupportsDtype, str, Tuple[Any, int], - # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict, - # Tuple[Any, Any]]" + # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible + # type "Union[ExtensionDtype, Union[str, dtype[Any]], Type[str], Type[float], + # Type[int], Type[complex], Type[bool], Type[object]]"; expected + # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int], + # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, + # Tuple[Any, Any]]]" values = values.astype(dtype, copy=False) # type: ignore[arg-type] # we only support object dtypes bool Index @@ -256,19 +259,15 @@ def _reconstruct_data( values = values.astype(object, copy=False) elif dtype is not None: if is_datetime64_dtype(dtype): - # error: Incompatible types in assignment (expression has type - # "str", variable has type "Union[dtype, ExtensionDtype]") - dtype = "datetime64[ns]" # type: ignore[assignment] + dtype = "datetime64[ns]" elif is_timedelta64_dtype(dtype): - # error: Incompatible types in assignment (expression has type - # "str", variable has type "Union[dtype, ExtensionDtype]") - dtype = "timedelta64[ns]" # type: ignore[assignment] - - # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has - # incompatible type "Union[dtype, ExtensionDtype]"; expected - # "Union[dtype, None, type, _SupportsDtype, str, Tuple[Any, int], - # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict, - # Tuple[Any, Any]]" + dtype = "timedelta64[ns]" + # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible + # type "Union[ExtensionDtype, Union[str, dtype[Any]], Type[str], Type[float], + # Type[int], Type[complex], Type[bool], Type[object]]"; expected + # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int], + # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, + # Tuple[Any, Any]]]" values = values.astype(dtype, copy=False) # type: ignore[arg-type] return values @@ -461,7 +460,7 @@ def unique(values): unique1d = unique -def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: +def isin(comps: AnySequenceLike, values: AnySequenceLike) -> np.ndarray: """ Compute the isin boolean array. 
@@ -497,9 +496,11 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: comps = _ensure_arraylike(comps) comps = extract_array(comps, extract_numpy=True) if is_extension_array_dtype(comps.dtype): - # error: Incompatible return value type (got "Series", expected "ndarray") - # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute "isin" - return comps.isin(values) # type: ignore[return-value,union-attr] + # error: Argument 1 to "isin" of "ExtensionArray" has incompatible type + # "Union[Any, ExtensionArray, ndarray]"; expected "Sequence[Any]" + # error: Item "ndarray" of "Union[Any, ExtensionArray, ndarray]" has no + # attribute "isin" + return comps.isin(values) # type: ignore[arg-type, union-attr] elif needs_i8_conversion(comps.dtype): # Dispatch to DatetimeLikeArrayMixin.isin diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index 3daf1b3ae3902..0666112cec33d 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -191,7 +191,7 @@ def extract_bool_array(mask: ArrayLike) -> np.ndarray: # We could have BooleanArray, Sparse[bool], ... # Except for BooleanArray, this is equivalent to just # np.asarray(mask, dtype=bool) - mask = mask.to_numpy(dtype=bool, na_value=False) + mask = mask.to_numpy(dtype=np.dtype(bool), na_value=False) mask = np.asarray(mask, dtype=bool) return mask diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 3e78fbf7109cb..255e5f43335e4 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -3,6 +3,7 @@ from functools import wraps from typing import ( Any, + List, Optional, Sequence, Type, @@ -260,12 +261,13 @@ def __getitem__(self: NDArrayBackedExtensionArrayT, key: int) -> Any: @overload def __getitem__( - self: NDArrayBackedExtensionArrayT, key: Union[slice, np.ndarray] + self: NDArrayBackedExtensionArrayT, key: Union[slice, np.ndarray, List[Any]] ) -> NDArrayBackedExtensionArrayT: ... def __getitem__( - self: NDArrayBackedExtensionArrayT, key: Union[int, slice, np.ndarray] + self: NDArrayBackedExtensionArrayT, + key: Union[int, slice, np.ndarray, List[Any]], ) -> Union[NDArrayBackedExtensionArrayT, Any]: if lib.is_integer(key): # fast-path diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b22dcb05e777c..a42a0d08bb11a 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -15,6 +15,7 @@ Callable, Dict, Iterator, + List, Literal, Optional, Sequence, @@ -32,6 +33,7 @@ from pandas._typing import ( ArrayLike, Dtype, + NpDtype, Shape, ) from pandas.compat import set_function_name @@ -301,11 +303,11 @@ def __getitem__(self, item: int) -> Any: ... @overload - def __getitem__(self, item: Union[slice, np.ndarray]) -> ExtensionArray: + def __getitem__(self, item: Union[slice, np.ndarray, List[Any]]) -> ExtensionArray: ... def __getitem__( - self, item: Union[int, slice, np.ndarray] + self, item: Union[int, slice, np.ndarray, List[Any]] ) -> Union[ExtensionArray, Any]: """ Select a subset of self. @@ -441,9 +443,10 @@ def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override] def to_numpy( self, - dtype: Optional[Dtype] = None, + dtype: Optional[NpDtype] = None, copy: bool = False, na_value: Optional[Any] = lib.no_default, + **kwargs: Any, ) -> np.ndarray: """ Convert to a NumPy ndarray. 
@@ -470,12 +473,7 @@ def to_numpy( ------- numpy.ndarray """ - # error: Argument "dtype" to "asarray" has incompatible type - # "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float], Type[int], - # Type[complex], Type[bool], Type[object], None]"; expected "Union[dtype[Any], - # None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, - # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" - result = np.asarray(self, dtype=dtype) # type: ignore[arg-type] + result = np.asarray(self, dtype=dtype) if copy or na_value is not lib.no_default: result = result.copy() if na_value is not lib.no_default: @@ -527,8 +525,15 @@ def nbytes(self) -> int: # ------------------------------------------------------------------------ # Additional Methods # ------------------------------------------------------------------------ + @overload + def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray: + ... + + @overload + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: + ... - def astype(self, dtype: Dtype, copy: bool = True) -> np.ndarray: + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array with 'dtype'. @@ -562,7 +567,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> np.ndarray: ): # allow conversion to StringArrays return dtype.construct_array_type()._from_sequence(self, copy=False) - return np.array(self, dtype=dtype, copy=copy) + return np.array(self, dtype=cast(NpDtype, dtype), copy=copy) def isna(self) -> Union[np.ndarray, ExtensionArraySupportsAnyAll]: """ @@ -829,9 +834,9 @@ def unique(self) -> ExtensionArray: def searchsorted( self, - value: ArrayLike, + value: Sequence[Any], side: Literal["left", "right"] = "left", - sorter: Optional[ArrayLike] = None, + sorter: Optional[Sequence[Any]] = None, ) -> np.ndarray: """ Find indices where elements should be inserted to maintain order. @@ -877,7 +882,7 @@ def searchsorted( # 1. Values outside the range of the `data_for_sorting` fixture # 2. Values between the values in the `data_for_sorting` fixture # 3. Missing values. - arr = self.astype(object) + arr = cast(np.ndarray, self.astype(object)) return arr.searchsorted(value, side=side, sorter=sorter) def equals(self, other: object) -> bool: @@ -914,7 +919,7 @@ def equals(self, other: object) -> bool: equal_na = self.isna() & other.isna() # type: ignore[operator] return bool((equal_values | equal_na).all()) - def isin(self, values: Union[ExtensionArray, Sequence[Any]]) -> np.ndarray: + def isin(self, values: Sequence[Any]) -> np.ndarray: """ Pointwise comparison for set containment in the given values. @@ -928,7 +933,7 @@ def isin(self, values: Union[ExtensionArray, Sequence[Any]]) -> np.ndarray: ------- np.ndarray[bool] """ - return isin(np.asarray(self), values) + return isin(self.astype(object), values) def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: """ @@ -952,7 +957,7 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: The values returned by this method are also used in :func:`pandas.util.hash_pandas_object`. 
""" - return self.astype(object), np.nan + return cast(np.ndarray, self.astype(object)), np.nan def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]: """ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 4258279e37551..f2cb696fab07f 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -8,6 +8,7 @@ Tuple, Type, Union, + overload, ) import warnings @@ -20,6 +21,7 @@ from pandas._typing import ( ArrayLike, Dtype, + DtypeArg, ) from pandas.compat.numpy import function as nv @@ -296,7 +298,7 @@ def dtype(self) -> BooleanDtype: @classmethod def _from_sequence( - cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False + cls, scalars, *, dtype: Optional[DtypeArg] = None, copy: bool = False ) -> BooleanArray: if dtype: assert dtype == "boolean" @@ -379,7 +381,15 @@ def reconstruct(x): def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: return coerce_to_array(value) - def astype(self, dtype, copy: bool = True) -> ArrayLike: + @overload + def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray: + ... + + @overload + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: + ... + + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0bf5e05786d4d..afb3d7fc1aa22 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -15,6 +15,7 @@ TypeVar, Union, cast, + overload, ) from warnings import warn @@ -479,6 +480,14 @@ def _constructor(self) -> Type[Categorical]: def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False): return Categorical(scalars, dtype=dtype, copy=copy) + @overload + def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray: + ... + + @overload + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: + ... + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: """ Coerce this type to another dtype @@ -2454,11 +2463,7 @@ def _str_get_dummies(self, sep="|"): # sep may not be in categories. Just bail on this. from pandas.core.arrays import PandasArray - # error: Argument 1 to "PandasArray" has incompatible type - # "ExtensionArray"; expected "Union[ndarray, PandasArray]" - return PandasArray(self.astype(str))._str_get_dummies( # type: ignore[arg-type] - sep - ) + return PandasArray(self.astype(str))._str_get_dummies(sep) # The Series.cat accessor diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index bd5cc04659a06..4c4d0ec98813a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -9,6 +9,7 @@ TYPE_CHECKING, Any, Callable, + List, Optional, Sequence, Tuple, @@ -339,7 +340,7 @@ def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: return self._ndarray def __getitem__( - self, key: Union[int, slice, np.ndarray] + self, key: Union[int, slice, np.ndarray, List[Any]] ) -> Union[DatetimeLikeArrayMixin, DTScalarOrNaT]: """ This getitem defers to the underlying array, which by-definition can @@ -459,7 +460,9 @@ def view(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT: ... @overload - def view(self, dtype: Optional[Dtype] = ...) -> ArrayLike: + def view( + self: DatetimeLikeArrayT, dtype: Optional[Dtype] = ... + ) -> DatetimeLikeArrayT: ... 
def view(self, dtype: Optional[Dtype] = None) -> ArrayLike: @@ -553,8 +556,7 @@ def _validate_comparison_value(self, other): raise InvalidComparison(other) if isinstance(other, self._recognized_scalars) or other is NaT: - # error: Too many arguments for "object" - other = self._scalar_type(other) # type: ignore[call-arg] + other = self._scalar_type(other) try: self._check_compatible_with(other) except (TypeError, IncompatibleFrequency) as err: @@ -604,16 +606,14 @@ def _validate_shift_value(self, fill_value): if is_valid_na_for_dtype(fill_value, self.dtype): fill_value = NaT elif isinstance(fill_value, self._recognized_scalars): - # error: Too many arguments for "object" - fill_value = self._scalar_type(fill_value) # type: ignore[call-arg] + fill_value = self._scalar_type(fill_value) else: # only warn if we're not going to raise if self._scalar_type is Period and lib.is_integer(fill_value): # kludge for #31971 since Period(integer) tries to cast to str new_fill = Period._from_ordinal(fill_value, freq=self.freq) else: - # error: Too many arguments for "object" - new_fill = self._scalar_type(fill_value) # type: ignore[call-arg] + new_fill = self._scalar_type(fill_value) # stacklevel here is chosen to be correct when called from # DataFrame.shift or Series.shift @@ -674,8 +674,7 @@ def _validate_scalar( raise TypeError(msg) elif isinstance(value, self._recognized_scalars): - # error: Too many arguments for "object" - value = self._scalar_type(value) # type: ignore[call-arg] + value = self._scalar_type(value) else: msg = self._validation_error_message(value, allow_listlike) diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index fdd358a1b3856..8119906c1c5c0 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -5,6 +5,7 @@ Optional, Tuple, Type, + overload, ) import warnings @@ -16,6 +17,7 @@ ) from pandas._typing import ( ArrayLike, + Dtype, DtypeObj, ) from pandas.compat.numpy import function as nv @@ -277,7 +279,15 @@ def _from_sequence_of_strings( def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) - def astype(self, dtype, copy: bool = True) -> ArrayLike: + @overload + def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray: + ... + + @overload + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: + ... + + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index ae44acf06591f..72c67d25895a8 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -6,6 +6,7 @@ Optional, Tuple, Type, + overload, ) import warnings @@ -342,6 +343,14 @@ def _from_sequence_of_strings( def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) + @overload + def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray: + ... + + @overload + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: + ... + def astype(self, dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. 
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 31d58d9d89d49..0ea067fef887f 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -3,12 +3,14 @@ from typing import ( TYPE_CHECKING, Any, + List, Optional, Sequence, Tuple, Type, TypeVar, Union, + overload, ) import numpy as np @@ -138,7 +140,7 @@ def dtype(self) -> BaseMaskedDtype: raise AbstractMethodError(self) def __getitem__( - self, item: Union[int, slice, np.ndarray] + self, item: Union[int, slice, np.ndarray, List[Any]] ) -> Union[BaseMaskedArray, Any]: if is_integer(item): if self._mask[item]: @@ -212,14 +214,12 @@ def __len__(self) -> int: def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: return type(self)(~self._data, self._mask.copy()) - # error: Argument 1 of "to_numpy" is incompatible with supertype "ExtensionArray"; - # supertype defines the argument type as "Union[ExtensionDtype, str, dtype[Any], - # Type[str], Type[float], Type[int], Type[complex], Type[bool], Type[object], None]" - def to_numpy( # type: ignore[override] + def to_numpy( self, dtype: Optional[NpDtype] = None, copy: bool = False, na_value: Scalar = lib.no_default, + **kwargs: Any, ) -> np.ndarray: """ Convert to a NumPy Array. @@ -305,6 +305,14 @@ def to_numpy( # type: ignore[override] data = self._data.astype(dtype, copy=copy) return data + @overload + def astype(self, dtype: Type[str], copy: bool = True) -> np.ndarray: + ... + + @overload + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: + ... + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: dtype = pandas_dtype(dtype) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 5ef3c24726924..0a26d0bb77820 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -2,6 +2,7 @@ import numbers from typing import ( + Any, Optional, Tuple, Union, @@ -346,14 +347,12 @@ def skew( # ------------------------------------------------------------------------ # Additional Methods - # error: Argument 1 of "to_numpy" is incompatible with supertype "ExtensionArray"; - # supertype defines the argument type as "Union[ExtensionDtype, str, dtype[Any], - # Type[str], Type[float], Type[int], Type[complex], Type[bool], Type[object], None]" - def to_numpy( # type: ignore[override] + def to_numpy( self, dtype: Optional[NpDtype] = None, copy: bool = False, na_value=lib.no_default, + **kwargs: Any, ) -> np.ndarray: result = np.asarray(self._ndarray, dtype=dtype) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index a39182d61a8fb..d91522a9e1bb6 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -643,11 +643,7 @@ def fillna(self, value=None, method=None, limit=None) -> PeriodArray: if method is not None: # view as dt64 so we get treated as timelike in core.missing dta = self.view("M8[ns]") - # error: Item "ndarray" of "Union[ExtensionArray, ndarray]" has no attribute - # "fillna" - result = dta.fillna( # type: ignore[union-attr] - value=value, method=method, limit=limit - ) + result = dta.fillna(value=value, method=method, limit=limit) return result.view(self.dtype) return super().fillna(value=value, method=method, limit=limit) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 6f7badd3c2cd2..e7475d254d590 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -18,7 +18,7 @@ missing as libmissing, ) from pandas._typing import ( - Dtype, + DtypeArg, NpDtype, ) from 
pandas.util._decorators import doc @@ -221,7 +221,7 @@ def _chk_pyarrow_available(cls) -> None: raise ImportError(msg) @classmethod - def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy=False): + def _from_sequence(cls, scalars, dtype: Optional[DtypeArg] = None, copy=False): cls._chk_pyarrow_available() # convert non-na-likes to str, and nan-likes to ArrowStringDtype.na_value scalars = lib.ensure_string_array(scalars, copy=False) @@ -229,7 +229,7 @@ def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy=False): @classmethod def _from_sequence_of_strings( - cls, strings, dtype: Optional[Dtype] = None, copy=False + cls, strings, dtype: Optional[DtypeArg] = None, copy=False ): return cls._from_sequence(strings, dtype=dtype, copy=copy) @@ -248,14 +248,12 @@ def __arrow_array__(self, type=None): """Convert myself to a pyarrow Array or ChunkedArray.""" return self._data - # error: Argument 1 of "to_numpy" is incompatible with supertype "ExtensionArray"; - # supertype defines the argument type as "Union[ExtensionDtype, str, dtype[Any], - # Type[str], Type[float], Type[int], Type[complex], Type[bool], Type[object], None]" - def to_numpy( # type: ignore[override] + def to_numpy( self, dtype: Optional[NpDtype] = None, copy: bool = False, na_value=lib.no_default, + **kwargs: Any, ) -> np.ndarray: """ Convert to a NumPy ndarray. diff --git a/pandas/core/base.py b/pandas/core/base.py index 56ec2597314b2..7b552c05312c0 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -20,9 +20,10 @@ import pandas._libs.lib as lib from pandas._typing import ( - Dtype, + ArrayLike, DtypeObj, IndexLabel, + NpDtype, ) from pandas.compat import PYPY from pandas.compat.numpy import function as nv @@ -507,7 +508,7 @@ def array(self) -> ExtensionArray: def to_numpy( self, - dtype: Optional[Dtype] = None, + dtype: Optional[NpDtype] = None, copy: bool = False, na_value=lib.no_default, **kwargs, @@ -607,22 +608,14 @@ def to_numpy( dtype='datetime64[ns]') """ if is_extension_array_dtype(self.dtype): - # error: Too many arguments for "to_numpy" of "ExtensionArray" - return self.array.to_numpy( # type: ignore[call-arg] - dtype, copy=copy, na_value=na_value, **kwargs - ) + return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs) elif kwargs: bad_keys = list(kwargs.keys())[0] raise TypeError( f"to_numpy() got an unexpected keyword argument '{bad_keys}'" ) - # error: Argument "dtype" to "asarray" has incompatible type - # "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float], Type[int], - # Type[complex], Type[bool], Type[object], None]"; expected "Union[dtype[Any], - # None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, - # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" - result = np.asarray(self._values, dtype=dtype) # type: ignore[arg-type] + result = np.asarray(self._values, dtype=dtype) # TODO(GH-24345): Avoid potential double copy if copy or na_value is not lib.no_default: result = result.copy() @@ -1073,7 +1066,7 @@ def unique(self): values = self._values if not isinstance(values, np.ndarray): - result = values.unique() + result: ArrayLike = values.unique() if self.dtype.kind in ["m", "M"] and isinstance(self, ABCSeries): # GH#31182 Series._values returns EA, unpack for backward-compat if getattr(self.dtype, "tz", None) is None: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 5d1b5b97b3c45..278e34fbee5b4 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -3,9 +3,11 
@@ """ from typing import ( + TYPE_CHECKING, Any, Callable, Union, + cast, ) import warnings @@ -58,6 +60,9 @@ is_sequence, ) +if TYPE_CHECKING: + from pandas.core.arrays.base import ExtensionArray + POSSIBLY_CAST_DTYPES = { np.dtype(t).name for t in [ @@ -128,7 +133,7 @@ def ensure_str(value: Union[bytes, Any]) -> str: return value -def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray: +def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> ArrayLike: """ Ensure that an dtype array of some integer dtype has an int64 dtype if possible. @@ -155,19 +160,21 @@ def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray: will remain unchanged. """ # TODO: GH27506 potential bug with ExtensionArrays + def call_right_astype(arr: ArrayLike, inttype: str) -> ArrayLike: + if isinstance(arr, np.ndarray): + return arr.astype(inttype, copy=copy, casting="safe") + else: + return arr.astype(inttype, copy=copy) + try: - # error: Unexpected keyword argument "casting" for "astype" - return arr.astype("int64", copy=copy, casting="safe") # type: ignore[call-arg] + return call_right_astype(arr, "int64") except TypeError: pass try: - # error: Unexpected keyword argument "casting" for "astype" - return arr.astype("uint64", copy=copy, casting="safe") # type: ignore[call-arg] + return call_right_astype(arr, "uint64") except TypeError: if is_extension_array_dtype(arr.dtype): - # pandas/core/dtypes/common.py:168: error: Item "ndarray" of - # "Union[ExtensionArray, ndarray]" has no attribute "to_numpy" [union-attr] - return arr.to_numpy( # type: ignore[union-attr] + return cast("ExtensionArray", arr).to_numpy( dtype="float64", na_value=np.nan ) return arr.astype("float64", copy=copy) @@ -1848,9 +1855,7 @@ def pandas_dtype(dtype) -> DtypeObj: # registered extension types result = registry.find(dtype) if result is not None: - # error: Incompatible return value type (got "Type[ExtensionDtype]", - # expected "Union[dtype, ExtensionDtype]") - return result # type: ignore[return-value] + return result # try a numpy dtype # raise a consistent TypeError if failed diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b001139bef6c5..094db5758eb15 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4622,11 +4622,7 @@ def __getitem__(self, key): result = getitem(key) if not is_scalar(result): - # error: Argument 1 to "ndim" has incompatible type "Union[ExtensionArray, - # Any]"; expected "Union[Union[int, float, complex, str, bytes, generic], - # Sequence[Union[int, float, complex, str, bytes, generic]], - # Sequence[Sequence[Any]], _SupportsArray]" - if np.ndim(result) > 1: # type: ignore[arg-type] + if np.ndim(result) > 1: deprecate_ndim_indexing(result) return result # NB: Using _constructor._simple_new would break if MultiIndex diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1bcddee4d726e..b5dc9e0e907c0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -241,7 +241,9 @@ def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: this is often overridden to handle to_dense like operations """ if dtype == _dtype_obj: - return self.values.astype(_dtype_obj) + # error: Incompatible return value type (got "Union[ndarray, + # ExtensionArray]", expected "ndarray") + return self.values.astype(_dtype_obj) # type: ignore[return-value] # error: Incompatible return value type (got "Union[ndarray, ExtensionArray]", # expected "ndarray") return self.values # type: 
ignore[return-value] @@ -1470,9 +1472,7 @@ def iget(self, col): elif isinstance(col, slice): if col != slice(None): raise NotImplementedError(col) - # error: Invalid index type "List[Any]" for "ExtensionArray"; expected - # type "Union[int, slice, ndarray]" - return self.values[[loc]] # type: ignore[index] + return self.values[[loc]] return self.values[loc] else: if col != 0: diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index e2949eb227fbf..cf034d45aaec6 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -435,6 +435,7 @@ def _concatenate_join_units( elif any(isinstance(t, ExtensionArray) for t in to_concat): # concatting with at least one EA means we are concatting a single column # the non-EA values are 2D arrays with shape (1, n) + to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat] concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True) concat_values = ensure_block_shape(concat_values, 2) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6bd3e37ae101e..8651a89df4b2b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -3,6 +3,7 @@ from collections import defaultdict import itertools from typing import ( + TYPE_CHECKING, Any, Callable, DefaultDict, @@ -14,6 +15,7 @@ Tuple, TypeVar, Union, + cast, ) import warnings @@ -27,6 +29,7 @@ ArrayLike, Dtype, DtypeObj, + NpDtype, Shape, ) from pandas.errors import PerformanceWarning @@ -79,6 +82,9 @@ operate_blockwise, ) +if TYPE_CHECKING: + from pandas.core.arrays.base import ExtensionArray + # TODO: flexible with index=None and/or items=None T = TypeVar("T", bound="BlockManager") @@ -845,7 +851,7 @@ def copy_func(ax): def as_array( self, transpose: bool = False, - dtype: Optional[Dtype] = None, + dtype: Optional[NpDtype] = None, copy: bool = False, na_value=lib.no_default, ) -> np.ndarray: @@ -890,12 +896,7 @@ def as_array( else: arr = np.asarray(blk.get_values()) if dtype: - # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has - # incompatible type "Union[ExtensionDtype, str, dtype[Any], - # Type[object]]"; expected "Union[dtype[Any], None, type, - # _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, - # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" - arr = arr.astype(dtype, copy=False) # type: ignore[arg-type] + arr = arr.astype(dtype, copy=False) else: arr = self._interleave(dtype=dtype, na_value=na_value) # The underlying data was copied within _interleave @@ -928,25 +929,15 @@ def _interleave( elif is_dtype_equal(dtype, str): dtype = "object" - # error: Argument "dtype" to "empty" has incompatible type - # "Union[ExtensionDtype, str, dtype[Any], Type[object], None]"; expected - # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int], - # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, - # Any]]]" - result = np.empty(self.shape, dtype=dtype) # type: ignore[arg-type] - + result = np.empty(self.shape, dtype=cast(NpDtype, dtype)) itemmask = np.zeros(self.shape[0]) for blk in self.blocks: rl = blk.mgr_locs if blk.is_extension: # Avoid implicit conversion of extension blocks to object - - # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no - # attribute "to_numpy" - arr = blk.values.to_numpy( # type: ignore[union-attr] - dtype=dtype, na_value=na_value - ) + blk_values = cast("ExtensionArray", blk.values) + arr = blk_values.to_numpy(dtype=cast(NpDtype, dtype), 
na_value=na_value) else: # error: Argument 1 to "get_values" of "Block" has incompatible type # "Union[ExtensionDtype, str, dtype[Any], Type[object], None]"; expected diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py index 88e70723517e3..46eb7da4b6725 100644 --- a/pandas/core/internals/ops.py +++ b/pandas/core/internals/ops.py @@ -108,28 +108,28 @@ def _get_same_shape_values( # TODO(EA2D): with 2D EAs only this first clause would be needed if not (left_ea or right_ea): - # error: Invalid index type "Tuple[Any, slice]" for "Union[ndarray, - # ExtensionArray]"; expected type "Union[int, slice, ndarray]" - lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[index] + # error: No overload variant of "__getitem__" of + # "ExtensionArray" matches argument type "Tuple[Any, slice]" + lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[call-overload] assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) elif left_ea and right_ea: assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) elif right_ea: # lvals are 2D, rvals are 1D - # error: Invalid index type "Tuple[Any, slice]" for "Union[ndarray, - # ExtensionArray]"; expected type "Union[int, slice, ndarray]" - lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[index] + # error: No overload variant of "__getitem__" of + # "ExtensionArray" matches argument type "Tuple[Any, slice]"" + lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[call-overload] assert lvals.shape[0] == 1, lvals.shape - # error: Invalid index type "Tuple[int, slice]" for "Union[Any, - # ExtensionArray]"; expected type "Union[int, slice, ndarray]" - lvals = lvals[0, :] # type: ignore[index] + # error: No overload variant of "__getitem__" of + # "ExtensionArray" matches argument type "Tuple[int, slice]" + lvals = lvals[0, :] # type: ignore[call-overload] else: # lvals are 1D, rvals are 2D assert rvals.shape[0] == 1, rvals.shape - # error: Invalid index type "Tuple[int, slice]" for "Union[ndarray, - # ExtensionArray]"; expected type "Union[int, slice, ndarray]" - rvals = rvals[0, :] # type: ignore[index] + # error: No overload variant of "__getitem__" of + # "ExtensionArray" matches argument type "Tuple[int, slice]" + rvals = rvals[0, :] # type: ignore[call-overload] return lvals, rvals diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 48b2084319292..bd6f645e7f424 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -310,11 +310,7 @@ def interpolate_1d( if method in NP_METHODS: # np.interp requires sorted X values, #21037 - # error: Argument 1 to "argsort" has incompatible type "Union[ExtensionArray, - # Any]"; expected "Union[Union[int, float, complex, str, bytes, generic], - # Sequence[Union[int, float, complex, str, bytes, generic]], - # Sequence[Sequence[Any]], _SupportsArray]" - indexer = np.argsort(inds[valid]) # type: ignore[arg-type] + indexer = np.argsort(inds[valid]) result[invalid] = np.interp( inds[invalid], inds[valid][indexer], yvalues[valid][indexer] ) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 1e71069e5be4d..2cda7dacfd51a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1081,9 +1081,7 @@ def calc_with_mask(carg, mask): # string with NaN-like try: - # error: Argument 2 to "isin" has incompatible type "List[Any]"; expected - # "Union[Union[ExtensionArray, ndarray], Index, Series]" - mask = ~algorithms.isin(arg, list(nat_strings)) # type: ignore[arg-type] + mask = ~algorithms.isin(arg, list(nat_strings)) return 
calc_with_mask(arg, mask) except (ValueError, OverflowError, TypeError): pass diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index a768ec8ad4eb3..4ada1aad139ee 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1803,11 +1803,7 @@ def get_format_timedelta64( one_day_nanos = 86400 * 10 ** 9 even_days = ( - # error: Unsupported operand types for % ("ExtensionArray" and "int") - np.logical_and( - consider_values, values_int % one_day_nanos != 0 # type: ignore[operator] - ).sum() - == 0 + np.logical_and(consider_values, values_int % one_day_nanos != 0).sum() == 0 ) if even_days: diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 8cfbae3cafc18..4539ceabbb92f 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -531,11 +531,7 @@ def _convert_to_ndarrays( try: values = lib.map_infer(values, conv_f) except ValueError: - # error: Argument 2 to "isin" has incompatible type "List[Any]"; - # expected "Union[Union[ExtensionArray, ndarray], Index, Series]" - mask = algorithms.isin( - values, list(na_values) # type: ignore[arg-type] - ).view(np.uint8) + mask = algorithms.isin(values, list(na_values)).view(np.uint8) values = lib.map_infer_mask(values, conv_f, mask) cvals, na_count = self._infer_types( @@ -661,9 +657,7 @@ def _infer_types(self, values, na_values, try_num_bool=True): """ na_count = 0 if issubclass(values.dtype.type, (np.number, np.bool_)): - # error: Argument 2 to "isin" has incompatible type "List[Any]"; expected - # "Union[Union[ExtensionArray, ndarray], Index, Series]" - mask = algorithms.isin(values, list(na_values)) # type: ignore[arg-type] + mask = algorithms.isin(values, list(na_values)) # error: Incompatible types in assignment (expression has type # "number[Any]", variable has type "int") na_count = mask.sum() # type: ignore[assignment] diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 58e5dc34d59d5..316f31ed879c6 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -5,6 +5,7 @@ import random import sys from typing import ( + Any, Type, Union, ) @@ -103,9 +104,10 @@ def _from_factorized(cls, values, original): _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray) def to_numpy( - self, dtype=None, copy: bool = False, na_value=no_default, decimals=None + self, dtype=None, copy: bool = False, na_value=no_default, **kwargs: Any ) -> np.ndarray: result = np.asarray(self, dtype=dtype) + decimals = kwargs.get("decimals", None) if decimals is not None: result = np.asarray([round(x, decimals) for x in result]) return result From db8ed9bc4c96f7306d007eac1297da345a2bb30d Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 12 Mar 2021 15:55:21 -0500 Subject: [PATCH 16/19] fix Literal import --- pandas/core/arrays/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index a42a0d08bb11a..4fc568d04cb44 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -16,7 +16,6 @@ Dict, Iterator, List, - Literal, Optional, Sequence, Tuple, @@ -78,6 +77,7 @@ ) if TYPE_CHECKING: + from typing import Literal class ExtensionArraySupportsAnyAll("ExtensionArray"): def any(self, *, skipna: bool = True) -> bool: From f8191f88d5550bfff5db7884550902c031720c3d Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 12 Mar 2021 17:16:19 -0500 Subject: [PATCH 17/19] fix logic in ensure_int_or_float 
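Editor's note: the rewrite below handles the ExtensionArray case up front and
keeps the numpy "safe"-casting fallback chain for plain ndarrays; typing.cast
is a typing-time assertion only and performs no runtime conversion. A small
illustration of the ExtensionArray branch, assuming only that pandas and numpy
are installed:

from typing import cast

import numpy as np
import pandas as pd
from pandas.api.extensions import ExtensionArray

arr = pd.array([1, 2, None], dtype="Int64")    # nullable IntegerArray
ea = cast(ExtensionArray, arr)                 # no-op at runtime, narrows for mypy
print(ea.to_numpy(dtype="float64", na_value=np.nan))   # [ 1.  2. nan]

# For plain ndarrays the patch keeps astype(..., casting="safe"), which raises
# TypeError instead of silently truncating, e.g.:
#   np.array([1.5]).astype("int64", casting="safe")  -> TypeError
# so the int64 -> uint64 -> float64 chain acts as a fallback.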
--- pandas/core/dtypes/common.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 278e34fbee5b4..d087c6ba50096 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -160,24 +160,18 @@ def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> ArrayLike: will remain unchanged. """ # TODO: GH27506 potential bug with ExtensionArrays - def call_right_astype(arr: ArrayLike, inttype: str) -> ArrayLike: - if isinstance(arr, np.ndarray): - return arr.astype(inttype, copy=copy, casting="safe") - else: - return arr.astype(inttype, copy=copy) - - try: - return call_right_astype(arr, "int64") - except TypeError: - pass - try: - return call_right_astype(arr, "uint64") - except TypeError: - if is_extension_array_dtype(arr.dtype): - return cast("ExtensionArray", arr).to_numpy( - dtype="float64", na_value=np.nan - ) - return arr.astype("float64", copy=copy) + if is_extension_array_dtype(arr.dtype): + return cast("ExtensionArray", arr).to_numpy(dtype="float64", na_value=np.nan) + else: + assert isinstance(arr, np.ndarray) # For typing + try: + return arr.astype("int64", copy=copy, casting="safe") + except TypeError: + pass + try: + return arr.astype("uint64", copy=copy, casting="safe") + except TypeError: + return arr.astype("float64", copy=copy) def ensure_python_int(value: Union[int, np.integer]) -> int: From 6f8fcb5025abe68ba675721ddfd2287fc300c933 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 12 Mar 2021 18:25:08 -0500 Subject: [PATCH 18/19] fix typing in groupby to_numpy call --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 979c7aa990184..a42accc480444 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2275,7 +2275,7 @@ def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]: inference: Optional[np.dtype] = None if is_integer_dtype(vals.dtype): if isinstance(vals, ExtensionArray): - out = vals.to_numpy(dtype=float, na_value=np.nan) + out = vals.to_numpy(dtype=np.dtype(float), na_value=np.nan) else: out = vals inference = np.dtype(np.int64) From 3ea2420b4f38b2b340bade8decb4d126586c6c16 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 12 Mar 2021 22:08:30 -0500 Subject: [PATCH 19/19] fix groupby again. 
Allow kwargs for extension to_numpy --- pandas/core/groupby/groupby.py | 2 +- pandas/tests/base/test_conversion.py | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a42accc480444..7c18d4b74c19c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2280,7 +2280,7 @@ def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]: out = vals inference = np.dtype(np.int64) elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray): - out = vals.to_numpy(dtype=float, na_value=np.nan) + out = vals.to_numpy(dtype=np.dtype(float), na_value=np.nan) elif is_datetime64_dtype(vals.dtype): inference = np.dtype("datetime64[ns]") out = np.asarray(vals).astype(float) diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 7045a0abbeb81..1d0971749ec6b 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -426,11 +426,6 @@ def test_to_numpy_kwargs_raises(): with pytest.raises(TypeError, match=msg): s.to_numpy(foo=True) - # extension - s = Series([1, 2, 3], dtype="Int64") - with pytest.raises(TypeError, match=msg): - s.to_numpy(foo=True) - @pytest.mark.parametrize( "data",