diff --git a/pandas/_typing.py b/pandas/_typing.py index 433f8645d35a8..93d49497a85e0 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -126,10 +126,9 @@ ] # dtypes -NpDtype = Union[str, np.dtype] -Dtype = Union[ - "ExtensionDtype", NpDtype, type_t[Union[str, float, int, complex, bool, object]] -] +NpDtype = Union[str, np.dtype, type_t[Union[str, float, int, complex, bool, object]]] +Dtype = Union["ExtensionDtype", NpDtype] +AstypeArg = Union["ExtensionDtype", "npt.DTypeLike"] # DtypeArg specifies all allowable dtypes in a functions its dtype argument DtypeArg = Union[Dtype, Dict[Hashable, Dtype]] DtypeObj = Union[np.dtype, "ExtensionDtype"] diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 4cc0d4185b22c..b0b7b81d059e6 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -18,6 +18,7 @@ Sequence, TypeVar, cast, + overload, ) import numpy as np @@ -25,6 +26,7 @@ from pandas._libs import lib from pandas._typing import ( ArrayLike, + AstypeArg, Dtype, FillnaOptions, PositionalIndexer, @@ -520,9 +522,21 @@ def nbytes(self) -> int: # Additional Methods # ------------------------------------------------------------------------ - def astype(self, dtype, copy=True): + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: """ - Cast to a NumPy array with 'dtype'. + Cast to a NumPy array or ExtensionArray with 'dtype'. Parameters ---------- @@ -535,8 +549,10 @@ def astype(self, dtype, copy=True): Returns ------- - array : ndarray - NumPy ndarray with 'dtype' for its dtype. + array : np.ndarray or ExtensionArray + An ExtensionArray if dtype is StringDtype, + or same as that of underlying array. + Otherwise a NumPy ndarray with 'dtype' for its dtype. """ from pandas.core.arrays.string_ import StringDtype @@ -552,7 +568,11 @@ def astype(self, dtype, copy=True): # allow conversion to StringArrays return dtype.construct_array_type()._from_sequence(self, copy=False) - return np.array(self, dtype=dtype, copy=copy) + # error: Argument "dtype" to "array" has incompatible type + # "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None, type, + # _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, + # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" + return np.array(self, dtype=dtype, copy=copy) # type: ignore[arg-type] def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll: """ @@ -863,6 +883,8 @@ def searchsorted( # 2. Values between the values in the `data_for_sorting` fixture # 3. Missing values. arr = self.astype(object) + if isinstance(value, ExtensionArray): + value = value.astype(object) return arr.searchsorted(value, side=side, sorter=sorter) def equals(self, other: object) -> bool: diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 14d059c04b7c0..6f473249c9a7e 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -1,7 +1,10 @@ from __future__ import annotations import numbers -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + overload, +) import warnings import numpy as np @@ -12,7 +15,9 @@ ) from pandas._typing import ( ArrayLike, + AstypeArg, Dtype, + npt, type_t, ) from pandas.compat.numpy import function as nv @@ -33,6 +38,7 @@ from pandas.core.dtypes.missing import isna from pandas.core import ops +from pandas.core.arrays import ExtensionArray from pandas.core.arrays.masked import ( BaseMaskedArray, BaseMaskedDtype, @@ -392,7 +398,20 @@ def reconstruct(x): def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: return coerce_to_array(value) - def astype(self, dtype, copy: bool = True) -> ArrayLike: + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + """ Cast to a NumPy array or ExtensionArray with 'dtype'. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c6be9e5886a1d..b8ceef3d52e41 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -11,6 +11,7 @@ TypeVar, Union, cast, + overload, ) from warnings import ( catch_warnings, @@ -32,6 +33,7 @@ from pandas._libs.lib import no_default from pandas._typing import ( ArrayLike, + AstypeArg, Dtype, NpDtype, Ordered, @@ -482,7 +484,19 @@ def _constructor(self) -> type[Categorical]: def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): return Categorical(scalars, dtype=dtype, copy=copy) - def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: """ Coerce this type to another dtype @@ -2458,11 +2472,7 @@ def _str_get_dummies(self, sep="|"): # sep may not be in categories. Just bail on this. from pandas.core.arrays import PandasArray - # error: Argument 1 to "PandasArray" has incompatible type - # "ExtensionArray"; expected "Union[ndarray, PandasArray]" - return PandasArray(self.astype(str))._str_get_dummies( # type: ignore[arg-type] - sep - ) + return PandasArray(self.astype(str))._str_get_dummies(sep) # The Series.cat accessor diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 1acbcf17dfffd..25b4076bd23c6 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -1,5 +1,6 @@ from __future__ import annotations +from typing import overload import warnings import numpy as np @@ -10,7 +11,9 @@ ) from pandas._typing import ( ArrayLike, + AstypeArg, DtypeObj, + npt, ) from pandas.compat.numpy import function as nv from pandas.util._decorators import cache_readonly @@ -31,6 +34,7 @@ ) from pandas.core.dtypes.missing import isna +from pandas.core.arrays import ExtensionArray from pandas.core.arrays.numeric import ( NumericArray, NumericDtype, @@ -271,7 +275,19 @@ def _from_sequence_of_strings( def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) - def astype(self, dtype, copy: bool = True) -> ArrayLike: + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index c9ba762a271bd..e62a2f95b0340 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -1,5 +1,6 @@ from __future__ import annotations +from typing import overload import warnings import numpy as np @@ -11,8 +12,10 @@ ) from pandas._typing import ( ArrayLike, + AstypeArg, Dtype, DtypeObj, + npt, ) from pandas.compat.numpy import function as nv from pandas.util._decorators import cache_readonly @@ -33,6 +36,7 @@ ) from pandas.core.dtypes.missing import isna +from pandas.core.arrays import ExtensionArray from pandas.core.arrays.masked import ( BaseMaskedArray, BaseMaskedDtype, @@ -333,7 +337,19 @@ def _from_sequence_of_strings( def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) - def astype(self, dtype, copy: bool = True) -> ArrayLike: + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 718e135464385..cccfd58aa914d 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -5,6 +5,7 @@ Any, Sequence, TypeVar, + overload, ) import numpy as np @@ -15,10 +16,11 @@ ) from pandas._typing import ( ArrayLike, - Dtype, + AstypeArg, NpDtype, PositionalIndexer, Scalar, + npt, type_t, ) from pandas.errors import AbstractMethodError @@ -282,9 +284,7 @@ def to_numpy( # type: ignore[override] if na_value is lib.no_default: na_value = libmissing.NA if dtype is None: - # error: Incompatible types in assignment (expression has type - # "Type[object]", variable has type "Union[str, dtype[Any], None]") - dtype = object # type: ignore[assignment] + dtype = object if self._hasna: if ( not is_object_dtype(dtype) @@ -303,7 +303,19 @@ def to_numpy( # type: ignore[override] data = self._data.astype(dtype, copy=copy) return data - def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: dtype = pandas_dtype(dtype) if is_dtype_equal(dtype, self.dtype): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 488981bcc9687..2db1f00e237ee 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -351,9 +351,7 @@ def freq(self) -> BaseOffset: def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: if dtype == "i8": return self.asi8 - # error: Non-overlapping equality check (left operand type: "Optional[Union[str, - # dtype[Any]]]", right operand type: "Type[bool]") - elif dtype == bool: # type: ignore[comparison-overlap] + elif dtype == bool: return ~self._isnan # This will raise TypeError for non-object dtypes diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index b1c794ac03b31..6dce9b4475d1b 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -27,6 +27,7 @@ from pandas._libs.tslibs import NaT from pandas._typing import ( ArrayLike, + AstypeArg, Dtype, NpDtype, Scalar, @@ -527,9 +528,7 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: try: dtype = np.result_type(self.sp_values.dtype, type(fill_value)) except TypeError: - # error: Incompatible types in assignment (expression has type - # "Type[object]", variable has type "Union[str, dtype[Any], None]") - dtype = object # type: ignore[assignment] + dtype = object out = np.full(self.shape, fill_value, dtype=dtype) out[self.sp_index.to_int_index().indices] = self.sp_values @@ -1072,7 +1071,7 @@ def _concat_same_type( return cls(data, sparse_index=sp_index, fill_value=fill_value) - def astype(self, dtype: Dtype | None = None, copy=True): + def astype(self, dtype: AstypeArg | None = None, copy=True): """ Change the dtype of a SparseArray. diff --git a/pandas/core/common.py b/pandas/core/common.py index b32614577393d..2bf925466e176 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -232,12 +232,7 @@ def asarray_tuplesafe(values, dtype: NpDtype | None = None) -> np.ndarray: # expected "ndarray") return values._values # type: ignore[return-value] - # error: Non-overlapping container check (element type: "Union[str, dtype[Any], - # None]", container item type: "type") - if isinstance(values, list) and dtype in [ # type: ignore[comparison-overlap] - np.object_, - object, - ]: + if isinstance(values, list) and dtype in [np.object_, object]: return construct_1d_object_array_from_listlike(values) result = np.asarray(values, dtype=dtype) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index bf1f5d37f07dc..6be2e803b5910 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -29,8 +29,8 @@ from pandas._typing import ( Dtype, DtypeObj, - NpDtype, Ordered, + npt, type_t, ) @@ -1294,7 +1294,7 @@ class PandasDtype(ExtensionDtype): _metadata = ("_dtype",) - def __init__(self, dtype: NpDtype | PandasDtype | None): + def __init__(self, dtype: npt.DTypeLike | PandasDtype | None): if isinstance(dtype, PandasDtype): # make constructor univalent dtype = dtype.numpy_dtype diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0010624609907..e3fcff1557ca9 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1369,7 +1369,7 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: """ return object dtype as boxed values, such as Timestamps/Timedelta """ - values = self.values + values: ArrayLike = self.values if dtype == _dtype_obj: values = values.astype(object) # TODO(EA2D): reshape not needed with 2D EAs