diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 44cc108ed9cfd..ea2ca1f70d414 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -10,7 +10,6 @@ from pandas.core.dtypes.common import ( is_bool_dtype, - is_extension_array_dtype, is_float, is_float_dtype, is_integer_dtype, @@ -18,7 +17,7 @@ is_numeric_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import register_extension_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype, register_extension_dtype from pandas.core.dtypes.missing import isna from pandas.core import ops @@ -372,18 +371,10 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: if incompatible type with an BooleanDtype, equivalent of same_kind casting """ - from pandas.core.arrays.string_ import StringDtype - dtype = pandas_dtype(dtype) - if isinstance(dtype, BooleanDtype): - values, mask = coerce_to_array(self, copy=copy) - if not copy: - return self - else: - return BooleanArray(values, mask, copy=False) - elif isinstance(dtype, StringDtype): - return dtype.construct_array_type()._from_sequence(self, copy=False) + if isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy) if is_bool_dtype(dtype): # astype_nansafe converts np.nan to True @@ -391,15 +382,11 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: raise ValueError("cannot convert float NaN to bool") else: return self._data.astype(dtype, copy=copy) - if is_extension_array_dtype(dtype) and is_integer_dtype(dtype): - from pandas.core.arrays import IntegerArray - return IntegerArray( - self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False - ) # for integer, error if there are missing values if is_integer_dtype(dtype) and self._hasna: raise ValueError("cannot convert NA to integer") + # for float dtype, ensure we use np.nan before casting (numpy cannot # deal with pd.NA) na_value = self._na_value diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index aedf6adf09db4..1e9024e32c5b7 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -19,14 +19,13 @@ is_object_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import register_extension_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype, register_extension_dtype from pandas.core.dtypes.missing import isna from pandas.core import ops from pandas.core.ops import invalid_comparison from pandas.core.tools.numeric import to_numeric -from .masked import BaseMaskedDtype from .numeric import NumericArray, NumericDtype @@ -332,24 +331,10 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: if incompatible type with an FloatingDtype, equivalent of same_kind casting """ - from pandas.core.arrays.string_ import StringArray, StringDtype - dtype = pandas_dtype(dtype) - # if the dtype is exactly the same, we can fastpath - if self.dtype == dtype: - # return the same object for copy=False - return self.copy() if copy else self - # if we are astyping to another nullable masked dtype, we can fastpath - if isinstance(dtype, BaseMaskedDtype): - # TODO deal with NaNs - data = self._data.astype(dtype.numpy_dtype, copy=copy) - # mask is copied depending on whether the data was copied, and - # not directly depending on the `copy` keyword - mask = self._mask if data is self._data else self._mask.copy() - return dtype.construct_array_type()(data, mask, copy=False) - elif isinstance(dtype, StringDtype): - return StringArray._from_sequence(self, copy=False) + if isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy=copy) # coerce if is_float_dtype(dtype): diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 98e29f2062983..d01f84b224a89 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -9,7 +9,7 @@ from pandas.compat.numpy import function as nv from pandas.util._decorators import cache_readonly -from pandas.core.dtypes.base import register_extension_dtype +from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype from pandas.core.dtypes.common import ( is_bool_dtype, is_datetime64_dtype, @@ -390,24 +390,10 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: if incompatible type with an IntegerDtype, equivalent of same_kind casting """ - from pandas.core.arrays.masked import BaseMaskedDtype - from pandas.core.arrays.string_ import StringDtype - dtype = pandas_dtype(dtype) - # if the dtype is exactly the same, we can fastpath - if self.dtype == dtype: - # return the same object for copy=False - return self.copy() if copy else self - # if we are astyping to another nullable masked dtype, we can fastpath - if isinstance(dtype, BaseMaskedDtype): - data = self._data.astype(dtype.numpy_dtype, copy=copy) - # mask is copied depending on whether the data was copied, and - # not directly depending on the `copy` keyword - mask = self._mask if data is self._data else self._mask.copy() - return dtype.construct_array_type()(data, mask, copy=False) - elif isinstance(dtype, StringDtype): - return dtype.construct_array_type()._from_sequence(self, copy=False) + if isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy=copy) # coerce if is_float_dtype(dtype): diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index caed932cd7857..7821f103909da 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -5,16 +5,18 @@ import numpy as np from pandas._libs import lib, missing as libmissing -from pandas._typing import Scalar +from pandas._typing import ArrayLike, Dtype, Scalar from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly, doc from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import ( + is_dtype_equal, is_integer, is_object_dtype, is_scalar, is_string_dtype, + pandas_dtype, ) from pandas.core.dtypes.missing import isna, notna @@ -229,6 +231,30 @@ def to_numpy( data = self._data.astype(dtype, copy=copy) return data + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: + dtype = pandas_dtype(dtype) + + if is_dtype_equal(dtype, self.dtype): + if copy: + return self.copy() + return self + + # if we are astyping to another nullable masked dtype, we can fastpath + if isinstance(dtype, BaseMaskedDtype): + # TODO deal with NaNs for FloatingArray case + data = self._data.astype(dtype.numpy_dtype, copy=copy) + # mask is copied depending on whether the data was copied, and + # not directly depending on the `copy` keyword + mask = self._mask if data is self._data else self._mask.copy() + cls = dtype.construct_array_type() + return cls(data, mask, copy=False) + + if isinstance(dtype, ExtensionDtype): + eacls = dtype.construct_array_type() + return eacls._from_sequence(self, dtype=dtype, copy=copy) + + raise NotImplementedError("subclass must implement astype to np.dtype") + __array_priority__ = 1000 # higher than ndarray so ops dispatch to us def __array__(self, dtype=None) -> np.ndarray: diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index cc2013deb5252..74a41a0b64ff8 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -10,6 +10,7 @@ from pandas.core.dtypes.common import ( is_array_like, is_bool_dtype, + is_dtype_equal, is_integer_dtype, is_object_dtype, is_string_dtype, @@ -285,10 +286,12 @@ def __setitem__(self, key, value): def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) - if isinstance(dtype, StringDtype): + + if is_dtype_equal(dtype, self.dtype): if copy: return self.copy() return self + elif isinstance(dtype, _IntegerDtype): arr = self._ndarray.copy() mask = self.isna()