pandas-dev · mroeschke · Nov 16, 2022 · Nov 11, 2022 · Nov 11, 2022 · mroeschke
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -641,10 +641,12 @@ def astype(self, dtype, copy: bool = True):
 
         elif self.tz is None and isinstance(dtype, DatetimeTZDtype):
             # pre-2.0 this did self.tz_localize(dtype.tz), which did not match
-            #  the Series behavior
+            #  the Series behavior, which did
+            #  values.tz_localize("UTC").tz_convert(dtype.tz)
             raise TypeError(
                 "Cannot use .astype to convert from timezone-naive dtype to "
-                "timezone-aware dtype. Use obj.tz_localize instead."
+                "timezone-aware dtype. Use obj.tz_localize instead or "
+                "series.dt.tz_localize instead"
             )
 
         elif self.tz is not None and is_datetime64_dtype(dtype):

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
@@ -23,7 +23,6 @@
 
 from pandas.core.dtypes.common import (
     is_datetime64_dtype,
-    is_datetime64tz_dtype,
     is_dtype_equal,
     is_integer_dtype,
     is_object_dtype,
@@ -211,16 +210,6 @@ def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> Arra
         msg = rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
         raise TypeError(msg)
 
-    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
-        # Series.astype behavior pre-2.0 did
-        #  values.tz_localize("UTC").tz_convert(dtype.tz)
-        #  which did not match the DTA/DTI behavior.
-        # We special-case here to give a Series-specific exception message.
-        raise TypeError(
-            "Cannot use .astype to convert from timezone-naive dtype to "
-            "timezone-aware dtype. Use ser.dt.tz_localize instead."
-        )
-
     if is_dtype_equal(values.dtype, dtype):
         if copy:
             return values.copy()

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -1020,7 +1020,7 @@ def astype(self, dtype, copy: bool = True):
             # NB: this must come before the ExtensionDtype check below
             # TODO: this differs from Series behavior; can/should we align them?
             raise TypeError(
-                f"Cannot convert Float64Index to dtype {dtype}; integer "
+                f"Cannot convert dtype={self.dtype} to dtype {dtype}; integer "
                 "values are required for conversion"
             )
 

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -36,6 +36,7 @@
 from pandas._typing import (
     ArrayLike,
     DtypeArg,
+    DtypeObj,
     Scalar,
 )
 from pandas.errors import (
@@ -61,14 +62,18 @@
     is_string_dtype,
     pandas_dtype,
 )
-from pandas.core.dtypes.dtypes import CategoricalDtype
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
+    ExtensionDtype,
+)
 from pandas.core.dtypes.missing import isna
 
 from pandas import StringDtype
 from pandas.core import algorithms
 from pandas.core.arrays import (
     BooleanArray,
     Categorical,
+    ExtensionArray,
     FloatingArray,
     IntegerArray,
 )
@@ -599,14 +604,8 @@ def _convert_to_ndarrays(
                 # type specified in dtype param or cast_type is an EA
                 if cast_type and (not is_dtype_equal(cvals, cast_type) or is_ea):
                     if not is_ea and na_count > 0:
-                        try:
-                            if is_bool_dtype(cast_type):
-                                raise ValueError(
-                                    f"Bool column has NA values in column {c}"
-                                )
-                        except (AttributeError, TypeError):
-                            # invalid input to is_bool_dtype
-                            pass
+                        if is_bool_dtype(cast_type):
+                            raise ValueError(f"Bool column has NA values in column {c}")
                     cast_type = pandas_dtype(cast_type)
                     cvals = self._cast_types(cvals, cast_type, c)
 
@@ -686,7 +685,7 @@ def _set(x) -> int:
 
     def _infer_types(
         self, values, na_values, no_dtype_specified, try_num_bool: bool = True
-    ):
+    ) -> tuple[ArrayLike, int]:
         """
         Infer types of values, possibly casting
 
@@ -700,7 +699,7 @@ def _infer_types(
 
         Returns
         -------
-        converted : ndarray
+        converted : ndarray or ExtensionArray
         na_count : int
         """
         na_count = 0
@@ -777,48 +776,50 @@ def _infer_types(
 
         return result, na_count
 
-    def _cast_types(self, values, cast_type, column):
+    def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLike:
         """
         Cast values to specified type
 
         Parameters
         ----------
-        values : ndarray
-        cast_type : string or np.dtype
+        values : ndarray or ExtensionArray
+        cast_type : np.dtype or ExtensionDtype
            dtype to cast values to
         column : string
             column name - used only for error reporting
 
         Returns
         -------
-        converted : ndarray
+        converted : ndarray or ExtensionArray
         """
         if is_categorical_dtype(cast_type):
             known_cats = (
                 isinstance(cast_type, CategoricalDtype)
                 and cast_type.categories is not None
             )
 
-            if not is_object_dtype(values) and not known_cats:
+            if not is_object_dtype(values.dtype) and not known_cats:
                 # TODO: this is for consistency with
                 # c-parser which parses all categories
                 # as strings
 
-                values = astype_nansafe(values, np.dtype(str))
+                values = lib.ensure_string_array(
+                    values, skipna=False, convert_na_value=False
+                )
 
             cats = Index(values).unique().dropna()
             values = Categorical._from_inferred_categories(
                 cats, cats.get_indexer(values), cast_type, true_values=self.true_values
             )
 
         # use the EA's implementation of casting
-        elif is_extension_array_dtype(cast_type):
-            # ensure cast_type is an actual dtype and not a string
-            cast_type = pandas_dtype(cast_type)
+        elif isinstance(cast_type, ExtensionDtype):
             array_type = cast_type.construct_array_type()
             try:
                 if is_bool_dtype(cast_type):
-                    return array_type._from_sequence_of_strings(
+                    # error: Unexpected keyword argument "true_values" for
+                    # "_from_sequence_of_strings" of "ExtensionArray"
+                    return array_type._from_sequence_of_strings(  # type: ignore[call-arg]  # noqa:E501
                         values,
                         dtype=cast_type,
                         true_values=self.true_values,
@@ -832,6 +833,8 @@ def _cast_types(self, values, cast_type, column):
                     "_from_sequence_of_strings in order to be used in parser methods"
                 ) from err
 
+        elif isinstance(values, ExtensionArray):
+            values = values.astype(cast_type, copy=False)
         else:
             try:
                 values = astype_nansafe(values, cast_type, copy=True, skipna=True)

diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
@@ -368,7 +368,7 @@ def test_astype_copies(self, dtype, other):
 
         if err:
             if dtype == "datetime64[ns]":
-                msg = "Use ser.dt.tz_localize instead"
+                msg = "Use obj.tz_localize instead or series.dt.tz_localize instead"
             else:
                 msg = "from timezone-aware dtype to timezone-naive dtype"
             with pytest.raises(TypeError, match=msg):

diff --git a/pandas/tests/indexes/numeric/test_astype.py b/pandas/tests/indexes/numeric/test_astype.py
@@ -75,7 +75,7 @@ def test_cannot_cast_to_datetimelike(self, dtype):
         idx = Float64Index([0, 1.1, 2])
 
         msg = (
-            f"Cannot convert Float64Index to dtype {pandas_dtype(dtype)}; "
+            f"Cannot convert dtype=float64 to dtype {pandas_dtype(dtype)}; "
             f"integer values are required for conversion"
         )
         with pytest.raises(TypeError, match=re.escape(msg)):