Skip to content

Commit ec5b62e

Browse files
authored
REF: share astype exception messages (#49655)
* REF: share astype exception messages
* mypy fixup
1 parent 0a58c03 commit ec5b62e

File tree

6 files changed

+31
-37
lines changed

6 files changed

+31
-37
lines changed

pandas/core/arrays/datetimes.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -641,10 +641,12 @@ def astype(self, dtype, copy: bool = True):
641641

642642
elif self.tz is None and isinstance(dtype, DatetimeTZDtype):
643643
# pre-2.0 this did self.tz_localize(dtype.tz), which did not match
644-
# the Series behavior
644+
# the Series behavior which did
645+
# values.tz_localize("UTC").tz_convert(dtype.tz)
645646
raise TypeError(
646647
"Cannot use .astype to convert from timezone-naive dtype to "
647-
"timezone-aware dtype. Use obj.tz_localize instead."
648+
"timezone-aware dtype. Use obj.tz_localize instead or "
649+
"series.dt.tz_localize instead"
648650
)
649651

650652
elif self.tz is not None and is_datetime64_dtype(dtype):

pandas/core/dtypes/astype.py

-11
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@
2323

2424
from pandas.core.dtypes.common import (
2525
is_datetime64_dtype,
26-
is_datetime64tz_dtype,
2726
is_dtype_equal,
2827
is_integer_dtype,
2928
is_object_dtype,
@@ -211,16 +210,6 @@ def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> Arra
211210
msg = rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
212211
raise TypeError(msg)
213212

214-
if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
215-
# Series.astype behavior pre-2.0 did
216-
# values.tz_localize("UTC").tz_convert(dtype.tz)
217-
# which did not match the DTA/DTI behavior.
218-
# We special-case here to give a Series-specific exception message.
219-
raise TypeError(
220-
"Cannot use .astype to convert from timezone-naive dtype to "
221-
"timezone-aware dtype. Use ser.dt.tz_localize instead."
222-
)
223-
224213
if is_dtype_equal(values.dtype, dtype):
225214
if copy:
226215
return values.copy()

pandas/core/indexes/base.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1020,7 +1020,7 @@ def astype(self, dtype, copy: bool = True):
10201020
# NB: this must come before the ExtensionDtype check below
10211021
# TODO: this differs from Series behavior; can/should we align them?
10221022
raise TypeError(
1023-
f"Cannot convert Float64Index to dtype {dtype}; integer "
1023+
f"Cannot convert dtype={self.dtype} to dtype {dtype}; integer "
10241024
"values are required for conversion"
10251025
)
10261026

pandas/io/parsers/base_parser.py

+24 -21
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
from pandas._typing import (
3737
ArrayLike,
3838
DtypeArg,
39+
DtypeObj,
3940
Scalar,
4041
)
4142
from pandas.errors import (
@@ -61,14 +62,18 @@
6162
is_string_dtype,
6263
pandas_dtype,
6364
)
64-
from pandas.core.dtypes.dtypes import CategoricalDtype
65+
from pandas.core.dtypes.dtypes import (
66+
CategoricalDtype,
67+
ExtensionDtype,
68+
)
6569
from pandas.core.dtypes.missing import isna
6670

6771
from pandas import StringDtype
6872
from pandas.core import algorithms
6973
from pandas.core.arrays import (
7074
BooleanArray,
7175
Categorical,
76+
ExtensionArray,
7277
FloatingArray,
7378
IntegerArray,
7479
)
@@ -599,14 +604,8 @@ def _convert_to_ndarrays(
599604
# type specified in dtype param or cast_type is an EA
600605
if cast_type and (not is_dtype_equal(cvals, cast_type) or is_ea):
601606
if not is_ea and na_count > 0:
602-
try:
603-
if is_bool_dtype(cast_type):
604-
raise ValueError(
605-
f"Bool column has NA values in column {c}"
606-
)
607-
except (AttributeError, TypeError):
608-
# invalid input to is_bool_dtype
609-
pass
607+
if is_bool_dtype(cast_type):
608+
raise ValueError(f"Bool column has NA values in column {c}")
610609
cast_type = pandas_dtype(cast_type)
611610
cvals = self._cast_types(cvals, cast_type, c)
612611

@@ -686,7 +685,7 @@ def _set(x) -> int:
686685

687686
def _infer_types(
688687
self, values, na_values, no_dtype_specified, try_num_bool: bool = True
689-
):
688+
) -> tuple[ArrayLike, int]:
690689
"""
691690
Infer types of values, possibly casting
692691
@@ -700,7 +699,7 @@ def _infer_types(
700699
701700
Returns
702701
-------
703-
converted : ndarray
702+
converted : ndarray or ExtensionArray
704703
na_count : int
705704
"""
706705
na_count = 0
@@ -777,48 +776,50 @@ def _infer_types(
777776

778777
return result, na_count
779778

780-
def _cast_types(self, values, cast_type, column):
779+
def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLike:
781780
"""
782781
Cast values to specified type
783782
784783
Parameters
785784
----------
786-
values : ndarray
787-
cast_type : string or np.dtype
785+
values : ndarray or ExtensionArray
786+
cast_type : np.dtype or ExtensionDtype
788787
dtype to cast values to
789788
column : string
790789
column name - used only for error reporting
791790
792791
Returns
793792
-------
794-
converted : ndarray
793+
converted : ndarray or ExtensionArray
795794
"""
796795
if is_categorical_dtype(cast_type):
797796
known_cats = (
798797
isinstance(cast_type, CategoricalDtype)
799798
and cast_type.categories is not None
800799
)
801800

802-
if not is_object_dtype(values) and not known_cats:
801+
if not is_object_dtype(values.dtype) and not known_cats:
803802
# TODO: this is for consistency with
804803
# c-parser which parses all categories
805804
# as strings
806805

807-
values = astype_nansafe(values, np.dtype(str))
806+
values = lib.ensure_string_array(
807+
values, skipna=False, convert_na_value=False
808+
)
808809

809810
cats = Index(values).unique().dropna()
810811
values = Categorical._from_inferred_categories(
811812
cats, cats.get_indexer(values), cast_type, true_values=self.true_values
812813
)
813814

814815
# use the EA's implementation of casting
815-
elif is_extension_array_dtype(cast_type):
816-
# ensure cast_type is an actual dtype and not a string
817-
cast_type = pandas_dtype(cast_type)
816+
elif isinstance(cast_type, ExtensionDtype):
818817
array_type = cast_type.construct_array_type()
819818
try:
820819
if is_bool_dtype(cast_type):
821-
return array_type._from_sequence_of_strings(
820+
# error: Unexpected keyword argument "true_values" for
821+
# "_from_sequence_of_strings" of "ExtensionArray"
822+
return array_type._from_sequence_of_strings( # type: ignore[call-arg] # noqa:E501
822823
values,
823824
dtype=cast_type,
824825
true_values=self.true_values,
@@ -832,6 +833,8 @@ def _cast_types(self, values, cast_type, column):
832833
"_from_sequence_of_strings in order to be used in parser methods"
833834
) from err
834835

836+
elif isinstance(values, ExtensionArray):
837+
values = values.astype(cast_type, copy=False)
835838
else:
836839
try:
837840
values = astype_nansafe(values, cast_type, copy=True, skipna=True)

pandas/tests/arrays/test_datetimes.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -373,7 +373,7 @@ def test_astype_copies(self, dtype, other):
373373

374374
if err:
375375
if dtype == "datetime64[ns]":
376-
msg = "Use ser.dt.tz_localize instead"
376+
msg = "Use obj.tz_localize instead or series.dt.tz_localize instead"
377377
else:
378378
msg = "from timezone-aware dtype to timezone-naive dtype"
379379
with pytest.raises(TypeError, match=msg):

pandas/tests/indexes/numeric/test_astype.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ def test_cannot_cast_to_datetimelike(self, dtype):
7070
idx = Index([0, 1.1, 2], dtype=np.float64)
7171

7272
msg = (
73-
f"Cannot convert Float64Index to dtype {pandas_dtype(dtype)}; "
73+
f"Cannot convert dtype=float64 to dtype {pandas_dtype(dtype)}; "
7474
f"integer values are required for conversion"
7575
)
7676
with pytest.raises(TypeError, match=re.escape(msg)):

0 commit comments

Comments
 (0)