Skip to content

Commit 9a46a4b

Browse files
authored
REF: Block._astype defer to astype_nansafe in more cases (#38562)
1 parent d3c52e4 commit 9a46a4b

File tree

7 files changed: 54 additions (+54) and 30 deletions (-30)

pandas/core/arrays/datetimelike.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -346,7 +346,7 @@ def astype(self, dtype, copy=True):
346346
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
347347
if is_extension_array_dtype(dtype):
348348
arr_cls = dtype.construct_array_type()
349-
return arr_cls._from_sequence(self, dtype=dtype)
349+
return arr_cls._from_sequence(self, dtype=dtype, copy=copy)
350350
else:
351351
return self._format_native_types()
352352
elif is_integer_dtype(dtype):

pandas/core/arrays/datetimes.py

+7 -2
Original file line number | Diff line number | Diff line change
@@ -589,10 +589,15 @@ def astype(self, dtype, copy=True):
589589

590590
if is_datetime64_ns_dtype(dtype) and not is_dtype_equal(dtype, self.dtype):
591591
# GH#18951: datetime64_ns dtype but not equal means different tz
592+
# FIXME: this doesn't match DatetimeBlock.astype, xref GH#33401
592593
new_tz = getattr(dtype, "tz", None)
593-
if getattr(self.dtype, "tz", None) is None:
594+
if self.tz is None:
594595
return self.tz_localize(new_tz)
595-
result = self.tz_convert(new_tz)
596+
elif new_tz is None:
597+
result = self.tz_convert("UTC").tz_localize(None)
598+
else:
599+
result = self.tz_convert(new_tz)
600+
596601
if copy:
597602
result = result.copy()
598603
if new_tz is None:

pandas/core/dtypes/cast.py

+10 -7
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@
3030
conversion,
3131
iNaT,
3232
ints_to_pydatetime,
33-
ints_to_pytimedelta,
3433
)
3534
from pandas._libs.tslibs.timezones import tz_compare
3635
from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar
@@ -987,15 +986,21 @@ def astype_nansafe(
987986
elif not isinstance(dtype, np.dtype):
988987
raise ValueError("dtype must be np.dtype or ExtensionDtype")
989988

989+
if arr.dtype.kind in ["m", "M"] and (
990+
issubclass(dtype.type, str) or dtype == object
991+
):
992+
from pandas.core.construction import ensure_wrapped_if_datetimelike
993+
994+
arr = ensure_wrapped_if_datetimelike(arr)
995+
return arr.astype(dtype, copy=copy)
996+
990997
if issubclass(dtype.type, str):
991998
return lib.ensure_string_array(
992999
arr.ravel(), skipna=skipna, convert_na_value=False
9931000
).reshape(arr.shape)
9941001

9951002
elif is_datetime64_dtype(arr):
996-
if is_object_dtype(dtype):
997-
return ints_to_pydatetime(arr.view(np.int64))
998-
elif dtype == np.int64:
1003+
if dtype == np.int64:
9991004
if isna(arr).any():
10001005
raise ValueError("Cannot convert NaT values to integer")
10011006
return arr.view(dtype)
@@ -1007,9 +1012,7 @@ def astype_nansafe(
10071012
raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")
10081013

10091014
elif is_timedelta64_dtype(arr):
1010-
if is_object_dtype(dtype):
1011-
return ints_to_pytimedelta(arr.view(np.int64))
1012-
elif dtype == np.int64:
1015+
if dtype == np.int64:
10131016
if isna(arr).any():
10141017
raise ValueError("Cannot convert NaT values to integer")
10151018
return arr.view(dtype)

pandas/core/generic.py

+1
Original file line number | Diff line number | Diff line change
@@ -5857,6 +5857,7 @@ def astype(
58575857
elif is_extension_array_dtype(dtype) and self.ndim > 1:
58585858
# GH 18099/22869: columnwise conversion to extension dtype
58595859
# GH 24704: use iloc to handle duplicate column names
5860+
# TODO(EA2D): special case not needed with 2D EAs
58605861
results = [
58615862
self.iloc[:, i].astype(dtype, copy=copy)
58625863
for i in range(len(self.columns))

pandas/core/internals/blocks.py

+1 -19
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@
7575
)
7676
from pandas.core.base import PandasObject
7777
import pandas.core.common as com
78-
from pandas.core.construction import array as pd_array, extract_array
78+
from pandas.core.construction import extract_array
7979
from pandas.core.indexers import (
8080
check_setitem_lengths,
8181
is_empty_indexer,
@@ -676,24 +676,6 @@ def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike:
676676
values = values.astype(dtype, copy=copy)
677677

678678
else:
679-
if issubclass(dtype.type, str):
680-
if values.dtype.kind in ["m", "M"]:
681-
# use native type formatting for datetime/tz/timedelta
682-
arr = pd_array(values)
683-
# Note: in the case where dtype is an np.dtype, i.e. not
684-
# StringDtype, this matches arr.astype(dtype), xref GH#36153
685-
values = arr._format_native_types(na_rep="NaT")
686-
687-
elif is_object_dtype(dtype):
688-
if values.dtype.kind in ["m", "M"]:
689-
# Wrap in Timedelta/Timestamp
690-
arr = pd_array(values)
691-
values = arr.astype(object)
692-
else:
693-
values = values.astype(object)
694-
# We still need to go through astype_nansafe for
695-
# e.g. dtype = Sparse[object, 0]
696-
697679
values = astype_nansafe(values, dtype, copy=True)
698680

699681
return values

pandas/tests/arrays/string_/test_string.py

+6 -1
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,12 @@ def test_string_methods(input, method, dtype, request):
126126
def test_astype_roundtrip(dtype, request):
127127
if dtype == "arrow_string":
128128
reason = "ValueError: Could not convert object to NumPy datetime"
129-
mark = pytest.mark.xfail(reason=reason)
129+
mark = pytest.mark.xfail(reason=reason, raises=ValueError)
130+
request.node.add_marker(mark)
131+
else:
132+
mark = pytest.mark.xfail(
133+
reason="GH#36153 casting from StringArray to dt64 fails", raises=ValueError
134+
)
130135
request.node.add_marker(mark)
131136

132137
ser = pd.Series(pd.date_range("2000", periods=12))

pandas/tests/frame/methods/test_astype.py

+28
Original file line number | Diff line number | Diff line change
@@ -611,3 +611,31 @@ def test_astype_tz_object_conversion(self, tz):
611611
# do real test: object dtype to a specified tz, different from construction tz.
612612
result = result.astype({"tz": "datetime64[ns, Europe/London]"})
613613
tm.assert_frame_equal(result, expected)
614+
615+
def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture, request):
616+
tz = tz_naive_fixture
617+
if tz is None:
618+
mark = pytest.mark.xfail(
619+
reason="GH#36153 uses ndarray formatting instead of DTA formatting"
620+
)
621+
request.node.add_marker(mark)
622+
623+
dti = date_range("2016-01-01", periods=3, tz=tz)
624+
dta = dti._data
625+
dta[0] = NaT
626+
627+
obj = frame_or_series(dta)
628+
result = obj.astype("string")
629+
630+
# Check that Series/DataFrame.astype matches DatetimeArray.astype
631+
expected = frame_or_series(dta.astype("string"))
632+
tm.assert_equal(result, expected)
633+
634+
item = result.iloc[0]
635+
if frame_or_series is DataFrame:
636+
item = item.iloc[0]
637+
assert item is pd.NA
638+
639+
# For non-NA values, we should match what we get for non-EA str
640+
alt = obj.astype(str)
641+
assert np.all(alt.iloc[1:] == result.iloc[1:])

0 commit comments

Comments
 (0)