Skip to content

Commit 3f3f518

Browse files
jbrockmendel authored and JulianWgs committed
REF: simplify maybe_cast_to_datetime (pandas-dev#41624)
1 parent 9a1169c commit 3f3f518

File tree

3 files changed

+80
-69
lines changed

3 files changed

+80
-69
lines changed

pandas/core/construction.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -745,7 +745,9 @@ def _try_cast(
745745
except (ValueError, TypeError) as err:
746746
if dtype is not None and raise_cast_failure:
747747
raise
748-
elif "Cannot cast" in str(err):
748+
elif "Cannot cast" in str(err) or "cannot be converted to timedelta64" in str(
749+
err
750+
):
749751
# via _disallow_mismatched_datetimelike
750752
raise
751753
else:

pandas/core/dtypes/cast.py

+66 -66
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,6 @@
6868
is_numeric_dtype,
6969
is_object_dtype,
7070
is_scalar,
71-
is_sparse,
7271
is_string_dtype,
7372
is_timedelta64_dtype,
7473
is_unsigned_integer_dtype,
@@ -1588,83 +1587,84 @@ def maybe_cast_to_datetime(
15881587
We allow a list *only* when dtype is not None.
15891588
"""
15901589
from pandas.core.arrays.datetimes import sequence_to_datetimes
1591-
from pandas.core.arrays.timedeltas import sequence_to_td64ns
1590+
from pandas.core.arrays.timedeltas import TimedeltaArray
15921591

15931592
if not is_list_like(value):
15941593
raise TypeError("value must be listlike")
15951594

1595+
if is_timedelta64_dtype(dtype):
1596+
# TODO: _from_sequence would raise ValueError in cases where
1597+
# ensure_nanosecond_dtype raises TypeError
1598+
dtype = cast(np.dtype, dtype)
1599+
dtype = ensure_nanosecond_dtype(dtype)
1600+
res = TimedeltaArray._from_sequence(value, dtype=dtype)
1601+
return res
1602+
15961603
if dtype is not None:
15971604
is_datetime64 = is_datetime64_dtype(dtype)
15981605
is_datetime64tz = is_datetime64tz_dtype(dtype)
1599-
is_timedelta64 = is_timedelta64_dtype(dtype)
16001606

16011607
vdtype = getattr(value, "dtype", None)
16021608

1603-
if is_datetime64 or is_datetime64tz or is_timedelta64:
1609+
if is_datetime64 or is_datetime64tz:
16041610
dtype = ensure_nanosecond_dtype(dtype)
16051611

1606-
if not is_sparse(value):
1607-
value = np.array(value, copy=False)
1608-
1609-
# we have an array of datetime or timedeltas & nulls
1610-
if value.size or not is_dtype_equal(value.dtype, dtype):
1611-
_disallow_mismatched_datetimelike(value, dtype)
1612-
1613-
try:
1614-
if is_datetime64:
1615-
dta = sequence_to_datetimes(value, allow_object=False)
1616-
# GH 25843: Remove tz information since the dtype
1617-
# didn't specify one
1618-
1619-
if dta.tz is not None:
1620-
warnings.warn(
1621-
"Data is timezone-aware. Converting "
1622-
"timezone-aware data to timezone-naive by "
1623-
"passing dtype='datetime64[ns]' to "
1624-
"DataFrame or Series is deprecated and will "
1625-
"raise in a future version. Use "
1626-
"`pd.Series(values).dt.tz_localize(None)` "
1627-
"instead.",
1628-
FutureWarning,
1629-
stacklevel=8,
1630-
)
1631-
# equiv: dta.view(dtype)
1632-
# Note: NOT equivalent to dta.astype(dtype)
1633-
dta = dta.tz_localize(None)
1634-
1635-
value = dta
1636-
elif is_datetime64tz:
1637-
dtype = cast(DatetimeTZDtype, dtype)
1638-
# The string check can be removed once issue #13712
1639-
# is solved. String data that is passed with a
1640-
# datetime64tz is assumed to be naive which should
1641-
# be localized to the timezone.
1642-
is_dt_string = is_string_dtype(value.dtype)
1643-
dta = sequence_to_datetimes(value, allow_object=False)
1644-
if dta.tz is not None:
1645-
value = dta.astype(dtype, copy=False)
1646-
elif is_dt_string:
1647-
# Strings here are naive, so directly localize
1648-
# equiv: dta.astype(dtype) # though deprecated
1649-
1650-
value = dta.tz_localize(dtype.tz)
1651-
else:
1652-
# Numeric values are UTC at this point,
1653-
# so localize and convert
1654-
# equiv: Series(dta).astype(dtype) # though deprecated
1655-
1656-
value = dta.tz_localize("UTC").tz_convert(dtype.tz)
1657-
elif is_timedelta64:
1658-
# if successful, we get a ndarray[td64ns]
1659-
value, _ = sequence_to_td64ns(value)
1660-
except OutOfBoundsDatetime:
1661-
raise
1662-
except ValueError:
1663-
# TODO(GH#40048): only catch dateutil's ParserError
1664-
# once we can reliably import it in all supported versions
1665-
if is_timedelta64:
1666-
raise
1667-
pass
1612+
value = np.array(value, copy=False)
1613+
1614+
# we have an array of datetime or timedeltas & nulls
1615+
if value.size or not is_dtype_equal(value.dtype, dtype):
1616+
_disallow_mismatched_datetimelike(value, dtype)
1617+
1618+
try:
1619+
if is_datetime64:
1620+
dta = sequence_to_datetimes(value, allow_object=False)
1621+
# GH 25843: Remove tz information since the dtype
1622+
# didn't specify one
1623+
1624+
if dta.tz is not None:
1625+
warnings.warn(
1626+
"Data is timezone-aware. Converting "
1627+
"timezone-aware data to timezone-naive by "
1628+
"passing dtype='datetime64[ns]' to "
1629+
"DataFrame or Series is deprecated and will "
1630+
"raise in a future version. Use "
1631+
"`pd.Series(values).dt.tz_localize(None)` "
1632+
"instead.",
1633+
FutureWarning,
1634+
stacklevel=8,
1635+
)
1636+
# equiv: dta.view(dtype)
1637+
# Note: NOT equivalent to dta.astype(dtype)
1638+
dta = dta.tz_localize(None)
1639+
1640+
value = dta
1641+
elif is_datetime64tz:
1642+
dtype = cast(DatetimeTZDtype, dtype)
1643+
# The string check can be removed once issue #13712
1644+
# is solved. String data that is passed with a
1645+
# datetime64tz is assumed to be naive which should
1646+
# be localized to the timezone.
1647+
is_dt_string = is_string_dtype(value.dtype)
1648+
dta = sequence_to_datetimes(value, allow_object=False)
1649+
if dta.tz is not None:
1650+
value = dta.astype(dtype, copy=False)
1651+
elif is_dt_string:
1652+
# Strings here are naive, so directly localize
1653+
# equiv: dta.astype(dtype) # though deprecated
1654+
1655+
value = dta.tz_localize(dtype.tz)
1656+
else:
1657+
# Numeric values are UTC at this point,
1658+
# so localize and convert
1659+
# equiv: Series(dta).astype(dtype) # though deprecated
1660+
1661+
value = dta.tz_localize("UTC").tz_convert(dtype.tz)
1662+
except OutOfBoundsDatetime:
1663+
raise
1664+
except ValueError:
1665+
# TODO(GH#40048): only catch dateutil's ParserError
1666+
# once we can reliably import it in all supported versions
1667+
pass
16681668

16691669
elif getattr(vdtype, "kind", None) in ["m", "M"]:
16701670
# we are already datetimelike and want to coerce to non-datetimelike;

pandas/tests/frame/test_constructors.py

+11 -2
Original file line number | Diff line number | Diff line change
@@ -2763,11 +2763,20 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request):
27632763
scalar = cls("NaT", "ns")
27642764
dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls]
27652765

2766-
with pytest.raises(TypeError, match="Cannot cast"):
2766+
msg = "Cannot cast"
2767+
if cls is np.datetime64:
2768+
msg = "|".join(
2769+
[
2770+
r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]",
2771+
"Cannot cast",
2772+
]
2773+
)
2774+
2775+
with pytest.raises(TypeError, match=msg):
27672776
constructor(scalar, dtype=dtype)
27682777

27692778
scalar = cls(4, "ns")
2770-
with pytest.raises(TypeError, match="Cannot cast"):
2779+
with pytest.raises(TypeError, match=msg):
27712780
constructor(scalar, dtype=dtype)
27722781

27732782
@pytest.mark.parametrize("cls", [datetime, np.datetime64])

0 commit comments

Comments
 (0)