Skip to content

Commit 5a36b5f

Browse files
authored
API: raise on unsupported dtype instead of silently swapping (#49285)
* API: raise on unsupported dtype instead of silently swapping * lint fixup * ipython blocks * okexcept * troubleshoot doc * jeffs suggestion * troubleshoot docbuild
1 parent 25832bc commit 5a36b5f

File tree

3 files changed

+73
-37
lines changed

3 files changed

+73
-37
lines changed

doc/source/whatsnew/v2.0.0.rst

+42
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,48 @@ notable_bug_fix2
113113
Backwards incompatible API changes
114114
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
115115

116+
.. _whatsnew_200.api_breaking.unsupported_datetimelike_dtype_arg:
117+
118+
Construction with datetime64 or timedelta64 dtype with unsupported resolution
119+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
120+
In past versions, when constructing a :class:`Series` or :class:`DataFrame` and
121+
passing a "datetime64" or "timedelta64" dtype with unsupported resolution
122+
(i.e. anything other than "ns"), pandas would silently replace the given dtype
123+
with its nanosecond analogue:
124+
125+
*Previous behavior*:
126+
127+
.. code-block:: ipython
128+
129+
In [5]: pd.Series(["2016-01-01"], dtype="datetime64[s]")
130+
Out[5]:
131+
0 2016-01-01
132+
dtype: datetime64[ns]
133+
134+
In [6]: pd.Series(["2016-01-01"], dtype="datetime64[D]")
135+
Out[6]:
136+
0 2016-01-01
137+
dtype: datetime64[ns]
138+
139+
In pandas 2.0 we support resolutions "s", "ms", "us", and "ns". When passing
140+
a supported dtype (e.g. "datetime64[s]"), the result now has exactly
141+
the requested dtype:
142+
143+
*New behavior*:
144+
145+
.. ipython:: python
146+
147+
pd.Series(["2016-01-01"], dtype="datetime64[s]")
148+
149+
With an unsupported dtype, pandas now raises instead of silently swapping in
150+
a supported dtype:
151+
152+
*New behavior*:
153+
154+
.. ipython:: python
155+
:okexcept:
156+
157+
pd.Series(["2016-01-01"], dtype="datetime64[D]")
116158
117159
.. _whatsnew_200.api_breaking.astype_to_unsupported_datetimelike:
118160

pandas/core/dtypes/cast.py

+19-29
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,8 @@
2929
Timedelta,
3030
Timestamp,
3131
astype_overflowsafe,
32-
get_supported_reso,
3332
get_unit_from_dtype,
3433
is_supported_unit,
35-
npy_unit_to_abbrev,
3634
)
3735
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
3836
from pandas._typing import (
@@ -1301,17 +1299,19 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
13011299
"""
13021300
Convert dtypes with granularity less than nanosecond to nanosecond
13031301
1304-
>>> _ensure_nanosecond_dtype(np.dtype("M8[D]"))
1305-
dtype('<M8[s]')
1306-
13071302
>>> _ensure_nanosecond_dtype(np.dtype("M8[us]"))
13081303
dtype('<M8[us]')
13091304
1305+
>>> _ensure_nanosecond_dtype(np.dtype("M8[D]"))
1306+
Traceback (most recent call last):
1307+
...
1308+
TypeError: dtype=datetime64[D] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns'
1309+
13101310
>>> _ensure_nanosecond_dtype(np.dtype("m8[ps]"))
13111311
Traceback (most recent call last):
13121312
...
1313-
TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]]
1314-
"""
1313+
TypeError: dtype=timedelta64[ps] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns'
1314+
""" # noqa:E501
13151315
msg = (
13161316
f"The '{dtype.name}' dtype has no unit. "
13171317
f"Please pass in '{dtype.name}[ns]' instead."
@@ -1324,29 +1324,19 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
13241324
# i.e. datetime64tz
13251325
pass
13261326

1327-
elif dtype.kind == "M" and not is_supported_unit(get_unit_from_dtype(dtype)):
1328-
# pandas supports dtype whose granularity is less than [ns]
1329-
# e.g., [ps], [fs], [as]
1330-
if dtype <= np.dtype("M8[ns]"):
1331-
if dtype.name == "datetime64":
1327+
elif dtype.kind in ["m", "M"]:
1328+
reso = get_unit_from_dtype(dtype)
1329+
if not is_supported_unit(reso):
1330+
# pre-2.0 we would silently swap in nanos for lower-resolutions,
1331+
# raise for above-nano resolutions
1332+
if dtype.name in ["datetime64", "timedelta64"]:
13321333
raise ValueError(msg)
1333-
reso = get_supported_reso(get_unit_from_dtype(dtype))
1334-
unit = npy_unit_to_abbrev(reso)
1335-
dtype = np.dtype(f"M8[{unit}]")
1336-
else:
1337-
raise TypeError(f"cannot convert datetimelike to dtype [{dtype}]")
1338-
1339-
elif dtype.kind == "m" and dtype != TD64NS_DTYPE:
1340-
# pandas supports dtype whose granularity is less than [ns]
1341-
# e.g., [ps], [fs], [as]
1342-
if dtype <= np.dtype("m8[ns]"):
1343-
if dtype.name == "timedelta64":
1344-
raise ValueError(msg)
1345-
reso = get_supported_reso(get_unit_from_dtype(dtype))
1346-
unit = npy_unit_to_abbrev(reso)
1347-
dtype = np.dtype(f"m8[{unit}]")
1348-
else:
1349-
raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")
1334+
# TODO: ValueError or TypeError? existing test
1335+
# test_constructor_generic_timestamp_bad_frequency expects TypeError
1336+
raise TypeError(
1337+
f"dtype={dtype} is not supported. Supported resolutions are 's', "
1338+
"'ms', 'us', and 'ns'"
1339+
)
13501340
return dtype
13511341

13521342

pandas/tests/series/test_constructors.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -1665,19 +1665,23 @@ def test_constructor_generic_timestamp_no_frequency(self, dtype, request):
16651665
with pytest.raises(ValueError, match=msg):
16661666
Series([], dtype=dtype)
16671667

1668-
@pytest.mark.parametrize(
1669-
"dtype,msg",
1670-
[
1671-
("m8[ps]", "cannot convert timedeltalike"),
1672-
("M8[ps]", "cannot convert datetimelike"),
1673-
],
1674-
)
1675-
def test_constructor_generic_timestamp_bad_frequency(self, dtype, msg):
1668+
@pytest.mark.parametrize("unit", ["ps", "as", "fs", "Y", "M", "W", "D", "h", "m"])
1669+
@pytest.mark.parametrize("kind", ["m", "M"])
1670+
def test_constructor_generic_timestamp_bad_frequency(self, kind, unit):
16761671
# see gh-15524, gh-15987
1672+
# as of 2.0 we raise on any non-supported unit rather than silently
1673+
# cast to nanos; previously we only raised for frequencies higher
1674+
# than ns
1675+
dtype = f"{kind}8[{unit}]"
16771676

1677+
msg = "dtype=.* is not supported. Supported resolutions are"
16781678
with pytest.raises(TypeError, match=msg):
16791679
Series([], dtype=dtype)
16801680

1681+
with pytest.raises(TypeError, match=msg):
1682+
# pre-2.0 the DataFrame cast raised but the Series case did not
1683+
DataFrame([[0]], dtype=dtype)
1684+
16811685
@pytest.mark.parametrize("dtype", [None, "uint8", "category"])
16821686
def test_constructor_range_dtype(self, dtype):
16831687
# GH 16804

0 commit comments

Comments
 (0)