diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 1c170f3d6caee..1b7b917625c0f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -113,6 +113,48 @@ notable_bug_fix2 Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_200.api_breaking.unsupported_datetimelike_dtype_arg: + +Construction with datetime64 or timedelta64 dtype with unsupported resolution +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +In past versions, when constructing a :class:`Series` or :class:`DataFrame` and +passing a "datetime64" or "timedelta64" dtype with unsupported resolution +(i.e. anything other than "ns"), pandas would silently replace the given dtype +with its nanosecond analogue: + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: pd.Series(["2016-01-01"], dtype="datetime64[s]") + Out[5]: + 0 2016-01-01 + dtype: datetime64[ns] + + In [6]: pd.Series(["2016-01-01"], dtype="datetime64[D]") + Out[6]: + 0 2016-01-01 + dtype: datetime64[ns] + +In pandas 2.0 we support resolutions "s", "ms", "us", and "ns". When passing +a supported dtype (e.g. "datetime64[s]"), the result now has exactly +the requested dtype: + +*New behavior*: + +.. ipython:: python + + pd.Series(["2016-01-01"], dtype="datetime64[s]") + +With an unsupported dtype, pandas now raises instead of silently swapping in +a supported dtype: + +*New behavior*: + +.. ipython:: python + :okexcept: + + pd.Series(["2016-01-01"], dtype="datetime64[D]") .. 
_whatsnew_200.api_breaking.astype_to_unsupported_datetimelike: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1c7effa93f4ef..c470170724a2c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -29,10 +29,8 @@ Timedelta, Timestamp, astype_overflowsafe, - get_supported_reso, get_unit_from_dtype, is_supported_unit, - npy_unit_to_abbrev, ) from pandas._libs.tslibs.timedeltas import array_to_timedelta64 from pandas._typing import ( @@ -1336,17 +1334,19 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: """ Convert dtypes with granularity less than nanosecond to nanosecond - >>> _ensure_nanosecond_dtype(np.dtype("M8[D]")) - dtype('<M8[s]') - >>> _ensure_nanosecond_dtype(np.dtype("M8[us]")) dtype('<M8[us]') + >>> _ensure_nanosecond_dtype(np.dtype("M8[D]")) + Traceback (most recent call last): + ... + TypeError: dtype=datetime64[D] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns' + >>> _ensure_nanosecond_dtype(np.dtype("m8[ps]")) Traceback (most recent call last): ... - TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]] - """ + TypeError: dtype=timedelta64[ps] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns' + """ # noqa:E501 msg = ( f"The '{dtype.name}' dtype has no unit. " f"Please pass in '{dtype.name}[ns]' instead." @@ -1359,29 +1359,19 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: # i.e. 
datetime64tz pass - elif dtype.kind == "M" and not is_supported_unit(get_unit_from_dtype(dtype)): - # pandas supports dtype whose granularity is less than [ns] - # e.g., [ps], [fs], [as] - if dtype <= np.dtype("M8[ns]"): - if dtype.name == "datetime64": + elif dtype.kind in ["m", "M"]: + reso = get_unit_from_dtype(dtype) + if not is_supported_unit(reso): + # pre-2.0 we would silently swap in nanos for lower-resolutions, + # raise for above-nano resolutions + if dtype.name in ["datetime64", "timedelta64"]: raise ValueError(msg) - reso = get_supported_reso(get_unit_from_dtype(dtype)) - unit = npy_unit_to_abbrev(reso) - dtype = np.dtype(f"M8[{unit}]") - else: - raise TypeError(f"cannot convert datetimelike to dtype [{dtype}]") - - elif dtype.kind == "m" and dtype != TD64NS_DTYPE: - # pandas supports dtype whose granularity is less than [ns] - # e.g., [ps], [fs], [as] - if dtype <= np.dtype("m8[ns]"): - if dtype.name == "timedelta64": - raise ValueError(msg) - reso = get_supported_reso(get_unit_from_dtype(dtype)) - unit = npy_unit_to_abbrev(reso) - dtype = np.dtype(f"m8[{unit}]") - else: - raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]") + # TODO: ValueError or TypeError? existing test + # test_constructor_generic_timestamp_bad_frequency expects TypeError + raise TypeError( + f"dtype={dtype} is not supported. 
Supported resolutions are 's', " + "'ms', 'us', and 'ns'" + ) return dtype diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index abb0ca5407505..1209d92ffa14a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1665,19 +1665,23 @@ def test_constructor_generic_timestamp_no_frequency(self, dtype, request): with pytest.raises(ValueError, match=msg): Series([], dtype=dtype) - @pytest.mark.parametrize( - "dtype,msg", - [ - ("m8[ps]", "cannot convert timedeltalike"), - ("M8[ps]", "cannot convert datetimelike"), - ], - ) - def test_constructor_generic_timestamp_bad_frequency(self, dtype, msg): + @pytest.mark.parametrize("unit", ["ps", "as", "fs", "Y", "M", "W", "D", "h", "m"]) + @pytest.mark.parametrize("kind", ["m", "M"]) + def test_constructor_generic_timestamp_bad_frequency(self, kind, unit): # see gh-15524, gh-15987 + # as of 2.0 we raise on any non-supported unit rather than silently + # cast to nanos; previously we only raised for frequencies higher + # than ns + dtype = f"{kind}8[{unit}]" + msg = "dtype=.* is not supported. Supported resolutions are" with pytest.raises(TypeError, match=msg): Series([], dtype=dtype) + with pytest.raises(TypeError, match=msg): + # pre-2.0 the DataFrame cast raised but the Series case did not + DataFrame([[0]], dtype=dtype) + @pytest.mark.parametrize("dtype", [None, "uint8", "category"]) def test_constructor_range_dtype(self, dtype): # GH 16804