From 6f5bdfcdba0f5581436d2d57cf1140e5cd1813fc Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 24 Oct 2022 14:05:19 -0700 Subject: [PATCH 1/7] API: raise on unsupported dtype instead of silently swapping --- doc/source/whatsnew/v2.0.0.rst | 42 +++++++++++++++++++++ pandas/core/dtypes/cast.py | 48 ++++++++++-------------- pandas/tests/series/test_constructors.py | 19 ++++++---- 3 files changed, 72 insertions(+), 37 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index ff26df96d1a89..9f8921ee60c83 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -99,6 +99,48 @@ notable_bug_fix2 Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_200.api_breaking.unsupported_datetimelike_dtype_arg: + +Construction with datetime64 or timedelta64 dtype with unsupported resolution +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +In past versions, when constructing a :class:`Series` or :class:`DataFrame` and +passing a "datetime64" or "timedelta64" dtype with unsupported resolution +(i.e. anything other than "ns"), pandas would silently replace the given dtype +with its nanosecond analogue: + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: pd.Series(["2016-01-01"], dtype="datetime64[s]") + Out[5]: + 0 2016-01-01 + dtype: datetime64[ns] + + In [6]: pd.Series(["2016-01-01"], dtype="datetime64[D]") + Out[6]: + 0 2016-01-01 + dtype: datetime64[ns] + +In pandas 2.0 we support resolutions "s", "ms", "us", and "ns". When passing +a supported dtype (e.g. 
"datetime64[s]"), the result now has exactly +the requested dtype: + +*New behavior*: + + In [5]: pd.Series(["2016-01-01"], dtype="datetime64[s]") + Out[5]: + 0 2016-01-01 + dtype: datetime64[s] + +With an un-supported dtype, pandas now raises instead of silently swapping in +a supported dtype: + +*New behavior*: + + In [6]: pd.Series(["2016-01-01"], dtype="datetime64[D]") + TypeError: dtype=datetime64[D] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns' + .. _whatsnew_200.api_breaking.deps: Increased minimum versions for dependencies diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9830d22f3e2e5..6f4208da6cf80 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -31,10 +31,8 @@ Timedelta, Timestamp, astype_overflowsafe, - get_supported_reso, get_unit_from_dtype, is_supported_unit, - npy_unit_to_abbrev, ) from pandas._libs.tslibs.timedeltas import array_to_timedelta64 from pandas._typing import ( @@ -1454,17 +1452,19 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: """ Convert dtypes with granularity less than nanosecond to nanosecond - >>> _ensure_nanosecond_dtype(np.dtype("M8[D]")) - dtype('>> _ensure_nanosecond_dtype(np.dtype("M8[us]")) dtype('>> _ensure_nanosecond_dtype(np.dtype("M8[D]")) + Traceback (most recent call last): + ... + TypeError: dtype=datetime64[D] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns' + >>> _ensure_nanosecond_dtype(np.dtype("m8[ps]")) Traceback (most recent call last): ... - TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]] - """ + TypeError: dtype=timedelta64[ps] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns' + """ # noqa:E501 msg = ( f"The '{dtype.name}' dtype has no unit. " f"Please pass in '{dtype.name}[ns]' instead." @@ -1477,29 +1477,19 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: # i.e. 
datetime64tz pass - elif dtype.kind == "M" and not is_supported_unit(get_unit_from_dtype(dtype)): - # pandas supports dtype whose granularity is less than [ns] - # e.g., [ps], [fs], [as] - if dtype <= np.dtype("M8[ns]"): - if dtype.name == "datetime64": + elif dtype.kind in ["m", "M"]: + reso = get_unit_from_dtype(dtype) + if not is_supported_unit(reso): + # pre-2.0 we would silently swap in nanos for lower-resolutions, + # raise for above-nano resolutions + if dtype.name == "datetime64" or dtype.name == "timedelta64": raise ValueError(msg) - reso = get_supported_reso(get_unit_from_dtype(dtype)) - unit = npy_unit_to_abbrev(reso) - dtype = np.dtype(f"M8[{unit}]") - else: - raise TypeError(f"cannot convert datetimelike to dtype [{dtype}]") - - elif dtype.kind == "m" and dtype != TD64NS_DTYPE: - # pandas supports dtype whose granularity is less than [ns] - # e.g., [ps], [fs], [as] - if dtype <= np.dtype("m8[ns]"): - if dtype.name == "timedelta64": - raise ValueError(msg) - reso = get_supported_reso(get_unit_from_dtype(dtype)) - unit = npy_unit_to_abbrev(reso) - dtype = np.dtype(f"m8[{unit}]") - else: - raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]") + # TODO: ValueError or TypeError? existing test + # test_constructor_generic_timestamp_bad_frequency expects TypeError + raise TypeError( + f"dtype={dtype} is not supported. 
Supported resolutions are 's', " + "'ms', 'us', and 'ns'" + ) return dtype diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 3faddfeca38bd..f636de8f491be 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1669,19 +1669,22 @@ def test_constructor_generic_timestamp_no_frequency(self, dtype, request): with pytest.raises(ValueError, match=msg): Series([], dtype=dtype) - @pytest.mark.parametrize( - "dtype,msg", - [ - ("m8[ps]", "cannot convert timedeltalike"), - ("M8[ps]", "cannot convert datetimelike"), - ], - ) - def test_constructor_generic_timestamp_bad_frequency(self, dtype, msg): + @pytest.mark.parametrize("unit", ["ps", "as", "fs", "Y", "M", "W", "D", "h", "m"]) + @pytest.mark.parametrize("kind", ["m", "M"]) + def test_constructor_generic_timestamp_bad_frequency(self, kind, unit): # see gh-15524, gh-15987 + # as of 2.0 we raise on any non-supported unit rather than silently + # cast to nanos; previously we only raised for frequencies higher + # than ns + dtype = f"{kind}8[{unit}]" + msg = "dtype=.* is not supported. 
Supported resolutions are" with pytest.raises(TypeError, match=msg): Series([], dtype=dtype) + with pytest.raises(TypeError, match=msg): + DataFrame([[0]], dtype=dtype) + @pytest.mark.parametrize("dtype", [None, "uint8", "category"]) def test_constructor_range_dtype(self, dtype): # GH 16804 From 5cbdcd9931b2d28d8c1cd892ecb1664ebe2d5e2f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 24 Oct 2022 19:32:40 -0700 Subject: [PATCH 2/7] lint fixup --- pandas/core/dtypes/cast.py | 2 +- pandas/tests/series/test_constructors.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6f4208da6cf80..805630ac5f2db 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1482,7 +1482,7 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: if not is_supported_unit(reso): # pre-2.0 we would silently swap in nanos for lower-resolutions, # raise for above-nano resolutions - if dtype.name == "datetime64" or dtype.name == "timedelta64": + if dtype.name in ["datetime64", "timedelta64"]: raise ValueError(msg) # TODO: ValueError or TypeError? 
existing test # test_constructor_generic_timestamp_bad_frequency expects TypeError diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index f636de8f491be..078ac9a9caed8 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1683,6 +1683,7 @@ def test_constructor_generic_timestamp_bad_frequency(self, kind, unit): Series([], dtype=dtype) with pytest.raises(TypeError, match=msg): + # pre-2.0 the DataFrame cast raised but the Series case did not DataFrame([[0]], dtype=dtype) @pytest.mark.parametrize("dtype", [None, "uint8", "category"]) From a20b0d25569e30b5f1edd949613413e55d1dde42 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 25 Oct 2022 13:38:44 -0700 Subject: [PATCH 3/7] ipython blocks --- doc/source/whatsnew/v2.0.0.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9f8921ee60c83..31fcd42fc2808 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -128,6 +128,8 @@ the requested dtype: *New behavior*: +.. ipython:: python + In [5]: pd.Series(["2016-01-01"], dtype="datetime64[s]") Out[5]: 0 2016-01-01 @@ -138,6 +140,8 @@ a supported dtype: *New behavior*: +.. ipython:: python + In [6]: pd.Series(["2016-01-01"], dtype="datetime64[D]") TypeError: dtype=datetime64[D] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns' From 371d08ede273c9251c70c736af856a23ed8eaaed Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 25 Oct 2022 20:01:31 -0700 Subject: [PATCH 4/7] okexcept --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 31fcd42fc2808..40c77228dd6ef 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -141,6 +141,7 @@ a supported dtype: *New behavior*: .. 
ipython:: python + :okexcept: In [6]: pd.Series(["2016-01-01"], dtype="datetime64[D]") TypeError: dtype=datetime64[D] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns' From 6a152e2e25ee1c62fdbdae1b1f1f31492dce6510 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 26 Oct 2022 10:15:38 -0700 Subject: [PATCH 5/7] troubleshoto doc --- doc/source/whatsnew/v2.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 40c77228dd6ef..2176ec422b007 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -144,7 +144,6 @@ a supported dtype: :okexcept: In [6]: pd.Series(["2016-01-01"], dtype="datetime64[D]") - TypeError: dtype=datetime64[D] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns' .. _whatsnew_200.api_breaking.deps: From ff8a3ee0b6fa8e00fb2db4ca229695757361cc19 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Nov 2022 19:40:13 -0700 Subject: [PATCH 6/7] jeffs suggestion --- doc/source/whatsnew/v2.0.0.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9eee394589c8a..0695faffbd7f9 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -113,12 +113,12 @@ with its nanosecond analogue: .. code-block:: ipython - In [5]: pd.Series(["2016-01-01"], dtype="datetime64[s]") + pd.Series(["2016-01-01"], dtype="datetime64[s]") Out[5]: 0 2016-01-01 dtype: datetime64[ns] - In [6]: pd.Series(["2016-01-01"], dtype="datetime64[D]") + pd.Series(["2016-01-01"], dtype="datetime64[D]") Out[6]: 0 2016-01-01 dtype: datetime64[ns] @@ -144,7 +144,7 @@ a supported dtype: .. ipython:: python :okexcept: - In [6]: pd.Series(["2016-01-01"], dtype="datetime64[D]") + pd.Series(["2016-01-01"], dtype="datetime64[D]") .. 
_whatsnew_200.api_breaking.deps: From 37829cb42f76fc3daa9962cf57a52b445c2bac00 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 7 Nov 2022 13:18:39 -0800 Subject: [PATCH 7/7] troubleshoot docbuild --- doc/source/whatsnew/v2.0.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b394f40041c15..1b7b917625c0f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -126,12 +126,12 @@ with its nanosecond analogue: .. code-block:: ipython - pd.Series(["2016-01-01"], dtype="datetime64[s]") + In [5]: pd.Series(["2016-01-01"], dtype="datetime64[s]") Out[5]: 0 2016-01-01 dtype: datetime64[ns] - pd.Series(["2016-01-01"], dtype="datetime64[D]") + In [6]: pd.Series(["2016-01-01"], dtype="datetime64[D]") Out[6]: 0 2016-01-01 dtype: datetime64[ns]