diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst index 180de1df53f9e..318ca045847f4 100644 --- a/doc/source/user_guide/timedeltas.rst +++ b/doc/source/user_guide/timedeltas.rst @@ -236,9 +236,7 @@ Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` ob Frequency conversion -------------------- -Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other 'frequencies' by dividing by another timedelta, -or by astyping to a specific timedelta type. These operations yield Series and propagate ``NaT`` -> ``nan``. -Note that division by the NumPy scalar is true division, while astyping is equivalent of floor division. +Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other frequencies by astyping to a specific timedelta dtype. .. ipython:: python @@ -250,14 +248,17 @@ Note that division by the NumPy scalar is true division, while astyping is equiv td[3] = np.nan td - # to days - td / np.timedelta64(1, "D") - td.astype("timedelta64[D]") - # to seconds - td / np.timedelta64(1, "s") td.astype("timedelta64[s]") +For timedelta64 resolutions other than the supported "s", "ms", "us", "ns", +an alternative is to divide by another timedelta object. Note that division by the NumPy scalar is true division, while astyping is equivalent to floor division. + +.. ipython:: python + + # to days + td / np.timedelta64(1, "D") + # to months (these are constant months) td / np.timedelta64(1, "M") diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst index 8265ad58f7ea3..f5d3fc572ca98 100644 --- a/doc/source/whatsnew/v0.13.0.rst +++ b/doc/source/whatsnew/v0.13.0.rst @@ -532,6 +532,7 @@ Enhancements is frequency conversion. See :ref:`the docs` for the docs. .. 
ipython:: python + :okexcept: import datetime td = pd.Series(pd.date_range('20130101', periods=4)) - pd.Series( diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index ff26df96d1a89..37af627f30748 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -99,6 +99,91 @@ notable_bug_fix2 Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_200.api_breaking.astype_to_unsupported_datetimelike: + +Disallow astype conversion to non-supported datetime64/timedelta64 dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +In previous versions, converting a :class:`Series` or :class:`DataFrame` +from ``datetime64[ns]`` to a different ``datetime64[X]`` dtype would return +with ``datetime64[ns]`` dtype instead of the requested dtype. In pandas 2.0, +support is added for "datetime64[s]", "datetime64[ms]", and "datetime64[us]" dtypes, +so converting to those dtypes gives exactly the requested dtype: + +*Previous behavior*: + +.. ipython:: python + + idx = pd.date_range("2016-01-01", periods=3) + ser = pd.Series(idx) + +*Previous behavior*: + +.. code-block:: ipython + + In [4]: ser.astype("datetime64[s]") + Out[4]: + 0 2016-01-01 + 1 2016-01-02 + 2 2016-01-03 + dtype: datetime64[ns] + +With the new behavior, we get exactly the requested dtype: + +*New behavior*: + +.. ipython:: python + + ser.astype("datetime64[s]") + +For non-supported resolutions e.g. "datetime64[D]", we raise instead of silently +ignoring the requested dtype: + +*New behavior*: + +.. ipython:: python + :okexcept: + + ser.astype("datetime64[D]") + +For conversion from ``timedelta64[ns]`` dtypes, the old behavior converted +to a floating point format. + +*Previous behavior*: + +.. ipython:: python + + idx = pd.timedelta_range("1 Day", periods=3) + ser = pd.Series(idx) + +*Previous behavior*: + +.. 
code-block:: ipython + + In [7]: ser.astype("timedelta64[s]") + Out[7]: + 0 86400.0 + 1 172800.0 + 2 259200.0 + dtype: float64 + + In [8]: ser.astype("timedelta64[D]") + Out[8]: + 0 1.0 + 1 2.0 + 2 3.0 + dtype: float64 + +The new behavior, as for datetime64, either gives exactly the requested dtype or raises: + +*New behavior*: + +.. ipython:: python + :okexcept: + + ser.astype("timedelta64[s]") + ser.astype("timedelta64[D]") + .. _whatsnew_200.api_breaking.deps: Increased minimum versions for dependencies diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 92b9222cfc9bc..32c90de6f1ce9 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -45,7 +45,6 @@ from pandas.compat.numpy import function as nv from pandas.util._validators import validate_endpoints -from pandas.core.dtypes.astype import astype_td64_unit_conversion from pandas.core.dtypes.common import ( TD64NS_DTYPE, is_dtype_equal, @@ -327,8 +326,11 @@ def astype(self, dtype, copy: bool = True): return type(self)._simple_new( res_values, dtype=res_values.dtype, freq=self.freq ) - - return astype_td64_unit_conversion(self._ndarray, dtype, copy=copy) + else: + raise ValueError( + f"Cannot convert from {self.dtype} to {dtype}. " + "Supported resolutions are 's', 'ms', 'us', 'ns'" + ) return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 718badc2e4085..d95b71a5ea890 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -13,11 +13,6 @@ import numpy as np from pandas._libs import lib -from pandas._libs.tslibs import ( - get_unit_from_dtype, - is_supported_unit, - is_unitless, -) from pandas._libs.tslibs.timedeltas import array_to_timedelta64 from pandas._typing import ( ArrayLike, @@ -132,12 +127,10 @@ def astype_nansafe( # TODO(2.0): change to use the same logic as TDA.astype, i.e. 
# giving the requested dtype for supported units (s, ms, us, ns) # and doing the old convert-to-float behavior otherwise. - if is_supported_unit(get_unit_from_dtype(arr.dtype)): - from pandas.core.construction import ensure_wrapped_if_datetimelike + from pandas.core.construction import ensure_wrapped_if_datetimelike - arr = ensure_wrapped_if_datetimelike(arr) - return arr.astype(dtype, copy=copy) - return astype_td64_unit_conversion(arr, dtype, copy=copy) + arr = ensure_wrapped_if_datetimelike(arr) + return arr.astype(dtype, copy=copy) raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]") @@ -292,20 +285,6 @@ def astype_array_safe( # Ensure we don't end up with a PandasArray dtype = dtype.numpy_dtype - if ( - is_datetime64_dtype(values.dtype) - # need to do np.dtype check instead of is_datetime64_dtype - # otherwise pyright complains - and isinstance(dtype, np.dtype) - and dtype.kind == "M" - and not is_unitless(dtype) - and not is_dtype_equal(dtype, values.dtype) - and not is_supported_unit(get_unit_from_dtype(dtype)) - ): - # Supported units we handle in DatetimeArray.astype; but that raises - # on non-supported units, so we handle that here. - return np.asarray(values).astype(dtype) - try: new_values = astype_array(values, dtype, copy=copy) except (ValueError, TypeError): @@ -317,36 +296,3 @@ def astype_array_safe( raise return new_values - - -def astype_td64_unit_conversion( - values: np.ndarray, dtype: np.dtype, copy: bool -) -> np.ndarray: - """ - By pandas convention, converting to non-nano timedelta64 - returns an int64-dtyped array with ints representing multiples - of the desired timedelta unit. This is essentially division. 
- - Parameters - ---------- - values : np.ndarray[timedelta64[ns]] - dtype : np.dtype - timedelta64 with unit not-necessarily nano - copy : bool - - Returns - ------- - np.ndarray - """ - if is_dtype_equal(values.dtype, dtype): - if copy: - return values.copy() - return values - - # otherwise we are converting to non-nano - result = values.astype(dtype, copy=False) # avoid double-copying - result = result.astype(np.float64) - - mask = isna(values) - np.putmask(result, mask, np.nan) - return result diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 56c97ac7a4dc5..6955b2e7b6aca 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1861,8 +1861,8 @@ def test_is_timedelta(self): assert is_timedelta64_ns_dtype(tdi.astype("timedelta64[ns]")) # Conversion to Int64Index: - assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64")) - assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64[h]")) + assert not is_timedelta64_ns_dtype(Index([], dtype=np.float64)) + assert not is_timedelta64_ns_dtype(Index([], dtype=np.int64)) class TestIsScalar: diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index b2efa0713b513..7c4ed68dfd0ef 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -44,7 +44,8 @@ def test_from_records_with_datetimes(self): dtypes = [("EXPIRY", " float64, so this cannot be done inplace, so + # timedelta64[m] -> float, so this cannot be done inplace, so # no warning df.loc[:, ("Respondent", "Duration")] = df.loc[ :, ("Respondent", "Duration") - ].astype("timedelta64[m]") + ] / Timedelta(60_000_000_000) expected = Series( [23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration")