API: Series.astype(td64_unsupported) raise (pandas-dev#49290)

jbrockmendel · phofl · commit e8b47d9470e7 · 2022-11-09T19:05:01.000+01:00
* API: Series.astype(td64_unsupported) raise

* update docs
diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst
@@ -236,9 +236,7 @@ Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` ob
 Frequency conversion
 --------------------
 
-Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other 'frequencies' by dividing by another timedelta,
-or by astyping to a specific timedelta type. These operations yield Series and propagate ``NaT`` -> ``nan``.
-Note that division by the NumPy scalar is true division, while astyping is equivalent of floor division.
+Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` can be converted to other frequencies by astyping to a specific timedelta dtype.
 
 .. ipython:: python
 
@@ -250,14 +248,17 @@ Note that division by the NumPy scalar is true division, while astyping is equiv
    td[3] = np.nan
    td
 
-   # to days
-   td / np.timedelta64(1, "D")
-   td.astype("timedelta64[D]")
-
    # to seconds
-   td / np.timedelta64(1, "s")
    td.astype("timedelta64[s]")
 
+For timedelta64 resolutions other than the supported "s", "ms", "us", "ns",
+an alternative is to divide by another timedelta object. Note that division by the NumPy scalar is true division, while astyping is equivalent to floor division.
+
+.. ipython:: python
+
+   # to days
+   td / np.timedelta64(1, "D")
+
    # to months (these are constant months)
    td / np.timedelta64(1, "M")
 
diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst
@@ -532,6 +532,7 @@ Enhancements
   is frequency conversion. See :ref:`the docs<timedeltas.timedeltas_convert>` for the docs.
 
   .. ipython:: python
+     :okexcept:
 
      import datetime
      td = pd.Series(pd.date_range('20130101', periods=4)) - pd.Series(
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -100,6 +100,91 @@ notable_bug_fix2
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+
+.. _whatsnew_200.api_breaking.astype_to_unsupported_datetimelike:
+
+Disallow astype conversion to non-supported datetime64/timedelta64 dtypes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+In previous versions, converting a :class:`Series` or :class:`DataFrame`
+from ``datetime64[ns]`` to a different ``datetime64[X]`` dtype would return
+with ``datetime64[ns]`` dtype instead of the requested dtype. In pandas 2.0,
+support is added for "datetime64[s]", "datetime64[ms]", and "datetime64[us]" dtypes,
+so converting to those dtypes gives exactly the requested dtype:
+
+Given a ``datetime64[ns]`` :class:`Series`:
+
+.. ipython:: python
+
+   idx = pd.date_range("2016-01-01", periods=3)
+   ser = pd.Series(idx)
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+   In [4]: ser.astype("datetime64[s]")
+   Out[4]:
+   0   2016-01-01
+   1   2016-01-02
+   2   2016-01-03
+   dtype: datetime64[ns]
+
+With the new behavior, we get exactly the requested dtype:
+
+*New behavior*:
+
+.. ipython:: python
+
+   ser.astype("datetime64[s]")
+
+For non-supported resolutions e.g. "datetime64[D]", we raise instead of silently
+ignoring the requested dtype:
+
+*New behavior*:
+
+.. ipython:: python
+   :okexcept:
+
+   ser.astype("datetime64[D]")
+
+For conversion from ``timedelta64[ns]`` dtypes, the old behavior converted
+to a floating point format.
+
+Given a ``timedelta64[ns]`` :class:`Series`:
+
+.. ipython:: python
+
+   idx = pd.timedelta_range("1 Day", periods=3)
+   ser = pd.Series(idx)
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+   In [7]: ser.astype("timedelta64[s]")
+   Out[7]:
+   0     86400.0
+   1    172800.0
+   2    259200.0
+   dtype: float64
+
+   In [8]: ser.astype("timedelta64[D]")
+   Out[8]:
+   0    1.0
+   1    2.0
+   2    3.0
+   dtype: float64
+
+The new behavior, as for datetime64, either gives exactly the requested dtype or raises:
+
+*New behavior*:
+
+.. ipython:: python
+   :okexcept:
+
+   ser.astype("timedelta64[s]")
+   ser.astype("timedelta64[D]")
+
 .. _whatsnew_200.api_breaking.deps:
 
 Increased minimum versions for dependencies
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -45,7 +45,6 @@
 from pandas.compat.numpy import function as nv
 from pandas.util._validators import validate_endpoints
 
-from pandas.core.dtypes.astype import astype_td64_unit_conversion
 from pandas.core.dtypes.common import (
     TD64NS_DTYPE,
     is_dtype_equal,
@@ -330,8 +329,11 @@ def astype(self, dtype, copy: bool = True):
                 return type(self)._simple_new(
                     res_values, dtype=res_values.dtype, freq=self.freq
                 )
-
-            return astype_td64_unit_conversion(self._ndarray, dtype, copy=copy)
+            else:
+                raise ValueError(
+                    f"Cannot convert from {self.dtype} to {dtype}. "
+                    "Supported resolutions are 's', 'ms', 'us', 'ns'"
+                )
 
         return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
 
diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
@@ -13,11 +13,6 @@
 import numpy as np
 
 from pandas._libs import lib
-from pandas._libs.tslibs import (
-    get_unit_from_dtype,
-    is_supported_unit,
-    is_unitless,
-)
 from pandas._libs.tslibs.timedeltas import array_to_timedelta64
 from pandas._typing import (
     ArrayLike,
@@ -131,12 +126,10 @@ def astype_nansafe(
         elif dtype.kind == "m":
             # give the requested dtype for supported units (s, ms, us, ns)
             #  and doing the old convert-to-float behavior otherwise.
-            if is_supported_unit(get_unit_from_dtype(arr.dtype)):
-                from pandas.core.construction import ensure_wrapped_if_datetimelike
+            from pandas.core.construction import ensure_wrapped_if_datetimelike
 
-                arr = ensure_wrapped_if_datetimelike(arr)
-                return arr.astype(dtype, copy=copy)
-            return astype_td64_unit_conversion(arr, dtype, copy=copy)
+            arr = ensure_wrapped_if_datetimelike(arr)
+            return arr.astype(dtype, copy=copy)
 
         raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")
 
@@ -291,20 +284,6 @@ def astype_array_safe(
         # Ensure we don't end up with a PandasArray
         dtype = dtype.numpy_dtype
 
-    if (
-        is_datetime64_dtype(values.dtype)
-        # need to do np.dtype check instead of is_datetime64_dtype
-        #  otherwise pyright complains
-        and isinstance(dtype, np.dtype)
-        and dtype.kind == "M"
-        and not is_unitless(dtype)
-        and not is_dtype_equal(dtype, values.dtype)
-        and not is_supported_unit(get_unit_from_dtype(dtype))
-    ):
-        # Supported units we handle in DatetimeArray.astype; but that raises
-        #  on non-supported units, so we handle that here.
-        return np.asarray(values).astype(dtype)
-
     try:
         new_values = astype_array(values, dtype, copy=copy)
     except (ValueError, TypeError):
@@ -316,36 +295,3 @@ def astype_array_safe(
             raise
 
     return new_values
-
-
-def astype_td64_unit_conversion(
-    values: np.ndarray, dtype: np.dtype, copy: bool
-) -> np.ndarray:
-    """
-    By pandas convention, converting to non-nano timedelta64
-    returns an int64-dtyped array with ints representing multiples
-    of the desired timedelta unit.  This is essentially division.
-
-    Parameters
-    ----------
-    values : np.ndarray[timedelta64[ns]]
-    dtype : np.dtype
-        timedelta64 with unit not-necessarily nano
-    copy : bool
-
-    Returns
-    -------
-    np.ndarray
-    """
-    if is_dtype_equal(values.dtype, dtype):
-        if copy:
-            return values.copy()
-        return values
-
-    # otherwise we are converting to non-nano
-    result = values.astype(dtype, copy=False)  # avoid double-copying
-    result = result.astype(np.float64)
-
-    mask = isna(values)
-    np.putmask(result, mask, np.nan)
-    return result
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -1868,8 +1868,8 @@ def test_is_timedelta(self):
         assert is_timedelta64_ns_dtype(tdi.astype("timedelta64[ns]"))
 
         # Conversion to Int64Index:
-        assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64"))
-        assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64[h]"))
+        assert not is_timedelta64_ns_dtype(Index([], dtype=np.float64))
+        assert not is_timedelta64_ns_dtype(Index([], dtype=np.int64))
 
 
 class TestIsScalar:
diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
@@ -44,7 +44,8 @@ def test_from_records_with_datetimes(self):
         dtypes = [("EXPIRY", "<M8[m]")]
         recarray = np.core.records.fromarrays(arrdata, dtype=dtypes)
         result = DataFrame.from_records(recarray)
-        expected["EXPIRY"] = expected["EXPIRY"].astype("M8[m]")
+        # we get the closest supported unit, "s"
+        expected["EXPIRY"] = expected["EXPIRY"].astype("M8[s]")
         tm.assert_frame_equal(result, expected)
 
     def test_from_records_sequencelike(self):
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
@@ -418,14 +418,28 @@ def test_astype_to_datetime_unit(self, unit):
         idx = pd.Index(ser)
         dta = ser._values
 
-        result = df.astype(dtype)
-
         if unit in ["ns", "us", "ms", "s"]:
             # GH#48928
             exp_dtype = dtype
+            result = df.astype(dtype)
         else:
             # we use the nearest supported dtype (i.e. M8[s])
             exp_dtype = "M8[s]"
+            msg = rf"Cannot cast DatetimeArray to dtype datetime64\[{unit}\]"
+            with pytest.raises(TypeError, match=msg):
+                df.astype(dtype)
+
+            with pytest.raises(TypeError, match=msg):
+                ser.astype(dtype)
+
+            with pytest.raises(TypeError, match=msg.replace("Array", "Index")):
+                idx.astype(dtype)
+
+            with pytest.raises(TypeError, match=msg):
+                dta.astype(dtype)
+
+            return
+
         # TODO(2.0): once DataFrame constructor doesn't cast ndarray inputs.
         #  can simplify this
         exp_values = arr.astype(exp_dtype)
@@ -437,32 +451,22 @@ def test_astype_to_datetime_unit(self, unit):
 
         tm.assert_frame_equal(result, exp_df)
 
-        # TODO(2.0): make Series/DataFrame raise like Index and DTA?
         res_ser = ser.astype(dtype)
         exp_ser = exp_df.iloc[:, 0]
         assert exp_ser.dtype == exp_dtype
         tm.assert_series_equal(res_ser, exp_ser)
 
-        if unit in ["ns", "us", "ms", "s"]:
-            exp_dta = exp_ser._values
+        exp_dta = exp_ser._values
 
-            res_index = idx.astype(dtype)
-            # TODO(2.0): should be able to just call pd.Index(exp_ser)
-            exp_index = pd.DatetimeIndex._simple_new(exp_dta, name=idx.name)
-            assert exp_index.dtype == exp_dtype
-            tm.assert_index_equal(res_index, exp_index)
+        res_index = idx.astype(dtype)
+        # TODO(2.0): should be able to just call pd.Index(exp_ser)
+        exp_index = pd.DatetimeIndex._simple_new(exp_dta, name=idx.name)
+        assert exp_index.dtype == exp_dtype
+        tm.assert_index_equal(res_index, exp_index)
 
-            res_dta = dta.astype(dtype)
-            assert exp_dta.dtype == exp_dtype
-            tm.assert_extension_array_equal(res_dta, exp_dta)
-        else:
-            msg = rf"Cannot cast DatetimeIndex to dtype datetime64\[{unit}\]"
-            with pytest.raises(TypeError, match=msg):
-                idx.astype(dtype)
-
-            msg = rf"Cannot cast DatetimeArray to dtype datetime64\[{unit}\]"
-            with pytest.raises(TypeError, match=msg):
-                dta.astype(dtype)
+        res_dta = dta.astype(dtype)
+        assert exp_dta.dtype == exp_dtype
+        tm.assert_extension_array_equal(res_dta, exp_dta)
 
     @pytest.mark.parametrize("unit", ["ns"])
     def test_astype_to_timedelta_unit_ns(self, unit):
@@ -483,22 +487,35 @@ def test_astype_to_timedelta_unit(self, unit):
         dtype = f"m8[{unit}]"
         arr = np.array([[1, 2, 3]], dtype=dtype)
         df = DataFrame(arr)
+        ser = df.iloc[:, 0]
+        tdi = pd.Index(ser)
+        tda = tdi._values
+
         if unit in ["us", "ms", "s"]:
             assert (df.dtypes == dtype).all()
+            result = df.astype(dtype)
         else:
             # We get the nearest supported unit, i.e. "s"
             assert (df.dtypes == "m8[s]").all()
 
-        result = df.astype(dtype)
-        if unit in ["m", "h", "D"]:
-            # We don't support these, so we use the pre-2.0 logic to convert to float
-            #  (xref GH#48979)
-
-            expected = DataFrame(df.values.astype(dtype).astype(float))
-        else:
-            # The conversion is a no-op, so we just get a copy
-            expected = df
+            msg = (
+                rf"Cannot convert from timedelta64\[s\] to timedelta64\[{unit}\]. "
+                "Supported resolutions are 's', 'ms', 'us', 'ns'"
+            )
+            with pytest.raises(ValueError, match=msg):
+                df.astype(dtype)
+            with pytest.raises(ValueError, match=msg):
+                ser.astype(dtype)
+            with pytest.raises(ValueError, match=msg):
+                tdi.astype(dtype)
+            with pytest.raises(ValueError, match=msg):
+                tda.astype(dtype)
+
+            return
 
+        result = df.astype(dtype)
+        # The conversion is a no-op, so we just get a copy
+        expected = df
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"])
diff --git a/pandas/tests/indexes/timedeltas/methods/test_astype.py b/pandas/tests/indexes/timedeltas/methods/test_astype.py
diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py