diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 098750aa3a2b2..461ef370c7c88 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -117,9 +117,9 @@ Other API changes ^^^^^^^^^^^^^^^^^ - Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`) - :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` of now raises ``ParserError`` instead of ``IndexError`` when using the c parser. +- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`) - - .. --------------------------------------------------------------------------- .. _whatsnew_160.deprecations: diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 6d04dd755dbfd..892b53a261b26 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -15,7 +15,11 @@ import numpy as np from pandas._libs import lib -from pandas._libs.tslibs import is_unitless +from pandas._libs.tslibs import ( + get_unit_from_dtype, + is_supported_unit, + is_unitless, +) from pandas._libs.tslibs.timedeltas import array_to_timedelta64 from pandas._typing import ( ArrayLike, @@ -289,11 +293,11 @@ def astype_array_safe( and dtype.kind == "M" and not is_unitless(dtype) and not is_dtype_equal(dtype, values.dtype) + and not is_supported_unit(get_unit_from_dtype(dtype)) ): - # unit conversion, we would re-cast to nanosecond, so this is - # effectively just a copy (regardless of copy kwd) - # TODO(2.0): remove special-case - return values.copy() + # Supported units we handle in DatetimeArray.astype; but that raises + # on non-supported units, so we handle that here. 
+ return np.asarray(values).astype(dtype) try: new_values = astype_array(values, dtype, copy=copy) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 6d343de9f5d3a..7e7ce4321fb64 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -414,10 +414,56 @@ def test_astype_to_datetime_unit(self, unit): dtype = f"M8[{unit}]" arr = np.array([[1, 2, 3]], dtype=dtype) df = DataFrame(arr) + ser = df.iloc[:, 0] + idx = pd.Index(ser) + dta = ser._values + result = df.astype(dtype) - expected = DataFrame(arr.astype(dtype)) - tm.assert_frame_equal(result, expected) + if unit in ["ns", "us", "ms", "s"]: + # GH#48928 + exp_dtype = dtype + else: + # TODO(2.0): use the nearest supported dtype (i.e. M8[s]) instead + # of nanos + exp_dtype = "M8[ns]" + # TODO(2.0): once DataFrame constructor doesn't cast ndarray inputs, + # can simplify this + exp_values = arr.astype(exp_dtype) + exp_dta = pd.core.arrays.DatetimeArray._simple_new( + exp_values, dtype=exp_values.dtype + ) + exp_df = DataFrame(exp_dta) + assert (exp_df.dtypes == exp_dtype).all() + + tm.assert_frame_equal(result, exp_df) + + # TODO(2.0): make Series/DataFrame raise like Index and DTA? 
+ res_ser = ser.astype(dtype) + exp_ser = exp_df.iloc[:, 0] + assert exp_ser.dtype == exp_dtype + tm.assert_series_equal(res_ser, exp_ser) + + if unit in ["ns", "us", "ms", "s"]: + exp_dta = exp_ser._values + + res_index = idx.astype(dtype) + # TODO(2.0): should be able to just call pd.Index(exp_ser) + exp_index = pd.DatetimeIndex._simple_new(exp_dta, name=idx.name) + assert exp_index.dtype == exp_dtype + tm.assert_index_equal(res_index, exp_index) + + res_dta = dta.astype(dtype) + assert exp_dta.dtype == exp_dtype + tm.assert_extension_array_equal(res_dta, exp_dta) + else: + msg = rf"Cannot cast DatetimeIndex to dtype datetime64\[{unit}\]" + with pytest.raises(TypeError, match=msg): + idx.astype(dtype) + + msg = rf"Cannot cast DatetimeArray to dtype datetime64\[{unit}\]" + with pytest.raises(TypeError, match=msg): + dta.astype(dtype) @pytest.mark.parametrize("unit", ["ns"]) def test_astype_to_timedelta_unit_ns(self, unit):