Skip to content

API: .astype to non-nano return the specified dtype #48928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.6.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ Other API changes
^^^^^^^^^^^^^^^^^
- Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`)
- :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` now raises ``ParserError`` instead of ``IndexError`` when using the c parser.
- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`)
-


.. ---------------------------------------------------------------------------
.. _whatsnew_160.deprecations:

Expand Down
14 changes: 9 additions & 5 deletions pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
import numpy as np

from pandas._libs import lib
from pandas._libs.tslibs import is_unitless
from pandas._libs.tslibs import (
get_unit_from_dtype,
is_supported_unit,
is_unitless,
)
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
from pandas._typing import (
ArrayLike,
Expand Down Expand Up @@ -289,11 +293,11 @@ def astype_array_safe(
and dtype.kind == "M"
and not is_unitless(dtype)
and not is_dtype_equal(dtype, values.dtype)
and not is_supported_unit(get_unit_from_dtype(dtype))
):
# unit conversion, we would re-cast to nanosecond, so this is
# effectively just a copy (regardless of copy kwd)
# TODO(2.0): remove special-case
return values.copy()
# Supported units we handle in DatetimeArray.astype; but that raises
# on non-supported units, so we handle that here.
return np.asarray(values).astype(dtype)

try:
new_values = astype_array(values, dtype, copy=copy)
Expand Down
50 changes: 48 additions & 2 deletions pandas/tests/frame/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,10 +414,56 @@ def test_astype_to_datetime_unit(self, unit):
dtype = f"M8[{unit}]"
arr = np.array([[1, 2, 3]], dtype=dtype)
df = DataFrame(arr)
ser = df.iloc[:, 0]
idx = pd.Index(ser)
dta = ser._values

result = df.astype(dtype)
expected = DataFrame(arr.astype(dtype))

tm.assert_frame_equal(result, expected)
if unit in ["ns", "us", "ms", "s"]:
# GH#48928
exp_dtype = dtype
else:
# TODO(2.0): use the nearest supported dtype (i.e. M8[s]) instead
# of nanos
exp_dtype = "M8[ns]"
# TODO(2.0): once DataFrame constructor doesn't cast ndarray inputs.
# can simplify this
exp_values = arr.astype(exp_dtype)
exp_dta = pd.core.arrays.DatetimeArray._simple_new(
exp_values, dtype=exp_values.dtype
)
exp_df = DataFrame(exp_dta)
assert (exp_df.dtypes == exp_dtype).all()

tm.assert_frame_equal(result, exp_df)

# TODO(2.0): make Series/DataFrame raise like Index and DTA?
res_ser = ser.astype(dtype)
exp_ser = exp_df.iloc[:, 0]
assert exp_ser.dtype == exp_dtype
tm.assert_series_equal(res_ser, exp_ser)

if unit in ["ns", "us", "ms", "s"]:
exp_dta = exp_ser._values

res_index = idx.astype(dtype)
# TODO(2.0): should be able to just call pd.Index(exp_ser)
exp_index = pd.DatetimeIndex._simple_new(exp_dta, name=idx.name)
assert exp_index.dtype == exp_dtype
tm.assert_index_equal(res_index, exp_index)

res_dta = dta.astype(dtype)
assert exp_dta.dtype == exp_dtype
tm.assert_extension_array_equal(res_dta, exp_dta)
else:
msg = rf"Cannot cast DatetimeIndex to dtype datetime64\[{unit}\]"
with pytest.raises(TypeError, match=msg):
idx.astype(dtype)

msg = rf"Cannot cast DatetimeArray to dtype datetime64\[{unit}\]"
with pytest.raises(TypeError, match=msg):
dta.astype(dtype)

@pytest.mark.parametrize("unit", ["ns"])
def test_astype_to_timedelta_unit_ns(self, unit):
Expand Down