Skip to content

Commit d338dad

Browse files
jbrockmendel authored and noatamir committed
API: .astype to non-nano return the specified dtype (pandas-dev#48928)
* API: .astype to non-nano return the specified dtype * GH ref
1 parent 3a00ab1 commit d338dad

File tree

3 files changed

+58
-8
lines changed

3 files changed

+58
-8
lines changed

doc/source/whatsnew/v1.6.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,9 @@ Other API changes
117117
^^^^^^^^^^^^^^^^^
118118
- Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`)
119119
- :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` now raises ``ParserError`` instead of ``IndexError`` when using the c parser.
120+
- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`)
120121
-
121122

122-
123123
.. ---------------------------------------------------------------------------
124124
.. _whatsnew_160.deprecations:
125125

pandas/core/dtypes/astype.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@
1515
import numpy as np
1616

1717
from pandas._libs import lib
18-
from pandas._libs.tslibs import is_unitless
18+
from pandas._libs.tslibs import (
19+
get_unit_from_dtype,
20+
is_supported_unit,
21+
is_unitless,
22+
)
1923
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
2024
from pandas._typing import (
2125
ArrayLike,
@@ -289,11 +293,11 @@ def astype_array_safe(
289293
and dtype.kind == "M"
290294
and not is_unitless(dtype)
291295
and not is_dtype_equal(dtype, values.dtype)
296+
and not is_supported_unit(get_unit_from_dtype(dtype))
292297
):
293-
# unit conversion, we would re-cast to nanosecond, so this is
294-
# effectively just a copy (regardless of copy kwd)
295-
# TODO(2.0): remove special-case
296-
return values.copy()
298+
# Supported units we handle in DatetimeArray.astype; but that raises
299+
# on non-supported units, so we handle that here.
300+
return np.asarray(values).astype(dtype)
297301

298302
try:
299303
new_values = astype_array(values, dtype, copy=copy)

pandas/tests/frame/methods/test_astype.py

+48-2
Original file line numberDiff line numberDiff line change
@@ -414,10 +414,56 @@ def test_astype_to_datetime_unit(self, unit):
414414
dtype = f"M8[{unit}]"
415415
arr = np.array([[1, 2, 3]], dtype=dtype)
416416
df = DataFrame(arr)
417+
ser = df.iloc[:, 0]
418+
idx = pd.Index(ser)
419+
dta = ser._values
420+
417421
result = df.astype(dtype)
418-
expected = DataFrame(arr.astype(dtype))
419422

420-
tm.assert_frame_equal(result, expected)
423+
if unit in ["ns", "us", "ms", "s"]:
424+
# GH#48928
425+
exp_dtype = dtype
426+
else:
427+
# TODO(2.0): use the nearest supported dtype (i.e. M8[s]) instead
428+
# of nanos
429+
exp_dtype = "M8[ns]"
430+
# TODO(2.0): once DataFrame constructor doesn't cast ndarray inputs.
431+
# can simplify this
432+
exp_values = arr.astype(exp_dtype)
433+
exp_dta = pd.core.arrays.DatetimeArray._simple_new(
434+
exp_values, dtype=exp_values.dtype
435+
)
436+
exp_df = DataFrame(exp_dta)
437+
assert (exp_df.dtypes == exp_dtype).all()
438+
439+
tm.assert_frame_equal(result, exp_df)
440+
441+
# TODO(2.0): make Series/DataFrame raise like Index and DTA?
442+
res_ser = ser.astype(dtype)
443+
exp_ser = exp_df.iloc[:, 0]
444+
assert exp_ser.dtype == exp_dtype
445+
tm.assert_series_equal(res_ser, exp_ser)
446+
447+
if unit in ["ns", "us", "ms", "s"]:
448+
exp_dta = exp_ser._values
449+
450+
res_index = idx.astype(dtype)
451+
# TODO(2.0): should be able to just call pd.Index(exp_ser)
452+
exp_index = pd.DatetimeIndex._simple_new(exp_dta, name=idx.name)
453+
assert exp_index.dtype == exp_dtype
454+
tm.assert_index_equal(res_index, exp_index)
455+
456+
res_dta = dta.astype(dtype)
457+
assert exp_dta.dtype == exp_dtype
458+
tm.assert_extension_array_equal(res_dta, exp_dta)
459+
else:
460+
msg = rf"Cannot cast DatetimeIndex to dtype datetime64\[{unit}\]"
461+
with pytest.raises(TypeError, match=msg):
462+
idx.astype(dtype)
463+
464+
msg = rf"Cannot cast DatetimeArray to dtype datetime64\[{unit}\]"
465+
with pytest.raises(TypeError, match=msg):
466+
dta.astype(dtype)
421467

422468
@pytest.mark.parametrize("unit", ["ns"])
423469
def test_astype_to_timedelta_unit_ns(self, unit):

0 commit comments

Comments
 (0)