Skip to content

Commit 67e8c4c

Browse files
authored
ENH: DTI/DTA.astype support non-nano (#47579)
* ENH: DTI/DTA.astype support non-nano
* whatsnew
* GH ref
* pyright fixup
1 parent 700ef33 commit 67e8c4c

File tree

12 files changed

+97
-14
lines changed

12 files changed

+97
-14
lines changed

doc/source/whatsnew/v1.5.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,9 @@ Other enhancements
275275
- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and :class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`)
276276
- Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)
277277
- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
278+
- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)
278279
- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)
280+
-
279281

280282
.. ---------------------------------------------------------------------------
281283
.. _whatsnew_150.notable_bug_fixes:

pandas/_libs/tslibs/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,14 @@
3030
"get_unit_from_dtype",
3131
"periods_per_day",
3232
"periods_per_second",
33+
"is_supported_unit",
3334
]
3435

3536
from pandas._libs.tslibs import dtypes
3637
from pandas._libs.tslibs.conversion import localize_pydatetime
3738
from pandas._libs.tslibs.dtypes import (
3839
Resolution,
40+
is_supported_unit,
3941
periods_per_day,
4042
periods_per_second,
4143
)

pandas/_libs/tslibs/dtypes.pyi

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ _period_code_map: dict[str, int]
77

88
def periods_per_day(reso: int) -> int: ...
99
def periods_per_second(reso: int) -> int: ...
10+
def is_supported_unit(reso: int) -> bool: ...
1011

1112
class PeriodDtypeBase:
1213
_dtype_code: int # PeriodDtypeCode

pandas/_libs/tslibs/dtypes.pyx

+9
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,15 @@ class NpyDatetimeUnit(Enum):
277277
NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC
278278

279279

280+
def is_supported_unit(NPY_DATETIMEUNIT reso):
    """
    Return whether ``reso`` is one of the four resolutions checked here.

    Parameters
    ----------
    reso : NPY_DATETIMEUNIT
        The numpy datetime unit code to check.

    Returns
    -------
    bool
        True when ``reso`` is the nanosecond, microsecond, millisecond or
        second unit; False for every other unit code.
    """
    return reso in (
        NPY_DATETIMEUNIT.NPY_FR_ns,
        NPY_DATETIMEUNIT.NPY_FR_us,
        NPY_DATETIMEUNIT.NPY_FR_ms,
        NPY_DATETIMEUNIT.NPY_FR_s,
    )
287+
288+
280289
cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
281290
if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
282291
# generic -> default to nanoseconds

pandas/core/arrays/datetimes.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
get_unit_from_dtype,
3232
ints_to_pydatetime,
3333
is_date_array_normalized,
34+
is_supported_unit,
3435
is_unitless,
3536
normalize_i8_timestamps,
3637
timezones,
@@ -603,12 +604,26 @@ def astype(self, dtype, copy: bool = True):
603604
return self.copy()
604605
return self
605606

607+
elif (
608+
self.tz is None
609+
and is_datetime64_dtype(dtype)
610+
and not is_unitless(dtype)
611+
and is_supported_unit(get_unit_from_dtype(dtype))
612+
):
613+
# unit conversion e.g. datetime64[s]
614+
res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
615+
return type(self)._simple_new(res_values, dtype=res_values.dtype)
616+
# TODO: preserve freq?
617+
606618
elif is_datetime64_ns_dtype(dtype):
607619
return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False)
608620

609-
elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype:
610-
# unit conversion e.g. datetime64[s]
611-
return self._ndarray.astype(dtype)
621+
elif self.tz is not None and isinstance(dtype, DatetimeTZDtype):
622+
# tzaware unit conversion e.g. datetime64[s, UTC]
623+
np_dtype = np.dtype(dtype.str)
624+
res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
625+
return type(self)._simple_new(res_values, dtype=dtype)
626+
# TODO: preserve freq?
612627

613628
elif is_period_dtype(dtype):
614629
return self.to_period(freq=dtype.freq)

pandas/core/dtypes/astype.py

+15
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import numpy as np
1616

1717
from pandas._libs import lib
18+
from pandas._libs.tslibs import is_unitless
1819
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
1920
from pandas._typing import (
2021
ArrayLike,
@@ -280,6 +281,20 @@ def astype_array_safe(
280281
# Ensure we don't end up with a PandasArray
281282
dtype = dtype.numpy_dtype
282283

284+
if (
285+
is_datetime64_dtype(values.dtype)
286+
# need to do np.dtype check instead of is_datetime64_dtype
287+
# otherwise pyright complains
288+
and isinstance(dtype, np.dtype)
289+
and dtype.kind == "M"
290+
and not is_unitless(dtype)
291+
and not is_dtype_equal(dtype, values.dtype)
292+
):
293+
# unit conversion, we would re-cast to nanosecond, so this is
294+
# effectively just a copy (regardless of copy kwd)
295+
# TODO(2.0): remove special-case
296+
return values.copy()
297+
283298
try:
284299
new_values = astype_array(values, dtype, copy=copy)
285300
except (ValueError, TypeError):

pandas/core/dtypes/common.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -966,7 +966,9 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
966966
tipo = get_dtype(arr_or_dtype.dtype)
967967
else:
968968
return False
969-
return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE
969+
return tipo == DT64NS_DTYPE or (
970+
isinstance(tipo, DatetimeTZDtype) and tipo._unit == "ns"
971+
)
970972

971973

972974
def is_timedelta64_ns_dtype(arr_or_dtype) -> bool:

pandas/core/indexes/base.py

-10
Original file line numberDiff line numberDiff line change
@@ -1064,16 +1064,6 @@ def astype(self, dtype, copy: bool = True):
10641064
# Ensure that self.astype(self.dtype) is self
10651065
return self.copy() if copy else self
10661066

1067-
if (
1068-
self.dtype == np.dtype("M8[ns]")
1069-
and isinstance(dtype, np.dtype)
1070-
and dtype.kind == "M"
1071-
and dtype != np.dtype("M8[ns]")
1072-
):
1073-
# For now DatetimeArray supports this by unwrapping ndarray,
1074-
# but DatetimeIndex doesn't
1075-
raise TypeError(f"Cannot cast {type(self).__name__} to dtype")
1076-
10771067
values = self._data
10781068
if isinstance(values, ExtensionArray):
10791069
with rewrite_exception(type(values).__name__, type(self).__name__):

pandas/core/indexes/datetimes.py

+13
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from pandas.core.dtypes.common import (
4949
is_datetime64_dtype,
5050
is_datetime64tz_dtype,
51+
is_dtype_equal,
5152
is_scalar,
5253
)
5354
from pandas.core.dtypes.missing import is_valid_na_for_dtype
@@ -338,6 +339,18 @@ def __new__(
338339
if copy:
339340
data = data.copy()
340341
return cls._simple_new(data, name=name)
342+
elif (
343+
isinstance(data, DatetimeArray)
344+
and freq is lib.no_default
345+
and tz is None
346+
and is_dtype_equal(data.dtype, dtype)
347+
):
348+
# Reached via Index.__new__ when we call .astype
349+
# TODO(2.0): special casing can be removed once _from_sequence_not_strict
350+
# no longer chokes on non-nano
351+
if copy:
352+
data = data.copy()
353+
return cls._simple_new(data, name=name)
341354

342355
dtarr = DatetimeArray._from_sequence_not_strict(
343356
data,

pandas/tests/arrays/test_datetimes.py

+30
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,36 @@ def test_cmp_dt64_arraylike_tznaive(self, comparison_op):
207207

208208

209209
class TestDatetimeArray:
210+
def test_astype_non_nano_tznaive(self):
211+
dti = pd.date_range("2016-01-01", periods=3)
212+
213+
res = dti.astype("M8[s]")
214+
assert res.dtype == "M8[s]"
215+
216+
dta = dti._data
217+
res = dta.astype("M8[s]")
218+
assert res.dtype == "M8[s]"
219+
assert isinstance(res, pd.core.arrays.DatetimeArray) # used to be ndarray
220+
221+
def test_astype_non_nano_tzaware(self):
222+
dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
223+
224+
res = dti.astype("M8[s, US/Pacific]")
225+
assert res.dtype == "M8[s, US/Pacific]"
226+
227+
dta = dti._data
228+
res = dta.astype("M8[s, US/Pacific]")
229+
assert res.dtype == "M8[s, US/Pacific]"
230+
231+
# from non-nano to non-nano, preserving reso
232+
res2 = res.astype("M8[s, UTC]")
233+
assert res2.dtype == "M8[s, UTC]"
234+
assert not tm.shares_memory(res2, res)
235+
236+
res3 = res.astype("M8[s, UTC]", copy=False)
237+
assert res2.dtype == "M8[s, UTC]"
238+
assert tm.shares_memory(res3, res)
239+
210240
def test_astype_to_same(self):
211241
arr = DatetimeArray._from_sequence(
212242
["2000"], dtype=DatetimeTZDtype(tz="US/Central")

pandas/tests/dtypes/test_common.py

+3
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,9 @@ def test_is_datetime64_ns_dtype():
474474
pd.DatetimeIndex([1, 2, 3], dtype=np.dtype("datetime64[ns]"))
475475
)
476476

477+
# non-nano dt64tz
478+
assert not com.is_datetime64_ns_dtype(DatetimeTZDtype("us", "US/Eastern"))
479+
477480

478481
def test_is_timedelta64_ns_dtype():
479482
assert not com.is_timedelta64_ns_dtype(np.dtype("m8[ps]"))

pandas/tests/tslibs/test_api.py

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def test_namespace():
5555
"get_unit_from_dtype",
5656
"periods_per_day",
5757
"periods_per_second",
58+
"is_supported_unit",
5859
]
5960

6061
expected = set(submodules + api)

0 commit comments

Comments
 (0)