ENH: DTI/DTA.astype support non-nano #47579

Merged (5 commits) on Jul 5, 2022
Changes from 4 commits
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
@@ -275,6 +275,8 @@ Other enhancements
- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and :class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`)
- Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)
- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)


.. ---------------------------------------------------------------------------
.. _whatsnew_150.notable_bug_fixes:
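As a quick illustration of the behaviour this entry describes (a sketch based on the tests added in this PR; the printed dtypes are the expected results, not captured output):

```python
import pandas as pd

# Timezone-naive: cast a nanosecond index down to second resolution.
dti = pd.date_range("2016-01-01", periods=3)
print(dti.astype("datetime64[s]").dtype)  # expected: datetime64[s]

# Timezone-aware: cast to a second-resolution tz-aware dtype.
dti_utc = pd.date_range("2016-01-01", periods=3, tz="UTC")
print(dti_utc.astype("datetime64[s, UTC]").dtype)  # expected: datetime64[s, UTC]
```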
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/__init__.py
@@ -30,12 +30,14 @@
"get_unit_from_dtype",
"periods_per_day",
"periods_per_second",
"is_supported_unit",
]

from pandas._libs.tslibs import dtypes
from pandas._libs.tslibs.conversion import localize_pydatetime
from pandas._libs.tslibs.dtypes import (
Resolution,
is_supported_unit,
periods_per_day,
periods_per_second,
)
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/dtypes.pyi
@@ -7,6 +7,7 @@ _period_code_map: dict[str, int]

def periods_per_day(reso: int) -> int: ...
def periods_per_second(reso: int) -> int: ...
def is_supported_unit(reso: int) -> bool: ...

class PeriodDtypeBase:
_dtype_code: int # PeriodDtypeCode
9 changes: 9 additions & 0 deletions pandas/_libs/tslibs/dtypes.pyx
@@ -277,6 +277,15 @@ class NpyDatetimeUnit(Enum):
NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC


def is_supported_unit(NPY_DATETIMEUNIT reso):
return (
reso == NPY_DATETIMEUNIT.NPY_FR_ns
or reso == NPY_DATETIMEUNIT.NPY_FR_us
or reso == NPY_DATETIMEUNIT.NPY_FR_ms
or reso == NPY_DATETIMEUNIT.NPY_FR_s
)


cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# generic -> default to nanoseconds
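`is_supported_unit` is implemented in Cython against NumPy's unit enum; the following is a rough pure-Python equivalent using the `NpyDatetimeUnit` enum defined in the same module (the helper name `is_supported_unit_py` and the set constant are illustrative, not part of the PR):

```python
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit

# The resolutions DatetimeArray can now back directly: s, ms, us, ns.
_SUPPORTED_RESOS = {
    NpyDatetimeUnit.NPY_FR_s.value,
    NpyDatetimeUnit.NPY_FR_ms.value,
    NpyDatetimeUnit.NPY_FR_us.value,
    NpyDatetimeUnit.NPY_FR_ns.value,
}


def is_supported_unit_py(reso: int) -> bool:
    return reso in _SUPPORTED_RESOS
```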
21 changes: 18 additions & 3 deletions pandas/core/arrays/datetimes.py
@@ -33,6 +33,7 @@
iNaT,
ints_to_pydatetime,
is_date_array_normalized,
is_supported_unit,
is_unitless,
normalize_i8_timestamps,
timezones,
@@ -671,12 +672,26 @@ def astype(self, dtype, copy: bool = True):
return self.copy()
return self

elif (
self.tz is None
and is_datetime64_dtype(dtype)
and not is_unitless(dtype)
and is_supported_unit(get_unit_from_dtype(dtype))
):
# unit conversion e.g. datetime64[s]
res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
return type(self)._simple_new(res_values, dtype=res_values.dtype)
# TODO: preserve freq?

elif is_datetime64_ns_dtype(dtype):
return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False)

elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype:
# unit conversion e.g. datetime64[s]
return self._ndarray.astype(dtype)
elif self.tz is not None and isinstance(dtype, DatetimeTZDtype):
# tzaware unit conversion e.g. datetime64[s, UTC]
np_dtype = np.dtype(dtype.str)
res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
return type(self)._simple_new(res_values, dtype=dtype)
# TODO: preserve freq?

elif is_period_dtype(dtype):
return self.to_period(freq=dtype.freq)
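In effect, the tz-naive branch now hands back a `DatetimeArray` (previously a bare ndarray), and the new tz-aware branch converts the underlying values with `astype_overflowsafe` before re-wrapping them in the target `DatetimeTZDtype`. A short example mirroring the tests added further down:

```python
import pandas as pd
from pandas.core.arrays import DatetimeArray

# tz-naive: the result is a DatetimeArray with the requested unit.
dta = pd.date_range("2016-01-01", periods=3)._data
res = dta.astype("M8[s]")
assert isinstance(res, DatetimeArray) and res.dtype == "M8[s]"

# tz-aware: unit conversion, optionally changing the timezone as well.
dta_utc = pd.date_range("2016-01-01", periods=3, tz="UTC")._data
res2 = dta_utc.astype("M8[s, US/Pacific]")
assert res2.dtype == "M8[s, US/Pacific]"
```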
15 changes: 15 additions & 0 deletions pandas/core/dtypes/astype.py
@@ -15,6 +15,7 @@
import numpy as np

from pandas._libs import lib
from pandas._libs.tslibs import is_unitless
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
from pandas._typing import (
ArrayLike,
@@ -280,6 +281,20 @@ def astype_array_safe(
# Ensure we don't end up with a PandasArray
dtype = dtype.numpy_dtype

if (
is_datetime64_dtype(values.dtype)
# need to do np.dtype check instead of is_datetime64_dtype
# otherwise pyright complains
and isinstance(dtype, np.dtype)
and dtype.kind == "M"
and not is_unitless(dtype)
and not is_dtype_equal(dtype, values.dtype)
):
# unit conversion, we would re-cast to nanosecond, so this is
# effectively just a copy (regardless of copy kwd)
# TODO(2.0): remove special-case
return values.copy()

try:
new_values = astype_array(values, dtype, copy=copy)
except (ValueError, TypeError):
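For ndarray-backed values reaching `astype_array_safe` (e.g. via `Series.astype`), the new guard recognises a datetime64-to-datetime64 unit change and short-circuits to a copy of the stored nanosecond values instead of converting, per the `TODO(2.0)` note. A standalone NumPy-only sketch of that predicate (the helper `_is_dt64_unit_change` is illustrative, not pandas code):

```python
import numpy as np


def _is_dt64_unit_change(values_dtype: np.dtype, dtype) -> bool:
    # Both source and target are datetime64, the target names an explicit
    # (non-generic) unit, and the two dtypes differ.
    return (
        values_dtype.kind == "M"
        and isinstance(dtype, np.dtype)
        and dtype.kind == "M"
        and np.datetime_data(dtype)[0] != "generic"  # i.e. not unitless
        and dtype != values_dtype
    )


print(_is_dt64_unit_change(np.dtype("M8[ns]"), np.dtype("M8[s]")))   # True
print(_is_dt64_unit_change(np.dtype("M8[ns]"), np.dtype("M8[ns]")))  # False
```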
4 changes: 3 additions & 1 deletion pandas/core/dtypes/common.py
@@ -966,7 +966,9 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
tipo = get_dtype(arr_or_dtype.dtype)
else:
return False
return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE
return tipo == DT64NS_DTYPE or (
isinstance(tipo, DatetimeTZDtype) and tipo._unit == "ns"
)


def is_timedelta64_ns_dtype(arr_or_dtype) -> bool:
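With this change `is_datetime64_ns_dtype` accepts a timezone-aware dtype only when its unit really is nanoseconds, instead of matching any `DatetimeTZDtype` through its `base` attribute. For example, mirroring the new test case further down:

```python
from pandas import DatetimeTZDtype
from pandas.core.dtypes.common import is_datetime64_ns_dtype

print(is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")))  # True
print(is_datetime64_ns_dtype(DatetimeTZDtype("us", "US/Eastern")))  # False with this PR
```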
10 changes: 0 additions & 10 deletions pandas/core/indexes/base.py
@@ -1064,16 +1064,6 @@ def astype(self, dtype, copy: bool = True):
# Ensure that self.astype(self.dtype) is self
return self.copy() if copy else self

if (
self.dtype == np.dtype("M8[ns]")
and isinstance(dtype, np.dtype)
and dtype.kind == "M"
and dtype != np.dtype("M8[ns]")
):
# For now DatetimeArray supports this by unwrapping ndarray,
# but DatetimeIndex doesn't
raise TypeError(f"Cannot cast {type(self).__name__} to dtype")

values = self._data
if isinstance(values, ExtensionArray):
with rewrite_exception(type(values).__name__, type(self).__name__):
13 changes: 13 additions & 0 deletions pandas/core/indexes/datetimes.py
@@ -48,6 +48,7 @@
from pandas.core.dtypes.common import (
is_datetime64_dtype,
is_datetime64tz_dtype,
is_dtype_equal,
is_scalar,
)
from pandas.core.dtypes.missing import is_valid_na_for_dtype
@@ -338,6 +339,18 @@ def __new__(
if copy:
data = data.copy()
return cls._simple_new(data, name=name)
elif (
isinstance(data, DatetimeArray)
and freq is lib.no_default
and tz is None
and is_dtype_equal(data.dtype, dtype)
):
# Reached via Index.__new__ when we call .astype
# TODO(2.0): special casing can be removed once _from_sequence_not_strict
# no longer chokes on non-nano
if copy:
data = data.copy()
return cls._simple_new(data, name=name)

dtarr = DatetimeArray._from_sequence_not_strict(
data,
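This branch is what lets the result of a non-nano cast be re-wrapped as a `DatetimeIndex`: `Index.__new__` passes the already-cast `DatetimeArray` (with a matching explicit dtype) to `DatetimeIndex.__new__`, which now wraps it directly instead of sending it through `_from_sequence_not_strict`. Roughly, as expected behaviour based on the tests in this PR:

```python
import pandas as pd

dti = pd.date_range("2016-01-01", periods=3)
res = dti.astype("M8[s]")

# The cast array is wrapped via the fast path above rather than failing
# inside _from_sequence_not_strict.
print(type(res).__name__, res.dtype)  # expected: DatetimeIndex datetime64[s]
```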
30 changes: 30 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
@@ -207,6 +207,36 @@ def test_cmp_dt64_arraylike_tznaive(self, comparison_op):


class TestDatetimeArray:
def test_astype_non_nano_tznaive(self):
dti = pd.date_range("2016-01-01", periods=3)

res = dti.astype("M8[s]")
assert res.dtype == "M8[s]"

dta = dti._data
res = dta.astype("M8[s]")
assert res.dtype == "M8[s]"
assert isinstance(res, pd.core.arrays.DatetimeArray) # used to be ndarray

def test_astype_non_nano_tzaware(self):
dti = pd.date_range("2016-01-01", periods=3, tz="UTC")

res = dti.astype("M8[s, US/Pacific]")
assert res.dtype == "M8[s, US/Pacific]"

dta = dti._data
res = dta.astype("M8[s, US/Pacific]")
assert res.dtype == "M8[s, US/Pacific]"

# from non-nano to non-nano, preserving reso
res2 = res.astype("M8[s, UTC]")
assert res2.dtype == "M8[s, UTC]"
assert not tm.shares_memory(res2, res)

res3 = res.astype("M8[s, UTC]", copy=False)
assert res3.dtype == "M8[s, UTC]"
assert tm.shares_memory(res3, res)

def test_astype_to_same(self):
arr = DatetimeArray._from_sequence(
["2000"], dtype=DatetimeTZDtype(tz="US/Central")
3 changes: 3 additions & 0 deletions pandas/tests/dtypes/test_common.py
@@ -474,6 +474,9 @@ def test_is_datetime64_ns_dtype():
pd.DatetimeIndex([1, 2, 3], dtype=np.dtype("datetime64[ns]"))
)

# non-nano dt64tz
assert not com.is_datetime64_ns_dtype(DatetimeTZDtype("us", "US/Eastern"))


def test_is_timedelta64_ns_dtype():
assert not com.is_timedelta64_ns_dtype(np.dtype("m8[ps]"))
1 change: 1 addition & 0 deletions pandas/tests/tslibs/test_api.py
@@ -55,6 +55,7 @@ def test_namespace():
"get_unit_from_dtype",
"periods_per_day",
"periods_per_second",
"is_supported_unit",
]

expected = set(submodules + api)