Skip to content

ENH: DatetimeTZDtype support non-nano #47120

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,19 @@ class Resolution(Enum):
def get_reso_from_freqstr(cls, freq: str) -> Resolution: ...
@property
def attr_abbrev(self) -> str: ...

class NpyDatetimeUnit(Enum):
NPY_FR_Y: int
NPY_FR_M: int
NPY_FR_W: int
NPY_FR_D: int
NPY_FR_h: int
NPY_FR_m: int
NPY_FR_s: int
NPY_FR_ms: int
NPY_FR_us: int
NPY_FR_ns: int
NPY_FR_ps: int
NPY_FR_fs: int
NPY_FR_as: int
NPY_FR_GENERIC: int
20 changes: 20 additions & 0 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,26 @@ class Resolution(Enum):
return cls.from_attrname(attr_name)


class NpyDatetimeUnit(Enum):
"""
Python-space analogue to NPY_DATETIMEUNIT.
"""
NPY_FR_Y = NPY_DATETIMEUNIT.NPY_FR_Y
NPY_FR_M = NPY_DATETIMEUNIT.NPY_FR_M
NPY_FR_W = NPY_DATETIMEUNIT.NPY_FR_W
NPY_FR_D = NPY_DATETIMEUNIT.NPY_FR_D
NPY_FR_h = NPY_DATETIMEUNIT.NPY_FR_h
NPY_FR_m = NPY_DATETIMEUNIT.NPY_FR_m
NPY_FR_s = NPY_DATETIMEUNIT.NPY_FR_s
NPY_FR_ms = NPY_DATETIMEUNIT.NPY_FR_ms
NPY_FR_us = NPY_DATETIMEUNIT.NPY_FR_us
NPY_FR_ns = NPY_DATETIMEUNIT.NPY_FR_ns
NPY_FR_ps = NPY_DATETIMEUNIT.NPY_FR_ps
NPY_FR_fs = NPY_DATETIMEUNIT.NPY_FR_fs
NPY_FR_as = NPY_DATETIMEUNIT.NPY_FR_as
NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC


cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# generic -> default to nanoseconds
Expand Down
24 changes: 20 additions & 4 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,14 +670,17 @@ class DatetimeTZDtype(PandasExtensionDtype):

type: type[Timestamp] = Timestamp
kind: str_type = "M"
str = "|M8[ns]"
num = 101
base = np.dtype("M8[ns]")
base = np.dtype("M8[ns]") # TODO: depend on reso?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If base is analogous to "default" I think M8[ns] is a reasonable default.

Copy link
Member Author

@jbrockmendel jbrockmendel Jun 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed but not clear thats what 'base' means. would be good to have that documented somewhere.

The alternative (that ive put together for (Timestamp|Timedelta).(min|max|resolution)) is a property-like that assumes nanoseconds when accessed on the class and is instance-specific when accessed on an instance

na_value = NaT
_metadata = ("unit", "tz")
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_cache_dtypes: dict[str_type, PandasExtensionDtype] = {}

@cache_readonly
def str(self):
return f"|M8[{self._unit}]"

def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None) -> None:
if isinstance(unit, DatetimeTZDtype):
# error: "str" has no attribute "tz"
Expand All @@ -696,8 +699,8 @@ def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None) -> None:
"'DatetimeTZDtype.construct_from_string()' instead."
)
raise ValueError(msg)
else:
raise ValueError("DatetimeTZDtype only supports ns units")
if unit not in ["s", "ms", "us", "ns"]:
raise ValueError("DatetimeTZDtype only supports s, ms, us, ns units")

if tz:
tz = timezones.maybe_get_tz(tz)
Expand All @@ -710,6 +713,19 @@ def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None) -> None:
self._unit = unit
self._tz = tz

@cache_readonly
def _reso(self) -> int:
"""
The NPY_DATETIMEUNIT corresponding to this dtype's resolution.
"""
reso = {
"s": dtypes.NpyDatetimeUnit.NPY_FR_s,
"ms": dtypes.NpyDatetimeUnit.NPY_FR_ms,
"us": dtypes.NpyDatetimeUnit.NPY_FR_us,
"ns": dtypes.NpyDatetimeUnit.NPY_FR_ns,
}[self._unit]
return reso.value

@property
def unit(self) -> str_type:
"""
Expand Down
15 changes: 12 additions & 3 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import pytest
import pytz

from pandas._libs.tslibs.dtypes import NpyDatetimeUnit

from pandas.core.dtypes.base import _registry as registry
from pandas.core.dtypes.common import (
is_bool_dtype,
Expand Down Expand Up @@ -263,10 +265,17 @@ def test_hash_vs_equality(self, dtype):
assert dtype2 != dtype4
assert hash(dtype2) != hash(dtype4)

def test_construction(self):
msg = "DatetimeTZDtype only supports ns units"
def test_construction_non_nanosecond(self):
res = DatetimeTZDtype("ms", "US/Eastern")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the DatetimeTZDtype("D", "US/Eastern") raising case tested in this file as well?

Also would be good to have a full smoke test from one of the 1D constructors e.g. pd.array('2022-01-01', dtype='datetime64[us, US/Eastern]')

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also would be good to have a full smoke test from one of the 1D constructors e.g. pd.array('2022-01-01', dtype='datetime64[us, US/Eastern]')

would prefer to hold off; there's a check in DTA._simple_new that i haven't yet updated to handle non-nano dt64tz, so that'll get a dedicated (small) PR that makes a good fit for what you're suggesting

assert res.unit == "ms"
assert res._reso == NpyDatetimeUnit.NPY_FR_ms.value
assert res.str == "|M8[ms]"
assert str(res) == "datetime64[ms, US/Eastern]"

def test_day_not_supported(self):
msg = "DatetimeTZDtype only supports s, ms, us, ns units"
with pytest.raises(ValueError, match=msg):
DatetimeTZDtype("ms", "US/Eastern")
DatetimeTZDtype("D", "US/Eastern")

def test_subclass(self):
a = DatetimeTZDtype.construct_from_string("datetime64[ns, US/Eastern]")
Expand Down