Skip to content

ENH: support non-nano in DTA._box_func #47299

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ from numpy cimport (
ndarray,
)

from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
npy_datetimestruct,
)


cdef class _TSObject:
Expand All @@ -31,3 +34,5 @@ cdef int64_t get_datetime64_nanos(object val) except? -1
cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
cdef int64_t cast_from_unit(object ts, str unit) except? -1
cpdef (int64_t, int) precision_from_unit(str unit)

cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)
17 changes: 11 additions & 6 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -296,14 +296,18 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to '
f'Timestamp')

maybe_localize_tso(obj, tz, NPY_FR_ns)
return obj


cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso):
if tz is not None:
_localize_tso(obj, tz)
_localize_tso(obj, tz, reso)

if obj.value != NPY_NAT:
# check_overflows needs to run after _localize_tso
check_dts_bounds(&obj.dts)
check_dts_bounds(&obj.dts, reso)
check_overflows(obj)
return obj


cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz,
Expand Down Expand Up @@ -548,7 +552,7 @@ cdef inline check_overflows(_TSObject obj):
# ----------------------------------------------------------------------
# Localization

cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
cdef inline void _localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso):
"""
Given the UTC timestamp in obj.value (expressed in the resolution given
by reso), find the wall-clock representation of that timestamp in the
given timezone.
Expand All @@ -557,6 +561,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
----------
obj : _TSObject
tz : tzinfo
reso : NPY_DATETIMEUNIT

Returns
-------
Expand All @@ -569,7 +574,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
cdef:
int64_t local_val
Py_ssize_t outpos = -1
Localizer info = Localizer(tz, NPY_FR_ns)
Localizer info = Localizer(tz, reso)

assert obj.tzinfo is None

Expand All @@ -584,7 +589,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
# infer we went through a pytz path, will have outpos!=-1
tz = tz._tzinfos[tz._transition_info[outpos]]

dt64_to_dtstruct(local_val, &obj.dts)
pandas_datetime_to_datetimestruct(local_val, reso, &obj.dts)

obj.tzinfo = tz

Expand Down
4 changes: 4 additions & 0 deletions pandas/_libs/tslibs/timestamps.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ class Timestamp(datetime):
# While Timestamp can return pd.NaT, having the constructor return
# a Union with NaTType makes things awkward for users of pandas
def _set_freq(self, freq: BaseOffset | None) -> None: ...
@classmethod
def _from_value_and_reso(
cls, value: int, reso: int, tz: _tzinfo | None
) -> Timestamp: ...
@property
def year(self) -> int: ...
@property
Expand Down
23 changes: 19 additions & 4 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ from pandas._libs.tslibs.conversion cimport (
_TSObject,
convert_datetime_to_tsobject,
convert_to_tsobject,
maybe_localize_tso,
)
from pandas._libs.tslibs.dtypes cimport (
npy_unit_to_abbrev,
Expand Down Expand Up @@ -210,6 +211,23 @@ cdef class _Timestamp(ABCTimestamp):
# -----------------------------------------------------------------
# Constructors

@classmethod
def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso, tzinfo tz):
cdef:
npy_datetimestruct dts
_TSObject obj = _TSObject()

if value == NPY_NAT:
return NaT

obj.value = value
pandas_datetime_to_datetimestruct(value, reso, &obj.dts)
maybe_localize_tso(obj, tz, reso)

return create_timestamp_from_ts(
value, obj.dts, tz=obj.tzinfo, freq=None, fold=obj.fold, reso=reso
)

@classmethod
def _from_dt64(cls, dt64: np.datetime64):
# construct a Timestamp from a np.datetime64 object, keeping the
Expand All @@ -223,10 +241,7 @@ cdef class _Timestamp(ABCTimestamp):

reso = get_datetime64_unit(dt64)
value = get_datetime64_value(dt64)
pandas_datetime_to_datetimestruct(value, reso, &dts)
return create_timestamp_from_ts(
value, dts, tz=None, freq=None, fold=0, reso=reso
)
return cls._from_value_and_reso(value, reso, None)

# -----------------------------------------------------------------

Expand Down
13 changes: 7 additions & 6 deletions pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,16 @@ cdef class Localizer:
# NB: using floordiv here is implicitly assuming we will
# never see trans or deltas that are not an integer number
# of seconds.
# TODO: avoid these np.array calls
if reso == NPY_DATETIMEUNIT.NPY_FR_us:
trans = trans // 1_000
deltas = deltas // 1_000
trans = np.array(trans) // 1_000
deltas = np.array(deltas) // 1_000
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
trans = trans // 1_000_000
deltas = deltas // 1_000_000
trans = np.array(trans) // 1_000_000
deltas = np.array(deltas) // 1_000_000
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
trans = trans // 1_000_000_000
deltas = deltas // 1_000_000_000
trans = np.array(trans) // 1_000_000_000
deltas = np.array(deltas) // 1_000_000_000
else:
raise NotImplementedError(reso)

Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/vectorized.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,7 @@ def ints_to_pydatetime(
box: str = ...,
) -> npt.NDArray[np.object_]: ...
def tz_convert_from_utc(
stamps: npt.NDArray[np.int64], tz: tzinfo | None
stamps: npt.NDArray[np.int64],
tz: tzinfo | None,
reso: int = ...,
) -> npt.NDArray[np.int64]: ...
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/vectorized.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ from .tzconversion cimport Localizer

@cython.boundscheck(False)
@cython.wraparound(False)
def tz_convert_from_utc(ndarray stamps, tzinfo tz):
def tz_convert_from_utc(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_FR_ns):
# stamps is int64_t, arbitrary ndim
"""
Convert the values (in i8) from UTC to tz
Expand All @@ -58,7 +58,7 @@ def tz_convert_from_utc(ndarray stamps, tzinfo tz):
ndarray[int64]
"""
cdef:
Localizer info = Localizer(tz, reso=NPY_FR_ns)
Localizer info = Localizer(tz, reso=reso)
int64_t utc_val, local_val
Py_ssize_t pos, i, n = stamps.size

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def _check_compatible_with(self, other, setitem: bool = False):
def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
# GH#42228
value = x.view("i8")
ts = Timestamp(value, tz=self.tz)
ts = Timestamp._from_value_and_reso(value, reso=self._reso, tz=self.tz)
# Non-overlapping identity check (left operand type: "Timestamp",
# right operand type: "NaTType")
if ts is not NaT: # type: ignore[comparison-overlap]
Expand Down Expand Up @@ -775,7 +775,7 @@ def _local_timestamps(self) -> npt.NDArray[np.int64]:
if self.tz is None or timezones.is_utc(self.tz):
# Avoid the copy that would be made in tzconversion
return self.asi8
return tz_convert_from_utc(self.asi8, self.tz)
return tz_convert_from_utc(self.asi8, self.tz, reso=self._reso)

def tz_convert(self, tz) -> DatetimeArray:
"""
Expand Down
43 changes: 33 additions & 10 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import numpy as np
import pytest

from pandas._libs.tslibs import tz_compare
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit

from pandas.core.dtypes.dtypes import DatetimeTZDtype

import pandas as pd
Expand All @@ -20,28 +23,48 @@ def unit(self, request):
@pytest.fixture
def reso(self, unit):
"""Fixture returning datetime resolution for a given time unit"""
# TODO: avoid hard-coding
return {"s": 7, "ms": 8, "us": 9}[unit]
return {
"s": NpyDatetimeUnit.NPY_FR_s.value,
"ms": NpyDatetimeUnit.NPY_FR_ms.value,
"us": NpyDatetimeUnit.NPY_FR_us.value,
}[unit]

@pytest.fixture
def dtype(self, unit, tz_naive_fixture):
tz = tz_naive_fixture
if tz is None:
return np.dtype(f"datetime64[{unit}]")
else:
return DatetimeTZDtype(unit=unit, tz=tz)

@pytest.mark.xfail(reason="_box_func is not yet patched to get reso right")
def test_non_nano(self, unit, reso):
def test_non_nano(self, unit, reso, dtype):
arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]")
dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
dta = DatetimeArray._simple_new(arr, dtype=dtype)

assert dta.dtype == arr.dtype
assert dta.dtype == dtype
assert dta[0]._reso == reso
assert tz_compare(dta.tz, dta[0].tz)
assert (dta[0] == dta[:1]).all()

@pytest.mark.filterwarnings(
"ignore:weekofyear and week have been deprecated:FutureWarning"
)
@pytest.mark.parametrize(
"field", DatetimeArray._field_ops + DatetimeArray._bool_ops
)
def test_fields(self, unit, reso, field):
dti = pd.date_range("2016-01-01", periods=55, freq="D")
arr = np.asarray(dti).astype(f"M8[{unit}]")
def test_fields(self, unit, reso, field, dtype):
tz = getattr(dtype, "tz", None)
dti = pd.date_range("2016-01-01", periods=55, freq="D", tz=tz)
if tz is None:
arr = np.asarray(dti).astype(f"M8[{unit}]")
else:
arr = np.asarray(dti.tz_convert("UTC").tz_localize(None)).astype(
f"M8[{unit}]"
)

dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
dta = DatetimeArray._simple_new(arr, dtype=dtype)

# FIXME: assert (dti == dta).all()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth xfailing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really; I'm already working on the next branch that sits on top of this.


res = getattr(dta, field)
expected = getattr(dti._data, field)
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,10 +802,12 @@ def test_comparison(self, dt64, ts):

def test_cmp_cross_reso(self):
# numpy gets this wrong because of silent overflow
dt64 = np.datetime64(106752, "D") # won't fit in M8[ns]
dt64 = np.datetime64(9223372800, "s") # won't fit in M8[ns]
ts = Timestamp._from_dt64(dt64)

other = Timestamp(dt64 - 1)
# subtracting 3600*24 gives a datetime64 that _can_ fit inside the
# nanosecond implementation bounds.
other = Timestamp(dt64 - 3600 * 24)
assert other < ts
assert other.asm8 > ts.asm8 # <- numpy gets this wrong
assert ts > other
Expand Down