Skip to content

Commit a93e8df

Browse files
authored
ENH: support non-nano in DTA._box_func (#47299)
* ENH: support reso in DTA._box_func * mypy fixup
1 parent db2f241 commit a93e8df

File tree

10 files changed

+91
-34
lines changed

10 files changed

+91
-34
lines changed

pandas/_libs/tslibs/conversion.pxd

+6-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ from numpy cimport (
88
ndarray,
99
)
1010

11-
from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct
11+
from pandas._libs.tslibs.np_datetime cimport (
12+
NPY_DATETIMEUNIT,
13+
npy_datetimestruct,
14+
)
1215

1316

1417
cdef class _TSObject:
@@ -31,3 +34,5 @@ cdef int64_t get_datetime64_nanos(object val) except? -1
3134
cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
3235
cdef int64_t cast_from_unit(object ts, str unit) except? -1
3336
cpdef (int64_t, int) precision_from_unit(str unit)
37+
38+
cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)

pandas/_libs/tslibs/conversion.pyx

+11-6
Original file line numberDiff line numberDiff line change
@@ -296,14 +296,18 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
296296
raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to '
297297
f'Timestamp')
298298

299+
maybe_localize_tso(obj, tz, NPY_FR_ns)
300+
return obj
301+
302+
303+
cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso):
299304
if tz is not None:
300-
_localize_tso(obj, tz)
305+
_localize_tso(obj, tz, reso)
301306

302307
if obj.value != NPY_NAT:
303308
# check_overflows needs to run after _localize_tso
304-
check_dts_bounds(&obj.dts)
309+
check_dts_bounds(&obj.dts, reso)
305310
check_overflows(obj)
306-
return obj
307311

308312

309313
cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz,
@@ -548,7 +552,7 @@ cdef inline check_overflows(_TSObject obj):
548552
# ----------------------------------------------------------------------
549553
# Localization
550554

551-
cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
555+
cdef inline void _localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso):
552556
"""
553557
Given the UTC timestamp in obj.value (expressed in units of `reso`), find the wall-clock
554558
representation of that timestamp in the given timezone.
@@ -557,6 +561,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
557561
----------
558562
obj : _TSObject
559563
tz : tzinfo
564+
reso : NPY_DATETIMEUNIT
560565
561566
Returns
562567
-------
@@ -569,7 +574,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
569574
cdef:
570575
int64_t local_val
571576
Py_ssize_t outpos = -1
572-
Localizer info = Localizer(tz, NPY_FR_ns)
577+
Localizer info = Localizer(tz, reso)
573578

574579
assert obj.tzinfo is None
575580

@@ -584,7 +589,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
584589
# infer we went through a pytz path, will have outpos!=-1
585590
tz = tz._tzinfos[tz._transition_info[outpos]]
586591

587-
dt64_to_dtstruct(local_val, &obj.dts)
592+
pandas_datetime_to_datetimestruct(local_val, reso, &obj.dts)
588593

589594
obj.tzinfo = tz
590595

pandas/_libs/tslibs/timestamps.pyi

+4
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ class Timestamp(datetime):
5959
# While Timestamp can return pd.NaT, having the constructor return
6060
# a Union with NaTType makes things awkward for users of pandas
6161
def _set_freq(self, freq: BaseOffset | None) -> None: ...
62+
@classmethod
63+
def _from_value_and_reso(
64+
cls, value: int, reso: int, tz: _tzinfo | None
65+
) -> Timestamp: ...
6266
@property
6367
def year(self) -> int: ...
6468
@property

pandas/_libs/tslibs/timestamps.pyx

+19-4
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ from pandas._libs.tslibs.conversion cimport (
5151
_TSObject,
5252
convert_datetime_to_tsobject,
5353
convert_to_tsobject,
54+
maybe_localize_tso,
5455
)
5556
from pandas._libs.tslibs.dtypes cimport (
5657
npy_unit_to_abbrev,
@@ -210,6 +211,23 @@ cdef class _Timestamp(ABCTimestamp):
210211
# -----------------------------------------------------------------
211212
# Constructors
212213

214+
@classmethod
215+
def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso, tzinfo tz):
216+
cdef:
217+
npy_datetimestruct dts
218+
_TSObject obj = _TSObject()
219+
220+
if value == NPY_NAT:
221+
return NaT
222+
223+
obj.value = value
224+
pandas_datetime_to_datetimestruct(value, reso, &obj.dts)
225+
maybe_localize_tso(obj, tz, reso)
226+
227+
return create_timestamp_from_ts(
228+
value, obj.dts, tz=obj.tzinfo, freq=None, fold=obj.fold, reso=reso
229+
)
230+
213231
@classmethod
214232
def _from_dt64(cls, dt64: np.datetime64):
215233
# construct a Timestamp from a np.datetime64 object, keeping the
@@ -223,10 +241,7 @@ cdef class _Timestamp(ABCTimestamp):
223241

224242
reso = get_datetime64_unit(dt64)
225243
value = get_datetime64_value(dt64)
226-
pandas_datetime_to_datetimestruct(value, reso, &dts)
227-
return create_timestamp_from_ts(
228-
value, dts, tz=None, freq=None, fold=0, reso=reso
229-
)
244+
return cls._from_value_and_reso(value, reso, None)
230245

231246
# -----------------------------------------------------------------
232247

pandas/_libs/tslibs/tzconversion.pyx

+7-6
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,16 @@ cdef class Localizer:
8888
# NB: using floordiv here is implicitly assuming we will
8989
# never see trans or deltas that are not an integer number
9090
# of seconds.
91+
# TODO: avoid these np.array calls
9192
if reso == NPY_DATETIMEUNIT.NPY_FR_us:
92-
trans = trans // 1_000
93-
deltas = deltas // 1_000
93+
trans = np.array(trans) // 1_000
94+
deltas = np.array(deltas) // 1_000
9495
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
95-
trans = trans // 1_000_000
96-
deltas = deltas // 1_000_000
96+
trans = np.array(trans) // 1_000_000
97+
deltas = np.array(deltas) // 1_000_000
9798
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
98-
trans = trans // 1_000_000_000
99-
deltas = deltas // 1_000_000_000
99+
trans = np.array(trans) // 1_000_000_000
100+
deltas = np.array(deltas) // 1_000_000_000
100101
else:
101102
raise NotImplementedError(reso)
102103

pandas/_libs/tslibs/vectorized.pyi

+3-1
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,7 @@ def ints_to_pydatetime(
3737
box: str = ...,
3838
) -> npt.NDArray[np.object_]: ...
3939
def tz_convert_from_utc(
40-
stamps: npt.NDArray[np.int64], tz: tzinfo | None
40+
stamps: npt.NDArray[np.int64],
41+
tz: tzinfo | None,
42+
reso: int = ...,
4143
) -> npt.NDArray[np.int64]: ...

pandas/_libs/tslibs/vectorized.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ from .tzconversion cimport Localizer
4343

4444
@cython.boundscheck(False)
4545
@cython.wraparound(False)
46-
def tz_convert_from_utc(ndarray stamps, tzinfo tz):
46+
def tz_convert_from_utc(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_FR_ns):
4747
# stamps is int64_t, arbitrary ndim
4848
"""
4949
Convert the values (in i8) from UTC to tz
@@ -58,7 +58,7 @@ def tz_convert_from_utc(ndarray stamps, tzinfo tz):
5858
ndarray[int64]
5959
"""
6060
cdef:
61-
Localizer info = Localizer(tz, reso=NPY_FR_ns)
61+
Localizer info = Localizer(tz, reso=reso)
6262
int64_t utc_val, local_val
6363
Py_ssize_t pos, i, n = stamps.size
6464

pandas/core/arrays/datetimes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ def _check_compatible_with(self, other, setitem: bool = False):
545545
def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
546546
# GH#42228
547547
value = x.view("i8")
548-
ts = Timestamp(value, tz=self.tz)
548+
ts = Timestamp._from_value_and_reso(value, reso=self._reso, tz=self.tz)
549549
# Non-overlapping identity check (left operand type: "Timestamp",
550550
# right operand type: "NaTType")
551551
if ts is not NaT: # type: ignore[comparison-overlap]
@@ -774,7 +774,7 @@ def _local_timestamps(self) -> npt.NDArray[np.int64]:
774774
if self.tz is None or timezones.is_utc(self.tz):
775775
# Avoid the copy that would be made in tzconversion
776776
return self.asi8
777-
return tz_convert_from_utc(self.asi8, self.tz)
777+
return tz_convert_from_utc(self.asi8, self.tz, reso=self._reso)
778778

779779
def tz_convert(self, tz) -> DatetimeArray:
780780
"""

pandas/tests/arrays/test_datetimes.py

+33-10
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
import numpy as np
55
import pytest
66

7+
from pandas._libs.tslibs import tz_compare
8+
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
9+
710
from pandas.core.dtypes.dtypes import DatetimeTZDtype
811

912
import pandas as pd
@@ -20,28 +23,48 @@ def unit(self, request):
2023
@pytest.fixture
2124
def reso(self, unit):
2225
"""Fixture returning datetime resolution for a given time unit"""
23-
# TODO: avoid hard-coding
24-
return {"s": 7, "ms": 8, "us": 9}[unit]
26+
return {
27+
"s": NpyDatetimeUnit.NPY_FR_s.value,
28+
"ms": NpyDatetimeUnit.NPY_FR_ms.value,
29+
"us": NpyDatetimeUnit.NPY_FR_us.value,
30+
}[unit]
31+
32+
@pytest.fixture
33+
def dtype(self, unit, tz_naive_fixture):
34+
tz = tz_naive_fixture
35+
if tz is None:
36+
return np.dtype(f"datetime64[{unit}]")
37+
else:
38+
return DatetimeTZDtype(unit=unit, tz=tz)
2539

26-
@pytest.mark.xfail(reason="_box_func is not yet patched to get reso right")
27-
def test_non_nano(self, unit, reso):
40+
def test_non_nano(self, unit, reso, dtype):
2841
arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]")
29-
dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
42+
dta = DatetimeArray._simple_new(arr, dtype=dtype)
3043

31-
assert dta.dtype == arr.dtype
44+
assert dta.dtype == dtype
3245
assert dta[0]._reso == reso
46+
assert tz_compare(dta.tz, dta[0].tz)
47+
assert (dta[0] == dta[:1]).all()
3348

3449
@pytest.mark.filterwarnings(
3550
"ignore:weekofyear and week have been deprecated:FutureWarning"
3651
)
3752
@pytest.mark.parametrize(
3853
"field", DatetimeArray._field_ops + DatetimeArray._bool_ops
3954
)
40-
def test_fields(self, unit, reso, field):
41-
dti = pd.date_range("2016-01-01", periods=55, freq="D")
42-
arr = np.asarray(dti).astype(f"M8[{unit}]")
55+
def test_fields(self, unit, reso, field, dtype):
56+
tz = getattr(dtype, "tz", None)
57+
dti = pd.date_range("2016-01-01", periods=55, freq="D", tz=tz)
58+
if tz is None:
59+
arr = np.asarray(dti).astype(f"M8[{unit}]")
60+
else:
61+
arr = np.asarray(dti.tz_convert("UTC").tz_localize(None)).astype(
62+
f"M8[{unit}]"
63+
)
4364

44-
dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
65+
dta = DatetimeArray._simple_new(arr, dtype=dtype)
66+
67+
# FIXME: assert (dti == dta).all()
4568

4669
res = getattr(dta, field)
4770
expected = getattr(dti._data, field)

pandas/tests/scalar/timestamp/test_timestamp.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -802,10 +802,12 @@ def test_comparison(self, dt64, ts):
802802

803803
def test_cmp_cross_reso(self):
804804
# numpy gets this wrong because of silent overflow
805-
dt64 = np.datetime64(106752, "D") # won't fit in M8[ns]
805+
dt64 = np.datetime64(9223372800, "s") # won't fit in M8[ns]
806806
ts = Timestamp._from_dt64(dt64)
807807

808-
other = Timestamp(dt64 - 1)
808+
# subtracting 3600*24 gives a datetime64 that _can_ fit inside the
809+
# nanosecond implementation bounds.
810+
other = Timestamp(dt64 - 3600 * 24)
809811
assert other < ts
810812
assert other.asm8 > ts.asm8 # <- numpy gets this wrong
811813
assert ts > other

0 commit comments

Comments
 (0)