Skip to content

Commit d0e3820

Browse files
authored
ENH: initial support for non-nano Timestamp (#46839)
1 parent f706fc9 commit d0e3820

File tree

5 files changed

+250
-17
lines changed

5 files changed

+250
-17
lines changed

pandas/_libs/tslibs/timestamps.pxd

+10-2
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,26 @@ from cpython.datetime cimport (
55
from numpy cimport int64_t
66

77
from pandas._libs.tslibs.base cimport ABCTimestamp
8-
from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct
8+
from pandas._libs.tslibs.np_datetime cimport (
9+
NPY_DATETIMEUNIT,
10+
npy_datetimestruct,
11+
)
912
from pandas._libs.tslibs.offsets cimport BaseOffset
1013

1114

1215
cdef _Timestamp create_timestamp_from_ts(int64_t value,
1316
npy_datetimestruct dts,
14-
tzinfo tz, BaseOffset freq, bint fold)
17+
tzinfo tz,
18+
BaseOffset freq,
19+
bint fold,
20+
NPY_DATETIMEUNIT reso=*)
1521

1622

1723
cdef class _Timestamp(ABCTimestamp):
1824
cdef readonly:
1925
int64_t value, nanosecond
2026
BaseOffset _freq
27+
NPY_DATETIMEUNIT _reso
2128

2229
cdef bint _get_start_end_field(self, str field, freq)
2330
cdef _get_date_name_field(self, str field, object locale)
@@ -29,3 +36,4 @@ cdef class _Timestamp(ABCTimestamp):
2936
int op) except -1
3037
cpdef void _set_freq(self, freq)
3138
cdef _warn_on_field_deprecation(_Timestamp self, freq, str field)
39+
cdef bint _compare_mismatched_resos(_Timestamp self, _Timestamp other, int op)

pandas/_libs/tslibs/timestamps.pyx

+119-15
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ from pandas._libs.tslibs.conversion cimport (
5252
convert_datetime_to_tsobject,
5353
convert_to_tsobject,
5454
)
55+
from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev
5556
from pandas._libs.tslibs.util cimport (
5657
is_array,
5758
is_datetime64_object,
@@ -72,10 +73,16 @@ from pandas._libs.tslibs.nattype cimport (
7273
c_NaT as NaT,
7374
)
7475
from pandas._libs.tslibs.np_datetime cimport (
76+
NPY_DATETIMEUNIT,
77+
NPY_FR_ns,
7578
check_dts_bounds,
79+
cmp_dtstructs,
7680
cmp_scalar,
7781
dt64_to_dtstruct,
82+
get_datetime64_unit,
83+
get_datetime64_value,
7884
npy_datetimestruct,
85+
pandas_datetime_to_datetimestruct,
7986
pydatetime_to_dt64,
8087
)
8188

@@ -114,24 +121,39 @@ _no_input = object()
114121
# ----------------------------------------------------------------------
115122

116123

117-
cdef inline _Timestamp create_timestamp_from_ts(int64_t value,
118-
npy_datetimestruct dts,
119-
tzinfo tz, BaseOffset freq, bint fold):
124+
cdef inline _Timestamp create_timestamp_from_ts(
125+
int64_t value,
126+
npy_datetimestruct dts,
127+
tzinfo tz,
128+
BaseOffset freq,
129+
bint fold,
130+
NPY_DATETIMEUNIT reso=NPY_FR_ns,
131+
):
120132
""" convenience routine to construct a Timestamp from its parts """
121-
cdef _Timestamp ts_base
133+
cdef:
134+
_Timestamp ts_base
135+
122136
ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month,
123137
dts.day, dts.hour, dts.min,
124138
dts.sec, dts.us, tz, fold=fold)
125139
ts_base.value = value
126140
ts_base._freq = freq
127141
ts_base.nanosecond = dts.ps // 1000
142+
ts_base._reso = reso
128143

129144
return ts_base
130145

131146

132-
def _unpickle_timestamp(value, freq, tz):
147+
def _unpickle_timestamp(value, freq, tz, reso):
133148
# GH#41949 don't warn on unpickle if we have a freq
134-
ts = Timestamp(value, tz=tz)
149+
if reso == NPY_FR_ns:
150+
ts = Timestamp(value, tz=tz)
151+
else:
152+
if tz is not None:
153+
raise NotImplementedError
154+
abbrev = npy_unit_to_abbrev(reso)
155+
dt64 = np.datetime64(value, abbrev)
156+
ts = Timestamp._from_dt64(dt64)
135157
ts._set_freq(freq)
136158
return ts
137159

@@ -177,12 +199,36 @@ cdef class _Timestamp(ABCTimestamp):
177199
)
178200
return self._freq
179201

202+
# -----------------------------------------------------------------
203+
# Constructors
204+
205+
@classmethod
206+
def _from_dt64(cls, dt64: np.datetime64):
207+
# construct a Timestamp from a np.datetime64 object, keeping the
208+
# resolution of the input.
209+
# This is here mainly so we can incrementally implement non-nano
210+
# (e.g. only tznaive at first)
211+
cdef:
212+
npy_datetimestruct dts
213+
int64_t value
214+
NPY_DATETIMEUNIT reso
215+
216+
reso = get_datetime64_unit(dt64)
217+
value = get_datetime64_value(dt64)
218+
pandas_datetime_to_datetimestruct(value, reso, &dts)
219+
return create_timestamp_from_ts(
220+
value, dts, tz=None, freq=None, fold=0, reso=reso
221+
)
222+
223+
# -----------------------------------------------------------------
224+
180225
def __hash__(_Timestamp self):
181226
if self.nanosecond:
182227
return hash(self.value)
183228
if self.fold:
184229
return datetime.__hash__(self.replace(fold=0))
185230
return datetime.__hash__(self)
231+
# TODO(non-nano): what if we are out of bounds for pydatetime?
186232

187233
def __richcmp__(_Timestamp self, object other, int op):
188234
cdef:
@@ -193,17 +239,16 @@ cdef class _Timestamp(ABCTimestamp):
193239
ots = other
194240
elif other is NaT:
195241
return op == Py_NE
196-
elif PyDateTime_Check(other) or is_datetime64_object(other):
197-
if self.nanosecond == 0 and PyDateTime_Check(other):
242+
elif is_datetime64_object(other):
243+
ots = _Timestamp._from_dt64(other)
244+
elif PyDateTime_Check(other):
245+
if self.nanosecond == 0:
198246
val = self.to_pydatetime()
199247
return PyObject_RichCompareBool(val, other, op)
200248

201249
try:
202250
ots = type(self)(other)
203251
except ValueError:
204-
if is_datetime64_object(other):
205-
# cast non-nano dt64 to pydatetime
206-
other = other.astype(object)
207252
return self._compare_outside_nanorange(other, op)
208253

209254
elif is_array(other):
@@ -253,7 +298,21 @@ cdef class _Timestamp(ABCTimestamp):
253298
raise TypeError(
254299
"Cannot compare tz-naive and tz-aware timestamps"
255300
)
256-
return cmp_scalar(self.value, ots.value, op)
301+
if self._reso == ots._reso:
302+
return cmp_scalar(self.value, ots.value, op)
303+
return self._compare_mismatched_resos(ots, op)
304+
305+
# TODO: copied from Timedelta; try to de-duplicate
306+
cdef inline bint _compare_mismatched_resos(self, _Timestamp other, int op):
307+
# Can't just dispatch to numpy as they silently overflow and get it wrong
308+
cdef:
309+
npy_datetimestruct dts_self
310+
npy_datetimestruct dts_other
311+
312+
# dispatch to the datetimestruct utils instead of writing new ones!
313+
pandas_datetime_to_datetimestruct(self.value, self._reso, &dts_self)
314+
pandas_datetime_to_datetimestruct(other.value, other._reso, &dts_other)
315+
return cmp_dtstructs(&dts_self, &dts_other, op)
257316

258317
cdef bint _compare_outside_nanorange(_Timestamp self, datetime other,
259318
int op) except -1:
@@ -286,6 +345,9 @@ cdef class _Timestamp(ABCTimestamp):
286345
cdef:
287346
int64_t nanos = 0
288347

348+
if isinstance(self, _Timestamp) and self._reso != NPY_FR_ns:
349+
raise NotImplementedError(self._reso)
350+
289351
if is_any_td_scalar(other):
290352
nanos = delta_to_nanoseconds(other)
291353
try:
@@ -325,6 +387,8 @@ cdef class _Timestamp(ABCTimestamp):
325387
return NotImplemented
326388

327389
def __sub__(self, other):
390+
if isinstance(self, _Timestamp) and self._reso != NPY_FR_ns:
391+
raise NotImplementedError(self._reso)
328392

329393
if is_any_td_scalar(other) or is_integer_object(other):
330394
neg_other = -other
@@ -387,6 +451,9 @@ cdef class _Timestamp(ABCTimestamp):
387451
return NotImplemented
388452

389453
def __rsub__(self, other):
454+
if self._reso != NPY_FR_ns:
455+
raise NotImplementedError(self._reso)
456+
390457
if PyDateTime_Check(other):
391458
try:
392459
return type(self)(other) - self
@@ -421,6 +488,9 @@ cdef class _Timestamp(ABCTimestamp):
421488
ndarray[uint8_t, cast=True] out
422489
int month_kw
423490

491+
if self._reso != NPY_FR_ns:
492+
raise NotImplementedError(self._reso)
493+
424494
if freq:
425495
kwds = freq.kwds
426496
month_kw = kwds.get('startingMonth', kwds.get('month', 12))
@@ -591,6 +661,9 @@ cdef class _Timestamp(ABCTimestamp):
591661
int64_t val
592662
object[::1] out
593663

664+
if self._reso != NPY_FR_ns:
665+
raise NotImplementedError(self._reso)
666+
594667
val = self._maybe_convert_value_to_local()
595668
out = get_date_name_field(np.array([val], dtype=np.int64),
596669
field, locale=locale)
@@ -743,6 +816,9 @@ cdef class _Timestamp(ABCTimestamp):
743816
local_val = self._maybe_convert_value_to_local()
744817
int64_t normalized
745818

819+
if self._reso != NPY_FR_ns:
820+
raise NotImplementedError(self._reso)
821+
746822
normalized = normalize_i8_stamp(local_val)
747823
return Timestamp(normalized).tz_localize(self.tzinfo)
748824

@@ -760,8 +836,16 @@ cdef class _Timestamp(ABCTimestamp):
760836
self._freq = state[1]
761837
self.tzinfo = state[2]
762838

839+
if len(state) == 3:
840+
# pre-non-nano pickle
841+
reso = NPY_FR_ns
842+
assert False # checking for coverage
843+
else:
844+
reso = state[4]
845+
self._reso = reso
846+
763847
def __reduce__(self):
764-
object_state = self.value, self._freq, self.tzinfo
848+
object_state = self.value, self._freq, self.tzinfo, self._reso
765849
return (_unpickle_timestamp, object_state)
766850

767851
# -----------------------------------------------------------------
@@ -888,7 +972,7 @@ cdef class _Timestamp(ABCTimestamp):
888972
>>> ts.asm8
889973
numpy.datetime64('2020-03-14T15:00:00.000000000')
890974
"""
891-
return np.datetime64(self.value, 'ns')
975+
return self.to_datetime64()
892976

893977
def timestamp(self):
894978
"""
@@ -902,6 +986,9 @@ cdef class _Timestamp(ABCTimestamp):
902986
"""
903987
# GH 17329
904988
# Note: Naive timestamps will not match datetime.stdlib
989+
if self._reso != NPY_FR_ns:
990+
raise NotImplementedError(self._reso)
991+
905992
return round(self.value / 1e9, 6)
906993

907994
cpdef datetime to_pydatetime(_Timestamp self, bint warn=True):
@@ -933,7 +1020,9 @@ cdef class _Timestamp(ABCTimestamp):
9331020
"""
9341021
Return a numpy.datetime64 object with the same precision as this Timestamp.
9351022
"""
936-
return np.datetime64(self.value, "ns")
1023+
# TODO: find a way to construct dt64 directly from _reso
1024+
abbrev = npy_unit_to_abbrev(self._reso)
1025+
return np.datetime64(self.value, abbrev)
9371026

9381027
def to_numpy(self, dtype=None, copy=False) -> np.datetime64:
9391028
"""
@@ -995,6 +1084,9 @@ cdef class _Timestamp(ABCTimestamp):
9951084
"""
9961085
from pandas import Period
9971086

1087+
if self._reso != NPY_FR_ns:
1088+
raise NotImplementedError(self._reso)
1089+
9981090
if self.tz is not None:
9991091
# GH#21333
10001092
warnings.warn(
@@ -1470,6 +1562,9 @@ class Timestamp(_Timestamp):
14701562
cdef:
14711563
int64_t nanos = to_offset(freq).nanos
14721564

1565+
if self._reso != NPY_FR_ns:
1566+
raise NotImplementedError(self._reso)
1567+
14731568
if self.tz is not None:
14741569
value = self.tz_localize(None).value
14751570
else:
@@ -1865,6 +1960,9 @@ default 'raise'
18651960
>>> pd.NaT.tz_localize()
18661961
NaT
18671962
"""
1963+
if self._reso != NPY_FR_ns:
1964+
raise NotImplementedError(self._reso)
1965+
18681966
if ambiguous == 'infer':
18691967
raise ValueError('Cannot infer offset with only one time.')
18701968

@@ -1942,6 +2040,9 @@ default 'raise'
19422040
>>> pd.NaT.tz_convert(tz='Asia/Tokyo')
19432041
NaT
19442042
"""
2043+
if self._reso != NPY_FR_ns:
2044+
raise NotImplementedError(self._reso)
2045+
19452046
if self.tzinfo is None:
19462047
# tz naive, use tz_localize
19472048
raise TypeError(
@@ -2021,6 +2122,9 @@ default 'raise'
20212122
datetime ts_input
20222123
tzinfo_type tzobj
20232124

2125+
if self._reso != NPY_FR_ns:
2126+
raise NotImplementedError(self._reso)
2127+
20242128
# set to naive if needed
20252129
tzobj = self.tzinfo
20262130
value = self.value

pandas/tests/scalar/timedelta/test_timedelta.py

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def test_mul_preserves_reso(self, td, unit):
7272
assert res._reso == unit
7373

7474
def test_cmp_cross_reso(self, td):
75+
# numpy gets this wrong because of silent overflow
7576
other = Timedelta(days=106751, unit="ns")
7677
assert other < td
7778
assert td > other

0 commit comments

Comments
 (0)