Skip to content

Commit a37af56

Browse files
jbrockmendel authored and yehoshuadimarsky committed
ENH: implement non-nano Timedelta scalar (pandas-dev#46688)
1 parent 405da95 commit a37af56

File tree

4 files changed

+204
-19
lines changed

4 files changed

+204
-19
lines changed

pandas/_libs/tslibs/timedeltas.pxd

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from cpython.datetime cimport timedelta
22
from numpy cimport int64_t
33

4+
from .np_datetime cimport NPY_DATETIMEUNIT
5+
46

57
# Exposed for tslib, not intended for outside use.
68
cpdef int64_t delta_to_nanoseconds(delta) except? -1
@@ -13,7 +15,9 @@ cdef class _Timedelta(timedelta):
1315
int64_t value # nanoseconds
1416
bint _is_populated # are my components populated
1517
int64_t _d, _h, _m, _s, _ms, _us, _ns
18+
NPY_DATETIMEUNIT _reso
1619

1720
cpdef timedelta to_pytimedelta(_Timedelta self)
1821
cdef bint _has_ns(self)
1922
cdef _ensure_components(_Timedelta self)
23+
cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op)

pandas/_libs/tslibs/timedeltas.pyx

+126-19
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,19 @@ from pandas._libs.tslibs.nattype cimport (
4545
)
4646
from pandas._libs.tslibs.np_datetime cimport (
4747
NPY_DATETIMEUNIT,
48+
NPY_FR_ns,
49+
cmp_dtstructs,
4850
cmp_scalar,
4951
get_datetime64_unit,
5052
get_timedelta64_value,
53+
npy_datetimestruct,
54+
pandas_datetime_to_datetimestruct,
55+
pandas_timedelta_to_timedeltastruct,
5156
pandas_timedeltastruct,
52-
td64_to_tdstruct,
5357
)
58+
5459
from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta
60+
5561
from pandas._libs.tslibs.offsets cimport is_tick_object
5662
from pandas._libs.tslibs.util cimport (
5763
is_array,
@@ -176,7 +182,9 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
176182
if is_tick_object(delta):
177183
return delta.nanos
178184
if isinstance(delta, _Timedelta):
179-
return delta.value
185+
if delta._reso == NPY_FR_ns:
186+
return delta.value
187+
raise NotImplementedError(delta._reso)
180188

181189
if is_timedelta64_object(delta):
182190
return get_timedelta64_value(ensure_td64ns(delta))
@@ -251,6 +259,8 @@ cdef convert_to_timedelta64(object ts, str unit):
251259
return np.timedelta64(NPY_NAT, "ns")
252260
elif isinstance(ts, _Timedelta):
253261
# already in the proper format
262+
if ts._reso != NPY_FR_ns:
263+
raise NotImplementedError
254264
ts = np.timedelta64(ts.value, "ns")
255265
elif is_timedelta64_object(ts):
256266
ts = ensure_td64ns(ts)
@@ -643,7 +653,8 @@ cdef bint _validate_ops_compat(other):
643653

644654
def _op_unary_method(func, name):
645655
def f(self):
646-
return Timedelta(func(self.value), unit='ns')
656+
new_value = func(self.value)
657+
return _timedelta_from_value_and_reso(new_value, self._reso)
647658
f.__name__ = name
648659
return f
649660

@@ -688,7 +699,17 @@ def _binary_op_method_timedeltalike(op, name):
688699
if other is NaT:
689700
# e.g. if original other was timedelta64('NaT')
690701
return NaT
691-
return Timedelta(op(self.value, other.value), unit='ns')
702+
703+
if self._reso != other._reso:
704+
raise NotImplementedError
705+
706+
res = op(self.value, other.value)
707+
if res == NPY_NAT:
708+
# e.g. test_implementation_limits
709+
# TODO: more generally could do an overflowcheck in op?
710+
return NaT
711+
712+
return _timedelta_from_value_and_reso(res, reso=self._reso)
692713

693714
f.__name__ = name
694715
return f
@@ -818,6 +839,38 @@ cdef _to_py_int_float(v):
818839
raise TypeError(f"Invalid type {type(v)}. Must be int or float.")
819840

820841

842+
def _timedelta_unpickle(value, reso):
843+
return _timedelta_from_value_and_reso(value, reso)
844+
845+
846+
cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
847+
# Could make this a classmethod if/when cython supports cdef classmethods
848+
cdef:
849+
_Timedelta td_base
850+
851+
if reso == NPY_FR_ns:
852+
td_base = _Timedelta.__new__(Timedelta, microseconds=int(value) // 1000)
853+
elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
854+
td_base = _Timedelta.__new__(Timedelta, microseconds=int(value))
855+
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
856+
td_base = _Timedelta.__new__(Timedelta, milliseconds=int(value))
857+
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
858+
td_base = _Timedelta.__new__(Timedelta, seconds=int(value))
859+
elif reso == NPY_DATETIMEUNIT.NPY_FR_m:
860+
td_base = _Timedelta.__new__(Timedelta, minutes=int(value))
861+
elif reso == NPY_DATETIMEUNIT.NPY_FR_h:
862+
td_base = _Timedelta.__new__(Timedelta, hours=int(value))
863+
elif reso == NPY_DATETIMEUNIT.NPY_FR_D:
864+
td_base = _Timedelta.__new__(Timedelta, days=int(value))
865+
else:
866+
raise NotImplementedError(reso)
867+
868+
td_base.value = value
869+
td_base._is_populated = 0
870+
td_base._reso = reso
871+
return td_base
872+
873+
821874
# Similar to Timestamp/datetime, this is a construction requirement for
822875
# timedeltas that we need to do object instantiation in python. This will
823876
# serve as a C extension type that shadows the Python class, where we do any
@@ -827,6 +880,7 @@ cdef class _Timedelta(timedelta):
827880
# int64_t value # nanoseconds
828881
# bint _is_populated # are my components populated
829882
# int64_t _d, _h, _m, _s, _ms, _us, _ns
883+
# NPY_DATETIMEUNIT _reso
830884

831885
# higher than np.ndarray and np.matrix
832886
__array_priority__ = 100
@@ -853,6 +907,11 @@ cdef class _Timedelta(timedelta):
853907

854908
def __hash__(_Timedelta self):
855909
if self._has_ns():
910+
# Note: this does *not* satisfy the invariant
911+
# td1 == td2 implies hash(td1) == hash(td2)
912+
# if td1 and td2 have different _resos. timedelta64 also has this
913+
# non-invariant behavior.
914+
# see GH#44504
856915
return hash(self.value)
857916
else:
858917
return timedelta.__hash__(self)
@@ -890,10 +949,30 @@ cdef class _Timedelta(timedelta):
890949
else:
891950
return NotImplemented
892951

893-
return cmp_scalar(self.value, ots.value, op)
952+
if self._reso == ots._reso:
953+
return cmp_scalar(self.value, ots.value, op)
954+
return self._compare_mismatched_resos(ots, op)
955+
956+
# TODO: re-use/share with Timestamp
957+
cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op):
958+
# Can't just dispatch to numpy, as it silently overflows and gets it wrong
959+
cdef:
960+
npy_datetimestruct dts_self
961+
npy_datetimestruct dts_other
962+
963+
# dispatch to the datetimestruct utils instead of writing new ones!
964+
pandas_datetime_to_datetimestruct(self.value, self._reso, &dts_self)
965+
pandas_datetime_to_datetimestruct(other.value, other._reso, &dts_other)
966+
return cmp_dtstructs(&dts_self, &dts_other, op)
894967

895968
cdef bint _has_ns(self):
896-
return self.value % 1000 != 0
969+
if self._reso == NPY_FR_ns:
970+
return self.value % 1000 != 0
971+
elif self._reso < NPY_FR_ns:
972+
# i.e. a resolution below nanosecond, e.g. second, millisecond, microsecond
973+
return False
974+
else:
975+
raise NotImplementedError(self._reso)
897976

898977
cdef _ensure_components(_Timedelta self):
899978
"""
@@ -905,7 +984,7 @@ cdef class _Timedelta(timedelta):
905984
cdef:
906985
pandas_timedeltastruct tds
907986

908-
td64_to_tdstruct(self.value, &tds)
987+
pandas_timedelta_to_timedeltastruct(self.value, self._reso, &tds)
909988
self._d = tds.days
910989
self._h = tds.hrs
911990
self._m = tds.min
@@ -937,13 +1016,24 @@ cdef class _Timedelta(timedelta):
9371016
-----
9381017
Any nanosecond resolution will be lost.
9391018
"""
940-
return timedelta(microseconds=int(self.value) / 1000)
1019+
if self._reso == NPY_FR_ns:
1020+
return timedelta(microseconds=int(self.value) / 1000)
1021+
1022+
# TODO(@WillAyd): is this the right way to use components?
1023+
self._ensure_components()
1024+
return timedelta(
1025+
days=self._d, seconds=self._seconds, microseconds=self._microseconds
1026+
)
9411027

9421028
def to_timedelta64(self) -> np.timedelta64:
9431029
"""
9441030
Return a numpy.timedelta64 object with the same resolution as this Timedelta.
9451031
"""
946-
return np.timedelta64(self.value, 'ns')
1032+
cdef:
1033+
str abbrev = npy_unit_to_abbrev(self._reso)
1034+
# TODO: way to create a np.timedelta64 obj with the reso directly
1035+
# instead of having to get the abbrev?
1036+
return np.timedelta64(self.value, abbrev)
9471037

9481038
def to_numpy(self, dtype=None, copy=False) -> np.timedelta64:
9491039
"""
@@ -1054,7 +1144,7 @@ cdef class _Timedelta(timedelta):
10541144
>>> td.asm8
10551145
numpy.timedelta64(42,'ns')
10561146
"""
1057-
return np.int64(self.value).view('m8[ns]')
1147+
return self.to_timedelta64()
10581148

10591149
@property
10601150
def resolution_string(self) -> str:
@@ -1258,6 +1348,14 @@ cdef class _Timedelta(timedelta):
12581348
f'H{components.minutes}M{seconds}S')
12591349
return tpl
12601350

1351+
# ----------------------------------------------------------------
1352+
# Constructors
1353+
1354+
@classmethod
1355+
def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):
1356+
# exposing as classmethod for testing
1357+
return _timedelta_from_value_and_reso(value, reso)
1358+
12611359

12621360
# Python front end to C extension type _Timedelta
12631361
# This serves as the box for timedelta64
@@ -1413,19 +1511,21 @@ class Timedelta(_Timedelta):
14131511
if value == NPY_NAT:
14141512
return NaT
14151513

1416-
# make timedelta happy
1417-
td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000)
1418-
td_base.value = value
1419-
td_base._is_populated = 0
1420-
return td_base
1514+
return _timedelta_from_value_and_reso(value, NPY_FR_ns)
14211515

14221516
def __setstate__(self, state):
1423-
(value) = state
1517+
if len(state) == 1:
1518+
# older pickle, only supported nanosecond
1519+
value = state[0]
1520+
reso = NPY_FR_ns
1521+
else:
1522+
value, reso = state
14241523
self.value = value
1524+
self._reso = reso
14251525

14261526
def __reduce__(self):
1427-
object_state = self.value,
1428-
return (Timedelta, object_state)
1527+
object_state = self.value, self._reso
1528+
return (_timedelta_unpickle, object_state)
14291529

14301530
@cython.cdivision(True)
14311531
def _round(self, freq, mode):
@@ -1496,7 +1596,14 @@ class Timedelta(_Timedelta):
14961596

14971597
def __mul__(self, other):
14981598
if is_integer_object(other) or is_float_object(other):
1499-
return Timedelta(other * self.value, unit='ns')
1599+
if util.is_nan(other):
1600+
# np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT
1601+
return NaT
1602+
1603+
return _timedelta_from_value_and_reso(
1604+
<int64_t>(other * self.value),
1605+
reso=self._reso,
1606+
)
15001607

15011608
elif is_array(other):
15021609
# ndarray-like

pandas/tests/scalar/timedelta/test_timedelta.py

+73
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,79 @@
2424
import pandas._testing as tm
2525

2626

27+
class TestNonNano:
28+
@pytest.fixture(params=[7, 8, 9])
29+
def unit(self, request):
30+
# 7, 8, 9 correspond to second, millisecond, and microsecond, respectively
31+
return request.param
32+
33+
@pytest.fixture
34+
def val(self, unit):
35+
# value in microseconds that would be just out of bounds at nanosecond resolution
36+
us = 9223372800000000
37+
if unit == 9:
38+
value = us
39+
elif unit == 8:
40+
value = us // 1000
41+
else:
42+
value = us // 1_000_000
43+
return value
44+
45+
@pytest.fixture
46+
def td(self, unit, val):
47+
return Timedelta._from_value_and_reso(val, unit)
48+
49+
def test_from_value_and_reso(self, unit, val):
50+
# Just checking that the fixture is giving us what we asked for
51+
td = Timedelta._from_value_and_reso(val, unit)
52+
assert td.value == val
53+
assert td._reso == unit
54+
assert td.days == 106752
55+
56+
def test_unary_non_nano(self, td, unit):
57+
assert abs(td)._reso == unit
58+
assert (-td)._reso == unit
59+
assert (+td)._reso == unit
60+
61+
def test_sub_preserves_reso(self, td, unit):
62+
res = td - td
63+
expected = Timedelta._from_value_and_reso(0, unit)
64+
assert res == expected
65+
assert res._reso == unit
66+
67+
def test_mul_preserves_reso(self, td, unit):
68+
# The td fixture should always be far from the implementation
69+
# bound, so doubling does not risk overflow.
70+
res = td * 2
71+
assert res.value == td.value * 2
72+
assert res._reso == unit
73+
74+
def test_cmp_cross_reso(self, td):
75+
other = Timedelta(days=106751, unit="ns")
76+
assert other < td
77+
assert td > other
78+
assert not other == td
79+
assert td != other
80+
81+
def test_to_pytimedelta(self, td):
82+
res = td.to_pytimedelta()
83+
expected = timedelta(days=106752)
84+
assert type(res) is timedelta
85+
assert res == expected
86+
87+
def test_to_timedelta64(self, td, unit):
88+
for res in [td.to_timedelta64(), td.to_numpy(), td.asm8]:
89+
90+
assert isinstance(res, np.timedelta64)
91+
assert res.view("i8") == td.value
92+
if unit == 7:
93+
assert res.dtype == "m8[s]"
94+
elif unit == 8:
95+
assert res.dtype == "m8[ms]"
96+
elif unit == 9:
97+
assert res.dtype == "m8[us]"
98+
99+
27100
class TestTimedeltaUnaryOps:
28101
def test_invert(self):
29102
td = Timedelta(10, unit="d")

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
538538
"_libs.tslibs.timedeltas": {
539539
"pyxfile": "_libs/tslibs/timedeltas",
540540
"depends": tseries_depends,
541+
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
541542
},
542543
"_libs.tslibs.timestamps": {
543544
"pyxfile": "_libs/tslibs/timestamps",

0 commit comments

Comments
 (0)