diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index f114fd9297920..3019be5a59c46 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -1,6 +1,8 @@ from cpython.datetime cimport timedelta from numpy cimport int64_t +from .np_datetime cimport NPY_DATETIMEUNIT + # Exposed for tslib, not intended for outside use. cpdef int64_t delta_to_nanoseconds(delta) except? -1 @@ -13,7 +15,9 @@ cdef class _Timedelta(timedelta): int64_t value # nanoseconds bint _is_populated # are my components populated int64_t _d, _h, _m, _s, _ms, _us, _ns + NPY_DATETIMEUNIT _reso cpdef timedelta to_pytimedelta(_Timedelta self) cdef bint _has_ns(self) cdef _ensure_components(_Timedelta self) + cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 7979feb076c6e..6606158aea807 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -45,13 +45,19 @@ from pandas._libs.tslibs.nattype cimport ( ) from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, + NPY_FR_ns, + cmp_dtstructs, cmp_scalar, get_datetime64_unit, get_timedelta64_value, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, + pandas_timedelta_to_timedeltastruct, pandas_timedeltastruct, - td64_to_tdstruct, ) + from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta + from pandas._libs.tslibs.offsets cimport is_tick_object from pandas._libs.tslibs.util cimport ( is_array, @@ -176,7 +182,9 @@ cpdef int64_t delta_to_nanoseconds(delta) except? 
-1: if is_tick_object(delta): return delta.nanos if isinstance(delta, _Timedelta): - return delta.value + if delta._reso == NPY_FR_ns: + return delta.value + raise NotImplementedError(delta._reso) if is_timedelta64_object(delta): return get_timedelta64_value(ensure_td64ns(delta)) @@ -251,6 +259,8 @@ cdef convert_to_timedelta64(object ts, str unit): return np.timedelta64(NPY_NAT, "ns") elif isinstance(ts, _Timedelta): # already in the proper format + if ts._reso != NPY_FR_ns: + raise NotImplementedError ts = np.timedelta64(ts.value, "ns") elif is_timedelta64_object(ts): ts = ensure_td64ns(ts) @@ -643,7 +653,8 @@ cdef bint _validate_ops_compat(other): def _op_unary_method(func, name): def f(self): - return Timedelta(func(self.value), unit='ns') + new_value = func(self.value) + return _timedelta_from_value_and_reso(new_value, self._reso) f.__name__ = name return f @@ -688,7 +699,17 @@ def _binary_op_method_timedeltalike(op, name): if other is NaT: # e.g. if original other was timedelta64('NaT') return NaT - return Timedelta(op(self.value, other.value), unit='ns') + + if self._reso != other._reso: + raise NotImplementedError + + res = op(self.value, other.value) + if res == NPY_NAT: + # e.g. test_implementation_limits + # TODO: more generally could do an overflowcheck in op? + return NaT + + return _timedelta_from_value_and_reso(res, reso=self._reso) f.__name__ = name return f @@ -818,6 +839,38 @@ cdef _to_py_int_float(v): raise TypeError(f"Invalid type {type(v)}. 
Must be int or float.") +def _timedelta_unpickle(value, reso): + return _timedelta_from_value_and_reso(value, reso) + + +cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): + # Could make this a classmethod if/when cython supports cdef classmethods + cdef: + _Timedelta td_base + + if reso == NPY_FR_ns: + td_base = _Timedelta.__new__(Timedelta, microseconds=int(value) // 1000) + elif reso == NPY_DATETIMEUNIT.NPY_FR_us: + td_base = _Timedelta.__new__(Timedelta, microseconds=int(value)) + elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + td_base = _Timedelta.__new__(Timedelta, milliseconds=int(value)) + elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + td_base = _Timedelta.__new__(Timedelta, seconds=int(value)) + elif reso == NPY_DATETIMEUNIT.NPY_FR_m: + td_base = _Timedelta.__new__(Timedelta, minutes=int(value)) + elif reso == NPY_DATETIMEUNIT.NPY_FR_h: + td_base = _Timedelta.__new__(Timedelta, hours=int(value)) + elif reso == NPY_DATETIMEUNIT.NPY_FR_D: + td_base = _Timedelta.__new__(Timedelta, days=int(value)) + else: + raise NotImplementedError(reso) + + td_base.value = value + td_base._is_populated = 0 + td_base._reso = reso + return td_base + + # Similar to Timestamp/datetime, this is a construction requirement for # timedeltas that we need to do object instantiation in python. This will # serve as a C extension type that shadows the Python class, where we do any @@ -827,6 +880,7 @@ cdef class _Timedelta(timedelta): # int64_t value # nanoseconds # bint _is_populated # are my components populated # int64_t _d, _h, _m, _s, _ms, _us, _ns + # NPY_DATETIMEUNIT _reso # higher than np.ndarray and np.matrix __array_priority__ = 100 @@ -853,6 +907,11 @@ cdef class _Timedelta(timedelta): def __hash__(_Timedelta self): if self._has_ns(): + # Note: this does *not* satisfy the invariance + # td1 == td2 \\Rightarrow hash(td1) == hash(td2) + # if td1 and td2 have different _resos. timedelta64 also has this + # non-invariant behavior. 
+ # see GH#44504 return hash(self.value) else: return timedelta.__hash__(self) @@ -890,10 +949,30 @@ cdef class _Timedelta(timedelta): else: return NotImplemented - return cmp_scalar(self.value, ots.value, op) + if self._reso == ots._reso: + return cmp_scalar(self.value, ots.value, op) + return self._compare_mismatched_resos(ots, op) + + # TODO: re-use/share with Timestamp + cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op): + # Can't just dispatch to numpy as they silently overflow and get it wrong + cdef: + npy_datetimestruct dts_self + npy_datetimestruct dts_other + + # dispatch to the datetimestruct utils instead of writing new ones! + pandas_datetime_to_datetimestruct(self.value, self._reso, &dts_self) + pandas_datetime_to_datetimestruct(other.value, other._reso, &dts_other) + return cmp_dtstructs(&dts_self, &dts_other, op) cdef bint _has_ns(self): - return self.value % 1000 != 0 + if self._reso == NPY_FR_ns: + return self.value % 1000 != 0 + elif self._reso < NPY_FR_ns: + # i.e. seconds, millisecond, microsecond + return False + else: + raise NotImplementedError(self._reso) cdef _ensure_components(_Timedelta self): """ @@ -905,7 +984,7 @@ cdef class _Timedelta(timedelta): cdef: pandas_timedeltastruct tds - td64_to_tdstruct(self.value, &tds) + pandas_timedelta_to_timedeltastruct(self.value, self._reso, &tds) self._d = tds.days self._h = tds.hrs self._m = tds.min @@ -937,13 +1016,24 @@ cdef class _Timedelta(timedelta): ----- Any nanosecond resolution will be lost. """ - return timedelta(microseconds=int(self.value) / 1000) + if self._reso == NPY_FR_ns: + return timedelta(microseconds=int(self.value) / 1000) + + # TODO(@WillAyd): is this the right way to use components? + self._ensure_components() + return timedelta( + days=self._d, seconds=self._seconds, microseconds=self._microseconds + ) def to_timedelta64(self) -> np.timedelta64: """ Return a numpy.timedelta64 object with 'ns' precision. 
""" - return np.timedelta64(self.value, 'ns') + cdef: + str abbrev = npy_unit_to_abbrev(self._reso) + # TODO: way to create a np.timedelta64 obj with the reso directly + # instead of having to get the abbrev? + return np.timedelta64(self.value, abbrev) def to_numpy(self, dtype=None, copy=False) -> np.timedelta64: """ @@ -1054,7 +1144,7 @@ cdef class _Timedelta(timedelta): >>> td.asm8 numpy.timedelta64(42,'ns') """ - return np.int64(self.value).view('m8[ns]') + return self.to_timedelta64() @property def resolution_string(self) -> str: @@ -1258,6 +1348,14 @@ cdef class _Timedelta(timedelta): f'H{components.minutes}M{seconds}S') return tpl + # ---------------------------------------------------------------- + # Constructors + + @classmethod + def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso): + # exposing as classmethod for testing + return _timedelta_from_value_and_reso(value, reso) + # Python front end to C extension type _Timedelta # This serves as the box for timedelta64 @@ -1413,19 +1511,21 @@ class Timedelta(_Timedelta): if value == NPY_NAT: return NaT - # make timedelta happy - td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000) - td_base.value = value - td_base._is_populated = 0 - return td_base + return _timedelta_from_value_and_reso(value, NPY_FR_ns) def __setstate__(self, state): - (value) = state + if len(state) == 1: + # older pickle, only supported nanosecond + value = state[0] + reso = NPY_FR_ns + else: + value, reso = state self.value = value + self._reso = reso def __reduce__(self): - object_state = self.value, - return (Timedelta, object_state) + object_state = self.value, self._reso + return (_timedelta_unpickle, object_state) @cython.cdivision(True) def _round(self, freq, mode): @@ -1496,7 +1596,14 @@ class Timedelta(_Timedelta): def __mul__(self, other): if is_integer_object(other) or is_float_object(other): - return Timedelta(other * self.value, unit='ns') + if util.is_nan(other): + # np.nan * timedelta -> 
np.timedelta64("NaT"), in this case NaT + return NaT + + return _timedelta_from_value_and_reso( + <int64_t>(other * self.value), + reso=self._reso, + ) elif is_array(other): # ndarray-like diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 1ec93c69def99..17a8ec5f86fc8 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -24,6 +24,79 @@ import pandas._testing as tm +class TestNonNano: + @pytest.fixture(params=[7, 8, 9]) + def unit(self, request): + # 7, 8, 9 correspond to second, millisecond, and microsecond, respectively + return request.param + + @pytest.fixture + def val(self, unit): + # microsecond that would be just out of bounds for nano + us = 9223372800000000 + if unit == 9: + value = us + elif unit == 8: + value = us // 1000 + else: + value = us // 1_000_000 + return value + + @pytest.fixture + def td(self, unit, val): + return Timedelta._from_value_and_reso(val, unit) + + def test_from_value_and_reso(self, unit, val): + # Just checking that the fixture is giving us what we asked for + td = Timedelta._from_value_and_reso(val, unit) + assert td.value == val + assert td._reso == unit + assert td.days == 106752 + + def test_unary_non_nano(self, td, unit): + assert abs(td)._reso == unit + assert (-td)._reso == unit + assert (+td)._reso == unit + + def test_sub_preserves_reso(self, td, unit): + res = td - td + expected = Timedelta._from_value_and_reso(0, unit) + assert res == expected + assert res._reso == unit + + def test_mul_preserves_reso(self, td, unit): + # The td fixture should always be far from the implementation + # bound, so doubling does not risk overflow. 
+ res = td * 2 + assert res.value == td.value * 2 + assert res._reso == unit + + def test_cmp_cross_reso(self, td): + other = Timedelta(days=106751, unit="ns") + assert other < td + assert td > other + assert not other == td + assert td != other + + def test_to_pytimedelta(self, td): + res = td.to_pytimedelta() + expected = timedelta(days=106752) + assert type(res) is timedelta + assert res == expected + + def test_to_timedelta64(self, td, unit): + for res in [td.to_timedelta64(), td.to_numpy(), td.asm8]: + + assert isinstance(res, np.timedelta64) + assert res.view("i8") == td.value + if unit == 7: + assert res.dtype == "m8[s]" + elif unit == 8: + assert res.dtype == "m8[ms]" + elif unit == 9: + assert res.dtype == "m8[us]" + + class TestTimedeltaUnaryOps: def test_invert(self): td = Timedelta(10, unit="d") diff --git a/setup.py b/setup.py index 62704dc4423c8..384c1a267afe3 100755 --- a/setup.py +++ b/setup.py @@ -538,6 +538,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.timedeltas": { "pyxfile": "_libs/tslibs/timedeltas", "depends": tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.timestamps": { "pyxfile": "_libs/tslibs/timestamps",