Skip to content

ENH: implement non-nano Timedelta scalar #46688

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Apr 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pandas/_libs/tslibs/timedeltas.pxd
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from cpython.datetime cimport timedelta
from numpy cimport int64_t

from .np_datetime cimport NPY_DATETIMEUNIT


# Exposed for tslib, not intended for outside use.
cpdef int64_t delta_to_nanoseconds(delta) except? -1
Expand All @@ -13,7 +15,9 @@ cdef class _Timedelta(timedelta):
int64_t value # nanoseconds
bint _is_populated # are my components populated
int64_t _d, _h, _m, _s, _ms, _us, _ns
NPY_DATETIMEUNIT _reso

cpdef timedelta to_pytimedelta(_Timedelta self)
cdef bint _has_ns(self)
cdef _ensure_components(_Timedelta self)
cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op)
145 changes: 126 additions & 19 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,19 @@ from pandas._libs.tslibs.nattype cimport (
)
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
NPY_FR_ns,
cmp_dtstructs,
cmp_scalar,
get_datetime64_unit,
get_timedelta64_value,
npy_datetimestruct,
pandas_datetime_to_datetimestruct,
pandas_timedelta_to_timedeltastruct,
pandas_timedeltastruct,
td64_to_tdstruct,
)

from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta

from pandas._libs.tslibs.offsets cimport is_tick_object
from pandas._libs.tslibs.util cimport (
is_array,
Expand Down Expand Up @@ -176,7 +182,9 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
if is_tick_object(delta):
return delta.nanos
if isinstance(delta, _Timedelta):
return delta.value
if delta._reso == NPY_FR_ns:
return delta.value
raise NotImplementedError(delta._reso)

if is_timedelta64_object(delta):
return get_timedelta64_value(ensure_td64ns(delta))
Expand Down Expand Up @@ -251,6 +259,8 @@ cdef convert_to_timedelta64(object ts, str unit):
return np.timedelta64(NPY_NAT, "ns")
elif isinstance(ts, _Timedelta):
# already in the proper format
if ts._reso != NPY_FR_ns:
raise NotImplementedError
ts = np.timedelta64(ts.value, "ns")
elif is_timedelta64_object(ts):
ts = ensure_td64ns(ts)
Expand Down Expand Up @@ -643,7 +653,8 @@ cdef bint _validate_ops_compat(other):

def _op_unary_method(func, name):
    """
    Build a unary-operator method (e.g. negation, absolute value).

    Parameters
    ----------
    func : callable
        Applied to the integer ``self.value`` of the operand.
    name : str
        Assigned to the ``__name__`` of the generated method.

    Returns
    -------
    callable
        Method taking ``self`` and returning a new object that holds
        ``func(self.value)`` at the same resolution as ``self``.
    """
    def f(self):
        # Build the result from (value, reso) instead of going through
        # Timedelta(..., unit='ns'), so that the operand's resolution is
        # carried over to the result rather than being assumed to be nano.
        new_value = func(self.value)
        return _timedelta_from_value_and_reso(new_value, self._reso)

    f.__name__ = name
    return f

Expand Down Expand Up @@ -688,7 +699,17 @@ def _binary_op_method_timedeltalike(op, name):
if other is NaT:
# e.g. if original other was timedelta64('NaT')
return NaT
return Timedelta(op(self.value, other.value), unit='ns')

if self._reso != other._reso:
raise NotImplementedError

res = op(self.value, other.value)
if res == NPY_NAT:
# e.g. test_implementation_limits
# TODO: more generally could do an overflowcheck in op?
return NaT

return _timedelta_from_value_and_reso(res, reso=self._reso)

f.__name__ = name
return f
Expand Down Expand Up @@ -818,6 +839,38 @@ cdef _to_py_int_float(v):
raise TypeError(f"Invalid type {type(v)}. Must be int or float.")


def _timedelta_unpickle(value, reso):
    """
    Rebuild a Timedelta from a ``(value, reso)`` pair.

    Thin Python-level wrapper around ``_timedelta_from_value_and_reso``.
    """
    td = _timedelta_from_value_and_reso(value, reso)
    return td


cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
    """
    Build a Timedelta directly from an integer count and its resolution.

    Parameters
    ----------
    value : int64_t
        Number of ``reso`` units; stored unchanged on the result.
    reso : NPY_DATETIMEUNIT
        Resolution that ``value`` is expressed in.

    Returns
    -------
    Timedelta

    Raises
    ------
    NotImplementedError
        If ``reso`` is not one of ns, us, ms, s, m, h or D.
    """
    # Could make this a classmethod if/when cython supports cdef classmethods
    cdef:
        _Timedelta td_base

    # Python-int view of the count, shared by every branch below.
    count = int(value)

    # The timedelta base class is initialised through __new__ using the
    # keyword matching the resolution; a nanosecond count is floor-divided
    # down to microseconds for that purpose.
    if reso == NPY_FR_ns:
        td_base = _Timedelta.__new__(Timedelta, microseconds=count // 1000)
    elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
        td_base = _Timedelta.__new__(Timedelta, microseconds=count)
    elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
        td_base = _Timedelta.__new__(Timedelta, milliseconds=count)
    elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
        td_base = _Timedelta.__new__(Timedelta, seconds=count)
    elif reso == NPY_DATETIMEUNIT.NPY_FR_m:
        td_base = _Timedelta.__new__(Timedelta, minutes=count)
    elif reso == NPY_DATETIMEUNIT.NPY_FR_h:
        td_base = _Timedelta.__new__(Timedelta, hours=count)
    elif reso == NPY_DATETIMEUNIT.NPY_FR_D:
        td_base = _Timedelta.__new__(Timedelta, days=count)
    else:
        raise NotImplementedError(reso)

    # Record the exact integer and its unit; component fields are left
    # unpopulated.
    td_base._reso = reso
    td_base.value = value
    td_base._is_populated = 0
    return td_base


# Similar to Timestamp/datetime, this is a construction requirement for
# timedeltas that we need to do object instantiation in python. This will
# serve as a C extension type that shadows the Python class, where we do any
Expand All @@ -827,6 +880,7 @@ cdef class _Timedelta(timedelta):
# int64_t value # nanoseconds
# bint _is_populated # are my components populated
# int64_t _d, _h, _m, _s, _ms, _us, _ns
# NPY_DATETIMEUNIT _reso

# higher than np.ndarray and np.matrix
__array_priority__ = 100
Expand All @@ -853,6 +907,11 @@ cdef class _Timedelta(timedelta):

def __hash__(_Timedelta self):
    # Without a nanosecond component, use the hash of the timedelta base
    # class; otherwise hash the raw integer value.
    if not self._has_ns():
        return timedelta.__hash__(self)

    # Note: this does *not* satisfy the invariance
    # td1 == td2 \\Rightarrow hash(td1) == hash(td2)
    # if td1 and td2 have different _resos. timedelta64 also has this
    # non-invariant behavior.
    # see GH#44504
    return hash(self.value)
Expand Down Expand Up @@ -890,10 +949,30 @@ cdef class _Timedelta(timedelta):
else:
return NotImplemented

return cmp_scalar(self.value, ots.value, op)
if self._reso == ots._reso:
return cmp_scalar(self.value, ots.value, op)
return self._compare_mismatched_resos(ots, op)

# TODO: re-use/share with Timestamp
cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op):
    # Compare ``self`` with ``other`` when their ``_reso`` differ: each
    # (value, reso) pair is expanded into a struct and the two structs are
    # compared with ``op``.
    # Can't just dispatch to numpy as they silently overflow and get it wrong
    cdef:
        npy_datetimestruct lhs_dts
        npy_datetimestruct rhs_dts

    # dispatch to the datetimestruct utils instead of writing new ones!
    pandas_datetime_to_datetimestruct(other.value, other._reso, &rhs_dts)
    pandas_datetime_to_datetimestruct(self.value, self._reso, &lhs_dts)
    return cmp_dtstructs(&lhs_dts, &rhs_dts, op)

cdef bint _has_ns(self):
    # Whether the stored value has a component finer than one microsecond.
    # The answer depends on the resolution, so dispatch on ``_reso`` rather
    # than unconditionally treating ``value`` as a nanosecond count.
    #
    # NOTE(review): declared without an ``except`` clause; depending on the
    # Cython version the raise below may be reported but not propagated to
    # the caller -- confirm.
    if self._reso == NPY_FR_ns:
        return self.value % 1000 != 0
    elif self._reso < NPY_FR_ns:
        # coarser than nanosecond, e.g. second, millisecond, microsecond
        return False
    else:
        raise NotImplementedError(self._reso)

cdef _ensure_components(_Timedelta self):
"""
Expand All @@ -905,7 +984,7 @@ cdef class _Timedelta(timedelta):
cdef:
pandas_timedeltastruct tds

td64_to_tdstruct(self.value, &tds)
pandas_timedelta_to_timedeltastruct(self.value, self._reso, &tds)
self._d = tds.days
self._h = tds.hrs
self._m = tds.min
Expand Down Expand Up @@ -937,13 +1016,24 @@ cdef class _Timedelta(timedelta):
-----
Any nanosecond resolution will be lost.
"""
return timedelta(microseconds=int(self.value) / 1000)
if self._reso == NPY_FR_ns:
return timedelta(microseconds=int(self.value) / 1000)

# TODO(@WillAyd): is this the right way to use components?
self._ensure_components()
return timedelta(
days=self._d, seconds=self._seconds, microseconds=self._microseconds
)

def to_timedelta64(self) -> np.timedelta64:
"""
Return a numpy.timedelta64 object with 'ns' precision.
"""
return np.timedelta64(self.value, 'ns')
cdef:
str abbrev = npy_unit_to_abbrev(self._reso)
# TODO: way to create a np.timedelta64 obj with the reso directly
# instead of having to get the abbrev?
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@seberg is there a C-API way to create a timedelta64 object?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess PyArray_Scalar assuming you got the correct dtype available. (That function should only be used for NumPy dtypes IMO, but that isn't a problem)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assuming you got the correct dtype available

what we have on hand is the correct NPY_DATETIMEUNIT. I guess we need to create the dtype from the unit (we have a function to go the other direction, so i guess this shouldn't be too hard to figure out). If I figure this out, I'll probably try to upstream it into numpy's __init__.pxd

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would probably be OK to add a function that works with the unit directly for the C-API, also. But it doesn't exist yet. It seems PyArray_Scalar is commented out from __init__.pxd, I am not sure if there is a reason for that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could the reason be that PyArray_Scalar's first arg is void * which might not play so well with cython? i've figured out how to create the dtype object from the unit (copied create_datetime_dtype_with_unit over from multiarray/datetime.c) but so far having no luck in calling PyArray_Scalar

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe? I don't really see a good reason, void * seems perfectly fine, if cython doesn't like it, just use char * (which is likely better anyway?)
But I also don't think it is an API that should be used a lot, so the reason may simply be that it is pretty much unused.

return np.timedelta64(self.value, abbrev)

def to_numpy(self, dtype=None, copy=False) -> np.timedelta64:
"""
Expand Down Expand Up @@ -1054,7 +1144,7 @@ cdef class _Timedelta(timedelta):
>>> td.asm8
numpy.timedelta64(42,'ns')
"""
return np.int64(self.value).view('m8[ns]')
return self.to_timedelta64()

@property
def resolution_string(self) -> str:
Expand Down Expand Up @@ -1258,6 +1348,14 @@ cdef class _Timedelta(timedelta):
f'H{components.minutes}M{seconds}S')
return tpl

# ----------------------------------------------------------------
# Constructors

@classmethod
def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):
    # Python-visible entry point to the cdef construction helper, exposed
    # as a classmethod for testing.  ``cls`` itself is not consulted.
    td = _timedelta_from_value_and_reso(value, reso)
    return td


# Python front end to C extension type _Timedelta
# This serves as the box for timedelta64
Expand Down Expand Up @@ -1413,19 +1511,21 @@ class Timedelta(_Timedelta):
if value == NPY_NAT:
return NaT

# make timedelta happy
td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000)
td_base.value = value
td_base._is_populated = 0
return td_base
return _timedelta_from_value_and_reso(value, NPY_FR_ns)

def __setstate__(self, state):
(value) = state
if len(state) == 1:
# older pickle, only supported nanosecond
value = state[0]
reso = NPY_FR_ns
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sufficient testing on this?

else:
value, reso = state
self.value = value
self._reso = reso

def __reduce__(self):
    """
    Pickle support.

    Returns
    -------
    tuple
        ``(_timedelta_unpickle, (value, reso))``: the callable used to
        rebuild the object and the arguments to call it with.
    """
    # The state carries the resolution alongside the integer value; a
    # value-only state has no way to tell the reconstructor which unit
    # the integer is expressed in.
    object_state = self.value, self._reso
    return (_timedelta_unpickle, object_state)

@cython.cdivision(True)
def _round(self, freq, mode):
Expand Down Expand Up @@ -1496,7 +1596,14 @@ class Timedelta(_Timedelta):

def __mul__(self, other):
if is_integer_object(other) or is_float_object(other):
return Timedelta(other * self.value, unit='ns')
if util.is_nan(other):
# np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT
return NaT

return _timedelta_from_value_and_reso(
<int64_t>(other * self.value),
reso=self._reso,
)

elif is_array(other):
# ndarray-like
Expand Down
73 changes: 73 additions & 0 deletions pandas/tests/scalar/timedelta/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,79 @@
import pandas._testing as tm


class TestNonNano:
    """Checks for Timedelta scalars built at s/ms/us resolution."""

    @pytest.fixture(params=[7, 8, 9])
    def unit(self, request):
        # Resolution codes: 7 -> second, 8 -> millisecond, 9 -> microsecond
        return request.param

    @pytest.fixture
    def val(self, unit):
        # Start from a microsecond count that would be just out of bounds
        # for nano, then scale it down to the requested unit.
        us = 9223372800000000
        divisor = {9: 1, 8: 1000}.get(unit, 1_000_000)
        return us // divisor

    @pytest.fixture
    def td(self, unit, val):
        # The scalar under test, built through the testing-only constructor.
        return Timedelta._from_value_and_reso(val, unit)

    def test_from_value_and_reso(self, unit, val):
        # Just checking that the fixture is giving us what we asked for
        result = Timedelta._from_value_and_reso(val, unit)
        assert result.value == val
        assert result._reso == unit
        assert result.days == 106752

    def test_unary_non_nano(self, td, unit):
        # abs / negation / unary plus must all keep the resolution
        assert abs(td)._reso == unit
        assert (-td)._reso == unit
        assert (+td)._reso == unit

    def test_sub_preserves_reso(self, td, unit):
        expected = Timedelta._from_value_and_reso(0, unit)
        result = td - td
        assert result == expected
        assert result._reso == unit

    def test_mul_preserves_reso(self, td, unit):
        # The td fixture should always be far from the implementation
        # bound, so doubling does not risk overflow.
        doubled = td * 2
        assert doubled.value == td.value * 2
        assert doubled._reso == unit

    def test_cmp_cross_reso(self, td):
        # Compare against a nanosecond-resolution scalar one day smaller.
        nano = Timedelta(days=106751, unit="ns")
        assert nano < td
        assert td > nano
        assert not nano == td
        assert td != nano

    def test_to_pytimedelta(self, td):
        expected = timedelta(days=106752)
        result = td.to_pytimedelta()
        assert type(result) is timedelta
        assert result == expected

    def test_to_timedelta64(self, td, unit):
        # Expected numpy dtype for each resolution code under test.
        expected_dtype = {7: "m8[s]", 8: "m8[ms]", 9: "m8[us]"}.get(unit)

        for res in [td.to_timedelta64(), td.to_numpy(), td.asm8]:
            assert isinstance(res, np.timedelta64)
            assert res.view("i8") == td.value
            if expected_dtype is not None:
                assert res.dtype == expected_dtype


class TestTimedeltaUnaryOps:
def test_invert(self):
td = Timedelta(10, unit="d")
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
"_libs.tslibs.timedeltas": {
"pyxfile": "_libs/tslibs/timedeltas",
"depends": tseries_depends,
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
},
"_libs.tslibs.timestamps": {
"pyxfile": "_libs/tslibs/timestamps",
Expand Down