-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: implement non-nano Timedelta scalar #46688
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
66e06db
ENH: implement non-nano Timedelta scalar
jbrockmendel e64bbc6
Merge branch 'main' into nano-td
jbrockmendel 6b2a2c4
troubleshoot non-mac builds
jbrockmendel bdb1d36
troubleshoot non-mac builds
jbrockmendel 371b9e3
troubleshoot non-mac builds
jbrockmendel 58a208d
troubleshoot non-mac builds
jbrockmendel e15b591
troubleshoot non-mac builds
jbrockmendel 5f95506
troubleshoot non-mac builds
jbrockmendel acea307
troubleshoot non-mac builds
jbrockmendel 5cb584a
troubleshoot non-mac builds
jbrockmendel 09456e5
troubleshoot non-mac builds
jbrockmendel 039b7ab
revert troubleshooting
jbrockmendel 3b26b7d
lint fixup
jbrockmendel 4146a8a
Merge branch 'main' into nano-td
jbrockmendel 4aee01a
comment, simplify
jbrockmendel cc07d4e
Merge branch 'main' into nano-td
jbrockmendel 6d0d714
fix hash
jbrockmendel File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,13 +45,19 @@ from pandas._libs.tslibs.nattype cimport ( | |
) | ||
from pandas._libs.tslibs.np_datetime cimport ( | ||
NPY_DATETIMEUNIT, | ||
NPY_FR_ns, | ||
cmp_dtstructs, | ||
cmp_scalar, | ||
get_datetime64_unit, | ||
get_timedelta64_value, | ||
npy_datetimestruct, | ||
pandas_datetime_to_datetimestruct, | ||
pandas_timedelta_to_timedeltastruct, | ||
pandas_timedeltastruct, | ||
td64_to_tdstruct, | ||
) | ||
|
||
from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta | ||
|
||
from pandas._libs.tslibs.offsets cimport is_tick_object | ||
from pandas._libs.tslibs.util cimport ( | ||
is_array, | ||
|
@@ -176,7 +182,9 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: | |
if is_tick_object(delta): | ||
return delta.nanos | ||
if isinstance(delta, _Timedelta): | ||
return delta.value | ||
if delta._reso == NPY_FR_ns: | ||
return delta.value | ||
raise NotImplementedError(delta._reso) | ||
|
||
if is_timedelta64_object(delta): | ||
return get_timedelta64_value(ensure_td64ns(delta)) | ||
|
@@ -251,6 +259,8 @@ cdef convert_to_timedelta64(object ts, str unit): | |
return np.timedelta64(NPY_NAT, "ns") | ||
elif isinstance(ts, _Timedelta): | ||
# already in the proper format | ||
if ts._reso != NPY_FR_ns: | ||
raise NotImplementedError | ||
ts = np.timedelta64(ts.value, "ns") | ||
elif is_timedelta64_object(ts): | ||
ts = ensure_td64ns(ts) | ||
|
@@ -643,7 +653,8 @@ cdef bint _validate_ops_compat(other): | |
|
||
def _op_unary_method(func, name): | ||
def f(self): | ||
return Timedelta(func(self.value), unit='ns') | ||
new_value = func(self.value) | ||
return _timedelta_from_value_and_reso(new_value, self._reso) | ||
f.__name__ = name | ||
return f | ||
|
||
|
@@ -688,7 +699,17 @@ def _binary_op_method_timedeltalike(op, name): | |
if other is NaT: | ||
# e.g. if original other was timedelta64('NaT') | ||
return NaT | ||
return Timedelta(op(self.value, other.value), unit='ns') | ||
|
||
if self._reso != other._reso: | ||
raise NotImplementedError | ||
|
||
res = op(self.value, other.value) | ||
if res == NPY_NAT: | ||
# e.g. test_implementation_limits | ||
# TODO: more generally could do an overflowcheck in op? | ||
return NaT | ||
|
||
return _timedelta_from_value_and_reso(res, reso=self._reso) | ||
|
||
f.__name__ = name | ||
return f | ||
|
@@ -818,6 +839,38 @@ cdef _to_py_int_float(v): | |
raise TypeError(f"Invalid type {type(v)}. Must be int or float.") | ||
|
||
|
||
def _timedelta_unpickle(value, reso): | ||
return _timedelta_from_value_and_reso(value, reso) | ||
|
||
|
||
cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): | ||
# Could make this a classmethod if/when cython supports cdef classmethods | ||
cdef: | ||
_Timedelta td_base | ||
|
||
if reso == NPY_FR_ns: | ||
td_base = _Timedelta.__new__(Timedelta, microseconds=int(value) // 1000) | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_us: | ||
td_base = _Timedelta.__new__(Timedelta, microseconds=int(value)) | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: | ||
td_base = _Timedelta.__new__(Timedelta, milliseconds=int(value)) | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_s: | ||
td_base = _Timedelta.__new__(Timedelta, seconds=int(value)) | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_m: | ||
td_base = _Timedelta.__new__(Timedelta, minutes=int(value)) | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_h: | ||
td_base = _Timedelta.__new__(Timedelta, hours=int(value)) | ||
elif reso == NPY_DATETIMEUNIT.NPY_FR_D: | ||
td_base = _Timedelta.__new__(Timedelta, days=int(value)) | ||
else: | ||
raise NotImplementedError(reso) | ||
|
||
td_base.value = value | ||
td_base._is_populated = 0 | ||
td_base._reso = reso | ||
return td_base | ||
|
||
|
||
# Similar to Timestamp/datetime, this is a construction requirement for | ||
# timedeltas that we need to do object instantiation in python. This will | ||
# serve as a C extension type that shadows the Python class, where we do any | ||
|
@@ -827,6 +880,7 @@ cdef class _Timedelta(timedelta): | |
# int64_t value # nanoseconds | ||
# bint _is_populated # are my components populated | ||
# int64_t _d, _h, _m, _s, _ms, _us, _ns | ||
# NPY_DATETIMEUNIT _reso | ||
|
||
# higher than np.ndarray and np.matrix | ||
__array_priority__ = 100 | ||
|
@@ -853,6 +907,11 @@ cdef class _Timedelta(timedelta): | |
|
||
def __hash__(_Timedelta self): | ||
if self._has_ns(): | ||
# Note: this does *not* satisfy the invariance | ||
# td1 == td2 \\Rightarrow hash(td1) == hash(td2) | ||
# if td1 and td2 have different _resos. timedelta64 also has this | ||
# non-invariant behavior. | ||
# see GH#44504 | ||
return hash(self.value) | ||
else: | ||
return timedelta.__hash__(self) | ||
|
@@ -890,10 +949,30 @@ cdef class _Timedelta(timedelta): | |
else: | ||
return NotImplemented | ||
|
||
return cmp_scalar(self.value, ots.value, op) | ||
if self._reso == ots._reso: | ||
return cmp_scalar(self.value, ots.value, op) | ||
return self._compare_mismatched_resos(ots, op) | ||
|
||
# TODO: re-use/share with Timestamp | ||
cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op): | ||
# Can't just dispatch to numpy as they silently overflow and get it wrong | ||
cdef: | ||
npy_datetimestruct dts_self | ||
npy_datetimestruct dts_other | ||
|
||
# dispatch to the datetimestruct utils instead of writing new ones! | ||
pandas_datetime_to_datetimestruct(self.value, self._reso, &dts_self) | ||
pandas_datetime_to_datetimestruct(other.value, other._reso, &dts_other) | ||
return cmp_dtstructs(&dts_self, &dts_other, op) | ||
|
||
cdef bint _has_ns(self): | ||
return self.value % 1000 != 0 | ||
if self._reso == NPY_FR_ns: | ||
return self.value % 1000 != 0 | ||
elif self._reso < NPY_FR_ns: | ||
# i.e. seconds, millisecond, microsecond | ||
return False | ||
else: | ||
raise NotImplementedError(self._reso) | ||
|
||
cdef _ensure_components(_Timedelta self): | ||
""" | ||
|
@@ -905,7 +984,7 @@ cdef class _Timedelta(timedelta): | |
cdef: | ||
pandas_timedeltastruct tds | ||
|
||
td64_to_tdstruct(self.value, &tds) | ||
pandas_timedelta_to_timedeltastruct(self.value, self._reso, &tds) | ||
self._d = tds.days | ||
self._h = tds.hrs | ||
self._m = tds.min | ||
|
@@ -937,13 +1016,24 @@ cdef class _Timedelta(timedelta): | |
----- | ||
Any nanosecond resolution will be lost. | ||
""" | ||
return timedelta(microseconds=int(self.value) / 1000) | ||
if self._reso == NPY_FR_ns: | ||
return timedelta(microseconds=int(self.value) / 1000) | ||
|
||
# TODO(@WillAyd): is this the right way to use components? | ||
self._ensure_components() | ||
return timedelta( | ||
days=self._d, seconds=self._seconds, microseconds=self._microseconds | ||
) | ||
|
||
def to_timedelta64(self) -> np.timedelta64: | ||
""" | ||
Return a numpy.timedelta64 object with 'ns' precision. | ||
""" | ||
return np.timedelta64(self.value, 'ns') | ||
cdef: | ||
str abbrev = npy_unit_to_abbrev(self._reso) | ||
# TODO: way to create a np.timedelta64 obj with the reso directly | ||
# instead of having to get the abbrev? | ||
return np.timedelta64(self.value, abbrev) | ||
|
||
def to_numpy(self, dtype=None, copy=False) -> np.timedelta64: | ||
""" | ||
|
@@ -1054,7 +1144,7 @@ cdef class _Timedelta(timedelta): | |
>>> td.asm8 | ||
numpy.timedelta64(42,'ns') | ||
""" | ||
return np.int64(self.value).view('m8[ns]') | ||
return self.to_timedelta64() | ||
|
||
@property | ||
def resolution_string(self) -> str: | ||
|
@@ -1258,6 +1348,14 @@ cdef class _Timedelta(timedelta): | |
f'H{components.minutes}M{seconds}S') | ||
return tpl | ||
|
||
# ---------------------------------------------------------------- | ||
# Constructors | ||
|
||
@classmethod | ||
def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso): | ||
# exposing as classmethod for testing | ||
return _timedelta_from_value_and_reso(value, reso) | ||
|
||
|
||
# Python front end to C extension type _Timedelta | ||
# This serves as the box for timedelta64 | ||
|
@@ -1413,19 +1511,21 @@ class Timedelta(_Timedelta): | |
if value == NPY_NAT: | ||
return NaT | ||
|
||
# make timedelta happy | ||
td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000) | ||
td_base.value = value | ||
td_base._is_populated = 0 | ||
return td_base | ||
return _timedelta_from_value_and_reso(value, NPY_FR_ns) | ||
|
||
def __setstate__(self, state): | ||
(value) = state | ||
if len(state) == 1: | ||
# older pickle, only supported nanosecond | ||
value = state[0] | ||
reso = NPY_FR_ns | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. sufficient testing on this? |
||
else: | ||
value, reso = state | ||
self.value = value | ||
self._reso = reso | ||
|
||
def __reduce__(self): | ||
object_state = self.value, | ||
return (Timedelta, object_state) | ||
object_state = self.value, self._reso | ||
return (_timedelta_unpickle, object_state) | ||
|
||
@cython.cdivision(True) | ||
def _round(self, freq, mode): | ||
|
@@ -1496,7 +1596,14 @@ class Timedelta(_Timedelta): | |
|
||
def __mul__(self, other): | ||
if is_integer_object(other) or is_float_object(other): | ||
return Timedelta(other * self.value, unit='ns') | ||
if util.is_nan(other): | ||
# np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT | ||
return NaT | ||
|
||
return _timedelta_from_value_and_reso( | ||
<int64_t>(other * self.value), | ||
reso=self._reso, | ||
) | ||
|
||
elif is_array(other): | ||
# ndarray-like | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@seberg is there a C-API way to create a timedelta64 object?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess `PyArray_Scalar`, assuming you have the correct dtype available. (That function should only be used for NumPy dtypes IMO, but that isn't a problem.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What we have on hand is the correct NPY_DATETIMEUNIT. I guess we need to create the dtype from the unit (we have a function to go the other direction, so I guess this shouldn't be too hard to figure out). If I figure this out, I'll probably try to upstream it into numpy's `__init__.pxd`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would probably be OK to add a function that works with the unit directly for the C-API, also. But it doesn't exist yet. It seems `PyArray_Scalar` is commented out from `__init__.pxd`; I am not sure if there is a reason for that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could the reason be that `PyArray_Scalar`'s first arg is `void *`, which might not play so well with cython? I've figured out how to create the dtype object from the unit (copied `create_datetime_dtype_with_unit` over from multiarray/datetime.c) but so far having no luck in calling `PyArray_Scalar`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe? I don't really see a good reason; `void *` seems perfectly fine. If cython doesn't like it, just use `char *` (which is likely better anyway?). But I also don't think it is API that should be used a lot, so that may just be the reason also, that it is pretty unused.