Skip to content

Commit 4b58319

Browse files
extract functional updates from #46936
1 parent a853022 commit 4b58319

File tree

2 files changed

+122
-139
lines changed

2 files changed

+122
-139
lines changed

pandas/_libs/tslibs/timedeltas.pyx

+110-129
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import collections
2+
import operator
23
import warnings
34

45
cimport cython
@@ -55,6 +56,7 @@ from pandas._libs.tslibs.np_datetime cimport (
5556
pandas_timedelta_to_timedeltastruct,
5657
pandas_timedeltastruct,
5758
)
59+
from pandas._libs.util cimport INT64_MAX
5860

5961
from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta
6062

@@ -216,13 +218,12 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
216218
+ delta.seconds * 1_000_000
217219
+ delta.microseconds
218220
) * 1000
219-
except OverflowError as err:
220-
raise OutOfBoundsTimedelta(*err.args) from err
221-
221+
except OverflowError as ex:
222+
msg = f"{delta} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]"
223+
raise OutOfBoundsTimedelta(msg) from ex
222224
raise TypeError(type(delta))
223225

224226

225-
@cython.overflowcheck(True)
226227
cdef object ensure_td64ns(object ts):
227228
"""
228229
Overflow-safe implementation of td64.astype("m8[ns]")
@@ -241,24 +242,20 @@ cdef object ensure_td64ns(object ts):
241242
str unitstr
242243

243244
td64_unit = get_datetime64_unit(ts)
244-
if (
245-
td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns
246-
and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC
247-
):
248-
unitstr = npy_unit_to_abbrev(td64_unit)
245+
if td64_unit == NPY_DATETIMEUNIT.NPY_FR_ns or td64_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
246+
return ts
249247

250-
td64_value = get_timedelta64_value(ts)
248+
unitstr = npy_unit_to_abbrev(td64_unit)
249+
mult = precision_from_unit(unitstr)[0]
251250

252-
mult = precision_from_unit(unitstr)[0]
251+
with cython.overflowcheck(True):
253252
try:
254-
# NB: cython#1381 this cannot be *=
255-
td64_value = td64_value * mult
256-
except OverflowError as err:
257-
raise OutOfBoundsTimedelta(ts) from err
253+
td64_value = get_timedelta64_value(ts) * mult
254+
except OverflowError as ex:
255+
msg = f"{ts} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]"
256+
raise OutOfBoundsTimedelta(msg) from ex
258257

259-
return np.timedelta64(td64_value, "ns")
260-
261-
return ts
258+
return np.timedelta64(td64_value, "ns")
262259

263260

264261
cdef convert_to_timedelta64(object ts, str unit):
@@ -674,8 +671,7 @@ cdef bint _validate_ops_compat(other):
674671

675672
def _op_unary_method(func, name):
676673
def f(self):
677-
new_value = func(self.value)
678-
return _timedelta_from_value_and_reso(new_value, self._reso)
674+
return create_timedelta(func(self.value), "ignore", self._reso)
679675
f.__name__ = name
680676
return f
681677

@@ -724,13 +720,7 @@ def _binary_op_method_timedeltalike(op, name):
724720
if self._reso != other._reso:
725721
raise NotImplementedError
726722

727-
res = op(self.value, other.value)
728-
if res == NPY_NAT:
729-
# e.g. test_implementation_limits
730-
# TODO: more generally could do an overflowcheck in op?
731-
return NaT
732-
733-
return _timedelta_from_value_and_reso(res, reso=self._reso)
723+
return create_timedelta(op(self.value, other.value), "ignore", self._reso)
734724

735725
f.__name__ = name
736726
return f
@@ -861,7 +851,7 @@ cdef _to_py_int_float(v):
861851

862852

863853
def _timedelta_unpickle(value, reso):
864-
return _timedelta_from_value_and_reso(value, reso)
854+
return create_timedelta(value, "ignore", reso)
865855

866856

867857
cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
@@ -892,6 +882,49 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
892882
return td_base
893883

894884

885+
@cython.overflowcheck(True)
886+
cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_reso):
887+
"""
888+
Timedelta factory.
889+
890+
Timedelta.__new__ just does arg validation (at least currently). Also, some internal
891+
functions expect to be able to create non-nano reso Timedeltas, but Timedelta.__new__
892+
doesn't yet expose that.
893+
894+
_timedelta_from_value_and_reso does, but only accepts limited args, and doesn't check for overflow.
895+
"""
896+
cdef:
897+
int64_t out_value
898+
899+
if isinstance(value, _Timedelta):
900+
return value
901+
902+
try:
903+
# if in_unit == "ns", no need to create an m8[ns] just to read the (same) value back
904+
# if in_unit == "ignore", assume caller wants to invoke an overflow-safe version of
905+
# _timedelta_from_value_and_reso, and that any float rounding is acceptable
906+
if (is_integer_object(value) or is_float_object(value)) and (in_unit == "ns" or in_unit == "ignore"):
907+
if util.is_nan(value):
908+
return NaT
909+
out_value = <int64_t>value
910+
elif is_timedelta64_object(value):
911+
out_value = ensure_td64ns(value).view(np.int64)
912+
elif isinstance(value, str):
913+
if value.startswith(("P", "-P")):
914+
out_value = parse_iso_format_string(value)
915+
else:
916+
out_value = parse_timedelta_string(value)
917+
else:
918+
out_value = convert_to_timedelta64(value, in_unit).view(np.int64)
919+
except OverflowError as ex:
920+
msg = f"{value} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]"
921+
raise OutOfBoundsTimedelta(msg) from ex
922+
923+
if out_value == NPY_NAT:
924+
return NaT
925+
return _timedelta_from_value_and_reso(out_value, out_reso)
926+
927+
895928
# Similar to Timestamp/datetime, this is a construction requirement for
896929
# timedeltas that we need to do object instantiation in python. This will
897930
# serve as a C extension type that shadows the Python class, where we do any
@@ -1375,7 +1408,7 @@ cdef class _Timedelta(timedelta):
13751408
@classmethod
13761409
def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):
13771410
# exposing as classmethod for testing
1378-
return _timedelta_from_value_and_reso(value, reso)
1411+
return create_timedelta(value, "ignore", reso)
13791412

13801413

13811414
# Python front end to C extension type _Timedelta
@@ -1438,99 +1471,52 @@ class Timedelta(_Timedelta):
14381471
We see that either way we get the same result
14391472
"""
14401473

1441-
_req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds",
1442-
"milliseconds", "microseconds", "nanoseconds"}
1474+
_allowed_kwargs = (
1475+
"weeks", "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", "nanoseconds"
1476+
)
14431477

14441478
def __new__(cls, object value=_no_input, unit=None, **kwargs):
1445-
cdef _Timedelta td_base
1479+
cdef:
1480+
_Timedelta td_base
1481+
NPY_DATETIMEUNIT out_reso = NPY_FR_ns
14461482

1483+
# process kwargs iff no value passed
14471484
if value is _no_input:
1448-
if not len(kwargs):
1449-
raise ValueError("cannot construct a Timedelta without a "
1450-
"value/unit or descriptive keywords "
1451-
"(days,seconds....)")
1452-
1453-
kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs}
1454-
1455-
unsupported_kwargs = set(kwargs)
1456-
unsupported_kwargs.difference_update(cls._req_any_kwargs_new)
1457-
if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs):
1485+
if not kwargs:
1486+
raise ValueError(
1487+
"cannot construct a Timedelta without a value/unit "
1488+
"or descriptive keywords (days,seconds....)"
1489+
)
1490+
if not kwargs.keys() <= set(cls._allowed_kwargs):
14581491
raise ValueError(
14591492
"cannot construct a Timedelta from the passed arguments, "
1460-
"allowed keywords are "
1461-
"[weeks, days, hours, minutes, seconds, "
1462-
"milliseconds, microseconds, nanoseconds]"
1493+
f"allowed keywords are {cls._allowed_kwargs}"
14631494
)
1464-
1465-
# GH43764, convert any input to nanoseconds first and then
1466-
# create the timestamp. This ensures that any potential
1467-
# nanosecond contributions from kwargs parsed as floats
1468-
# are taken into consideration.
1469-
seconds = int((
1495+
# GH43764, convert any input to nanoseconds first, to ensure any potential
1496+
# nanosecond contributions from kwargs parsed as floats are included
1497+
kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()})
1498+
ns = sum(
14701499
(
1471-
(kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24
1472-
+ kwargs.get('hours', 0)
1473-
) * 3600
1474-
+ kwargs.get('minutes', 0) * 60
1475-
+ kwargs.get('seconds', 0)
1476-
) * 1_000_000_000
1477-
)
1478-
1479-
value = np.timedelta64(
1480-
int(kwargs.get('nanoseconds', 0))
1481-
+ int(kwargs.get('microseconds', 0) * 1_000)
1482-
+ int(kwargs.get('milliseconds', 0) * 1_000_000)
1483-
+ seconds
1500+
kwargs["weeks"] * 7 * 24 * 3600 * 1_000_000_000,
1501+
kwargs["days"] * 24 * 3600 * 1_000_000_000,
1502+
kwargs["hours"] * 3600 * 1_000_000_000,
1503+
kwargs["minutes"] * 60 * 1_000_000_000,
1504+
kwargs["seconds"] * 1_000_000_000,
1505+
kwargs["milliseconds"] * 1_000_000,
1506+
kwargs["microseconds"] * 1_000,
1507+
kwargs["nanoseconds"],
1508+
)
14841509
)
1510+
return create_timedelta(ns, "ns", out_reso)
14851511

1486-
if unit in {'Y', 'y', 'M'}:
1512+
if isinstance(value, str) and unit is not None:
1513+
raise ValueError("unit must not be specified if the value is a str")
1514+
elif unit in {"Y", "y", "M"}:
14871515
raise ValueError(
14881516
"Units 'M', 'Y', and 'y' are no longer supported, as they do not "
14891517
"represent unambiguous timedelta values durations."
14901518
)
1491-
1492-
# GH 30543 if pd.Timedelta already passed, return it
1493-
# check that only value is passed
1494-
if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0:
1495-
return value
1496-
elif isinstance(value, _Timedelta):
1497-
value = value.value
1498-
elif isinstance(value, str):
1499-
if unit is not None:
1500-
raise ValueError("unit must not be specified if the value is a str")
1501-
if (len(value) > 0 and value[0] == 'P') or (
1502-
len(value) > 1 and value[:2] == '-P'
1503-
):
1504-
value = parse_iso_format_string(value)
1505-
else:
1506-
value = parse_timedelta_string(value)
1507-
value = np.timedelta64(value)
1508-
elif PyDelta_Check(value):
1509-
value = convert_to_timedelta64(value, 'ns')
1510-
elif is_timedelta64_object(value):
1511-
value = ensure_td64ns(value)
1512-
elif is_tick_object(value):
1513-
value = np.timedelta64(value.nanos, 'ns')
1514-
elif is_integer_object(value) or is_float_object(value):
1515-
# unit=None is de-facto 'ns'
1516-
unit = parse_timedelta_unit(unit)
1517-
value = convert_to_timedelta64(value, unit)
1518-
elif checknull_with_nat(value):
1519-
return NaT
1520-
else:
1521-
raise ValueError(
1522-
"Value must be Timedelta, string, integer, "
1523-
f"float, timedelta or convertible, not {type(value).__name__}"
1524-
)
1525-
1526-
if is_timedelta64_object(value):
1527-
value = value.view('i8')
1528-
1529-
# nat
1530-
if value == NPY_NAT:
1531-
return NaT
1532-
1533-
return _timedelta_from_value_and_reso(value, NPY_FR_ns)
1519+
return create_timedelta(value, parse_timedelta_unit(unit), out_reso)
15341520

15351521
def __setstate__(self, state):
15361522
if len(state) == 1:
@@ -1607,30 +1593,25 @@ class Timedelta(_Timedelta):
16071593
# Arithmetic Methods
16081594
# TODO: Can some of these be defined in the cython class?
16091595

1610-
__neg__ = _op_unary_method(lambda x: -x, '__neg__')
1611-
__pos__ = _op_unary_method(lambda x: x, '__pos__')
1612-
__abs__ = _op_unary_method(lambda x: abs(x), '__abs__')
1596+
__neg__ = _op_unary_method(operator.neg, "__neg__")
1597+
__pos__ = _op_unary_method(operator.pos, "__pos__")
1598+
__abs__ = _op_unary_method(operator.abs, "__abs__")
16131599

1614-
__add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__')
1615-
__radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__')
1616-
__sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__')
1617-
__rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__')
1600+
__add__ = _binary_op_method_timedeltalike(operator.add, "__add__")
1601+
__radd__ = _binary_op_method_timedeltalike(operator.add, "__radd__")
1602+
__sub__ = _binary_op_method_timedeltalike(operator.sub, "__sub__")
1603+
__rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, "__rsub__")
16181604

16191605
def __mul__(self, other):
1620-
if is_integer_object(other) or is_float_object(other):
1621-
if util.is_nan(other):
1622-
# np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT
1623-
return NaT
1624-
1625-
return _timedelta_from_value_and_reso(
1626-
<int64_t>(other * self.value),
1627-
reso=self._reso,
1628-
)
1629-
1630-
elif is_array(other):
1606+
if util.is_nan(other):
1607+
# np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT
1608+
return NaT
1609+
if is_array(other):
16311610
# ndarray-like
16321611
return other * self.to_timedelta64()
1633-
1612+
if is_integer_object(other) or is_float_object(other):
1613+
# can't call Timedelta b/c it doesn't (yet) expose reso
1614+
return create_timedelta(self.value * other, "ignore", self._reso)
16341615
return NotImplemented
16351616

16361617
__rmul__ = __mul__
@@ -1825,6 +1806,6 @@ cdef _broadcast_floordiv_td64(
18251806

18261807

18271808
# resolution in ns
1828-
Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1)
1829-
Timedelta.max = Timedelta(np.iinfo(np.int64).max)
1809+
Timedelta.min = Timedelta(NPY_NAT + 1)
1810+
Timedelta.max = Timedelta(INT64_MAX)
18301811
Timedelta.resolution = Timedelta(nanoseconds=1)

pandas/_libs/tslibs/timestamps.pyx

+12-10
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,10 @@ from pandas._libs.tslibs.np_datetime cimport (
9090
pydatetime_to_dt64,
9191
)
9292

93-
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
93+
from pandas._libs.tslibs.np_datetime import (
94+
OutOfBoundsDatetime,
95+
OutOfBoundsTimedelta,
96+
)
9497

9598
from pandas._libs.tslibs.offsets cimport (
9699
BaseOffset,
@@ -435,14 +438,13 @@ cdef class _Timestamp(ABCTimestamp):
435438
# Timedelta
436439
try:
437440
return Timedelta(self.value - other.value)
438-
except (OverflowError, OutOfBoundsDatetime) as err:
439-
if isinstance(other, _Timestamp):
440-
if both_timestamps:
441-
raise OutOfBoundsDatetime(
442-
"Result is too large for pandas.Timedelta. Convert inputs "
443-
"to datetime.datetime with 'Timestamp.to_pydatetime()' "
444-
"before subtracting."
445-
) from err
441+
except OutOfBoundsTimedelta as err:
442+
if both_timestamps:
443+
raise OutOfBoundsTimedelta(
444+
"Result is too large for pandas.Timedelta. Convert inputs "
445+
"to datetime.datetime with 'Timestamp.to_pydatetime()' "
446+
"before subtracting."
447+
) from err
446448
# We get here in stata tests, fall back to stdlib datetime
447449
# method and return stdlib timedelta object
448450
pass
@@ -461,7 +463,7 @@ cdef class _Timestamp(ABCTimestamp):
461463
if PyDateTime_Check(other):
462464
try:
463465
return type(self)(other) - self
464-
except (OverflowError, OutOfBoundsDatetime) as err:
466+
except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err:
465467
# We get here in stata tests, fall back to stdlib datetime
466468
# method and return stdlib timedelta object
467469
pass

0 commit comments

Comments
 (0)