1
1
import collections
2
+ import operator
2
3
import warnings
3
4
4
5
cimport cython
@@ -55,6 +56,7 @@ from pandas._libs.tslibs.np_datetime cimport (
55
56
pandas_timedelta_to_timedeltastruct,
56
57
pandas_timedeltastruct,
57
58
)
59
+ from pandas._libs.util cimport INT64_MAX
58
60
59
61
from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta
60
62
@@ -216,13 +218,12 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
216
218
+ delta.seconds * 1 _000_000
217
219
+ delta.microseconds
218
220
) * 1000
219
- except OverflowError as err :
220
- raise OutOfBoundsTimedelta( * err.args) from err
221
-
221
+ except OverflowError as ex :
222
+ msg = f " {delta} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns] "
223
+ raise OutOfBoundsTimedelta(msg) from ex
222
224
raise TypeError (type (delta))
223
225
224
226
225
- @ cython.overflowcheck (True )
226
227
cdef object ensure_td64ns(object ts):
227
228
"""
228
229
Overflow-safe implementation of td64.astype("m8[ns]")
@@ -241,24 +242,20 @@ cdef object ensure_td64ns(object ts):
241
242
str unitstr
242
243
243
244
td64_unit = get_datetime64_unit(ts)
244
- if (
245
- td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns
246
- and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC
247
- ):
248
- unitstr = npy_unit_to_abbrev(td64_unit)
245
+ if td64_unit == NPY_DATETIMEUNIT.NPY_FR_ns or td64_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
246
+ return ts
249
247
250
- td64_value = get_timedelta64_value(ts)
248
+ unitstr = npy_unit_to_abbrev(td64_unit)
249
+ mult = precision_from_unit(unitstr)[0 ]
251
250
252
- mult = precision_from_unit(unitstr)[ 0 ]
251
+ with cython.overflowcheck( True ):
253
252
try :
254
- # NB: cython#1381 this cannot be *=
255
- td64_value = td64_value * mult
256
- except OverflowError as err:
257
- raise OutOfBoundsTimedelta(ts ) from err
253
+ td64_value = get_timedelta64_value(ts) * mult
254
+ except OverflowError as ex:
255
+ msg = f " {ts} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns] "
256
+ raise OutOfBoundsTimedelta(msg ) from ex
258
257
259
- return np.timedelta64(td64_value, " ns" )
260
-
261
- return ts
258
+ return np.timedelta64(td64_value, " ns" )
262
259
263
260
264
261
cdef convert_to_timedelta64(object ts, str unit):
@@ -674,8 +671,7 @@ cdef bint _validate_ops_compat(other):
674
671
675
672
def _op_unary_method (func , name ):
676
673
def f (self ):
677
- new_value = func(self .value)
678
- return _timedelta_from_value_and_reso(new_value, self ._reso)
674
+ return create_timedelta(func(self .value), " ignore" , self ._reso)
679
675
f.__name__ = name
680
676
return f
681
677
@@ -724,13 +720,7 @@ def _binary_op_method_timedeltalike(op, name):
724
720
if self ._reso != other._reso:
725
721
raise NotImplementedError
726
722
727
- res = op(self .value, other.value)
728
- if res == NPY_NAT:
729
- # e.g. test_implementation_limits
730
- # TODO: more generally could do an overflowcheck in op?
731
- return NaT
732
-
733
- return _timedelta_from_value_and_reso(res, reso = self ._reso)
723
+ return create_timedelta(op(self .value, other.value), " ignore" , self ._reso)
734
724
735
725
f.__name__ = name
736
726
return f
@@ -861,7 +851,7 @@ cdef _to_py_int_float(v):
861
851
862
852
863
853
def _timedelta_unpickle (value , reso ):
864
- return _timedelta_from_value_and_reso (value, reso)
854
+ return create_timedelta (value, " ignore " , reso)
865
855
866
856
867
857
cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
@@ -892,6 +882,49 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
892
882
return td_base
893
883
894
884
885
+ @ cython.overflowcheck (True )
886
+ cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_reso):
887
+ """
888
+ Timedelta factory.
889
+
890
+ Timedelta.__new__ just does arg validation (at least currently). Also, some internal
891
+ functions expect to be able to create non-nano reso Timedeltas, but Timedelta.__new__
892
+ doesn't yet expose that.
893
+
894
+ _timedelta_from_value_and_reso does, but only accepts limited args, and doesn't check for overflow.
895
+ """
896
+ cdef:
897
+ int64_t out_value
898
+
899
+ if isinstance (value, _Timedelta):
900
+ return value
901
+
902
+ try :
903
+ # if unit == "ns", no need to create an m8[ns] just to read the (same) value back
904
+ # if unit == "ignore", assume caller wants to invoke an overflow-safe version of
905
+ # _timedelta_from_value_and_reso, and that any float rounding is acceptable
906
+ if (is_integer_object(value) or is_float_object(value)) and (in_unit == " ns" or in_unit == " ignore" ):
907
+ if util.is_nan(value):
908
+ return NaT
909
+ out_value = < int64_t> value
910
+ elif is_timedelta64_object(value):
911
+ out_value = ensure_td64ns(value).view(np.int64)
912
+ elif isinstance (value, str ):
913
+ if value.startswith((" P" , " -P" )):
914
+ out_value = parse_iso_format_string(value)
915
+ else :
916
+ out_value = parse_timedelta_string(value)
917
+ else :
918
+ out_value = convert_to_timedelta64(value, in_unit).view(np.int64)
919
+ except OverflowError as ex:
920
+ msg = f" {value} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]"
921
+ raise OutOfBoundsTimedelta(msg) from ex
922
+
923
+ if out_value == NPY_NAT:
924
+ return NaT
925
+ return _timedelta_from_value_and_reso(out_value, out_reso)
926
+
927
+
895
928
# Similar to Timestamp/datetime, this is a construction requirement for
896
929
# timedeltas that we need to do object instantiation in python. This will
897
930
# serve as a C extension type that shadows the Python class, where we do any
@@ -1375,7 +1408,7 @@ cdef class _Timedelta(timedelta):
1375
1408
@classmethod
1376
1409
def _from_value_and_reso(cls , int64_t value , NPY_DATETIMEUNIT reso ):
1377
1410
# exposing as classmethod for testing
1378
- return _timedelta_from_value_and_reso (value, reso)
1411
+ return create_timedelta (value, " ignore " , reso)
1379
1412
1380
1413
1381
1414
# Python front end to C extension type _Timedelta
@@ -1438,99 +1471,52 @@ class Timedelta(_Timedelta):
1438
1471
We see that either way we get the same result
1439
1472
"""
1440
1473
1441
- _req_any_kwargs_new = {" weeks" , " days" , " hours" , " minutes" , " seconds" ,
1442
- " milliseconds" , " microseconds" , " nanoseconds" }
1474
+ _allowed_kwargs = (
1475
+ " weeks" , " days" , " hours" , " minutes" , " seconds" , " milliseconds" , " microseconds" , " nanoseconds"
1476
+ )
1443
1477
1444
1478
def __new__ (cls , object value = _no_input, unit = None , **kwargs ):
1445
- cdef _Timedelta td_base
1479
+ cdef:
1480
+ _Timedelta td_base
1481
+ NPY_DATETIMEUNIT out_reso = NPY_FR_ns
1446
1482
1483
+ # process kwargs iff no value passed
1447
1484
if value is _no_input:
1448
- if not len (kwargs):
1449
- raise ValueError (" cannot construct a Timedelta without a "
1450
- " value/unit or descriptive keywords "
1451
- " (days,seconds....)" )
1452
-
1453
- kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs}
1454
-
1455
- unsupported_kwargs = set (kwargs)
1456
- unsupported_kwargs.difference_update(cls ._req_any_kwargs_new)
1457
- if unsupported_kwargs or not cls ._req_any_kwargs_new.intersection(kwargs):
1485
+ if not kwargs:
1486
+ raise ValueError (
1487
+ " cannot construct a Timedelta without a value/unit "
1488
+ " or descriptive keywords (days,seconds....)"
1489
+ )
1490
+ if not kwargs.keys() <= set (cls ._allowed_kwargs):
1458
1491
raise ValueError (
1459
1492
" cannot construct a Timedelta from the passed arguments, "
1460
- " allowed keywords are "
1461
- " [weeks, days, hours, minutes, seconds, "
1462
- " milliseconds, microseconds, nanoseconds]"
1493
+ f" allowed keywords are {cls._allowed_kwargs}"
1463
1494
)
1464
-
1465
- # GH43764, convert any input to nanoseconds first and then
1466
- # create the timestamp. This ensures that any potential
1467
- # nanosecond contributions from kwargs parsed as floats
1468
- # are taken into consideration.
1469
- seconds = int ((
1495
+ # GH43764, convert any input to nanoseconds first, to ensure any potential
1496
+ # nanosecond contributions from kwargs parsed as floats are included
1497
+ kwargs = collections.defaultdict(int , {key: _to_py_int_float(val) for key, val in kwargs.items()})
1498
+ ns = sum (
1470
1499
(
1471
- (kwargs.get(' days' , 0 ) + kwargs.get(' weeks' , 0 ) * 7 ) * 24
1472
- + kwargs.get(' hours' , 0 )
1473
- ) * 3600
1474
- + kwargs.get(' minutes' , 0 ) * 60
1475
- + kwargs.get(' seconds' , 0 )
1476
- ) * 1 _000_000_000
1477
- )
1478
-
1479
- value = np.timedelta64(
1480
- int (kwargs.get(' nanoseconds' , 0 ))
1481
- + int (kwargs.get(' microseconds' , 0 ) * 1 _000)
1482
- + int (kwargs.get(' milliseconds' , 0 ) * 1 _000_000)
1483
- + seconds
1500
+ kwargs[" weeks" ] * 7 * 24 * 3600 * 1 _000_000_000,
1501
+ kwargs[" days" ] * 24 * 3600 * 1 _000_000_000,
1502
+ kwargs[" hours" ] * 3600 * 1 _000_000_000,
1503
+ kwargs[" minutes" ] * 60 * 1 _000_000_000,
1504
+ kwargs[" seconds" ] * 1 _000_000_000,
1505
+ kwargs[" milliseconds" ] * 1 _000_000,
1506
+ kwargs[" microseconds" ] * 1 _000,
1507
+ kwargs[" nanoseconds" ],
1508
+ )
1484
1509
)
1510
+ return create_timedelta(ns, " ns" , out_reso)
1485
1511
1486
- if unit in {' Y' , ' y' , ' M' }:
1512
+ if isinstance (value, str ) and unit is not None :
1513
+ raise ValueError (" unit must not be specified if the value is a str" )
1514
+ elif unit in {" Y" , " y" , " M" }:
1487
1515
raise ValueError (
1488
1516
" Units 'M', 'Y', and 'y' are no longer supported, as they do not "
1489
1517
" represent unambiguous timedelta values durations."
1490
1518
)
1491
-
1492
- # GH 30543 if pd.Timedelta already passed, return it
1493
- # check that only value is passed
1494
- if isinstance (value, _Timedelta) and unit is None and len (kwargs) == 0 :
1495
- return value
1496
- elif isinstance (value, _Timedelta):
1497
- value = value.value
1498
- elif isinstance (value, str ):
1499
- if unit is not None :
1500
- raise ValueError (" unit must not be specified if the value is a str" )
1501
- if (len (value) > 0 and value[0 ] == ' P' ) or (
1502
- len (value) > 1 and value[:2 ] == ' -P'
1503
- ):
1504
- value = parse_iso_format_string(value)
1505
- else :
1506
- value = parse_timedelta_string(value)
1507
- value = np.timedelta64(value)
1508
- elif PyDelta_Check(value):
1509
- value = convert_to_timedelta64(value, ' ns' )
1510
- elif is_timedelta64_object(value):
1511
- value = ensure_td64ns(value)
1512
- elif is_tick_object(value):
1513
- value = np.timedelta64(value.nanos, ' ns' )
1514
- elif is_integer_object(value) or is_float_object(value):
1515
- # unit=None is de-facto 'ns'
1516
- unit = parse_timedelta_unit(unit)
1517
- value = convert_to_timedelta64(value, unit)
1518
- elif checknull_with_nat(value):
1519
- return NaT
1520
- else :
1521
- raise ValueError (
1522
- " Value must be Timedelta, string, integer, "
1523
- f" float, timedelta or convertible, not {type(value).__name__}"
1524
- )
1525
-
1526
- if is_timedelta64_object(value):
1527
- value = value.view(' i8' )
1528
-
1529
- # nat
1530
- if value == NPY_NAT:
1531
- return NaT
1532
-
1533
- return _timedelta_from_value_and_reso(value, NPY_FR_ns)
1519
+ return create_timedelta(value, parse_timedelta_unit(unit), out_reso)
1534
1520
1535
1521
def __setstate__ (self , state ):
1536
1522
if len (state) == 1 :
@@ -1607,30 +1593,25 @@ class Timedelta(_Timedelta):
1607
1593
# Arithmetic Methods
1608
1594
# TODO: Can some of these be defined in the cython class?
1609
1595
1610
- __neg__ = _op_unary_method(lambda x : - x, ' __neg__' )
1611
- __pos__ = _op_unary_method(lambda x : x, ' __pos__' )
1612
- __abs__ = _op_unary_method(lambda x : abs (x), ' __abs__' )
1596
+ __neg__ = _op_unary_method(operator.neg, " __neg__" )
1597
+ __pos__ = _op_unary_method(operator.pos, " __pos__" )
1598
+ __abs__ = _op_unary_method(operator. abs, " __abs__" )
1613
1599
1614
- __add__ = _binary_op_method_timedeltalike(lambda x , y : x + y, ' __add__' )
1615
- __radd__ = _binary_op_method_timedeltalike(lambda x , y : x + y, ' __radd__' )
1616
- __sub__ = _binary_op_method_timedeltalike(lambda x , y : x - y, ' __sub__' )
1617
- __rsub__ = _binary_op_method_timedeltalike(lambda x , y : y - x, ' __rsub__' )
1600
+ __add__ = _binary_op_method_timedeltalike(operator.add, " __add__" )
1601
+ __radd__ = _binary_op_method_timedeltalike(operator.add, " __radd__" )
1602
+ __sub__ = _binary_op_method_timedeltalike(operator.sub, " __sub__" )
1603
+ __rsub__ = _binary_op_method_timedeltalike(lambda x , y : y - x, " __rsub__" )
1618
1604
1619
1605
def __mul__ (self , other ):
1620
- if is_integer_object(other) or is_float_object(other):
1621
- if util.is_nan(other):
1622
- # np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT
1623
- return NaT
1624
-
1625
- return _timedelta_from_value_and_reso(
1626
- < int64_t> (other * self .value),
1627
- reso = self ._reso,
1628
- )
1629
-
1630
- elif is_array(other):
1606
+ if util.is_nan(other):
1607
+ # np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT
1608
+ return NaT
1609
+ if is_array(other):
1631
1610
# ndarray-like
1632
1611
return other * self .to_timedelta64()
1633
-
1612
+ if is_integer_object(other) or is_float_object(other):
1613
+ # can't call Timedelta b/c it doesn't (yet) expose reso
1614
+ return create_timedelta(self .value * other, " ignore" , self ._reso)
1634
1615
return NotImplemented
1635
1616
1636
1617
__rmul__ = __mul__
@@ -1825,6 +1806,6 @@ cdef _broadcast_floordiv_td64(
1825
1806
1826
1807
1827
1808
# resolution in ns
1828
- Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1 )
1829
- Timedelta.max = Timedelta(np.iinfo(np.int64).max )
1809
+ Timedelta.min = Timedelta(NPY_NAT + 1 )
1810
+ Timedelta.max = Timedelta(INT64_MAX )
1830
1811
Timedelta.resolution = Timedelta(nanoseconds = 1 )
0 commit comments