Skip to content

Commit 8cf3771

Browse files
authored
BUG: Timedelta(td64_out_of_bounds) silently overflowing (#38965)
1 parent 0f2db73 commit 8cf3771

File tree

7 files changed

+143
-28
lines changed

7 files changed

+143
-28
lines changed

doc/source/whatsnew/v1.3.0.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -198,14 +198,14 @@ Datetimelike
198198
- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`)
199199
- Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`)
200200
- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`, :issue:`38764`, :issue:`38792`)
201-
- Bug in constructing a :class:`Series` or :class:`DataFrame` with a ``datetime`` object out of bounds for ``datetime64[ns]`` dtype (:issue:`38792`)
201+
- Bug in constructing a :class:`Series` or :class:`DataFrame` with a ``datetime`` object out of bounds for ``datetime64[ns]`` dtype or a ``timedelta`` object out of bounds for ``timedelta64[ns]`` dtype (:issue:`38792`, :issue:`38965`)
202202
- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)
203203
- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`)
204204
-
205205

206206
Timedelta
207207
^^^^^^^^^
208-
208+
- Bug in constructing :class:`Timedelta` from ``np.timedelta64`` objects with non-nanosecond units that are out of bounds for ``timedelta64[ns]`` (:issue:`38965`)
209209
-
210210
-
211211

pandas/_libs/tslibs/np_datetime.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ cdef extern from "numpy/ndarraytypes.h":
4242
NPY_FR_ps
4343
NPY_FR_fs
4444
NPY_FR_as
45+
NPY_FR_GENERIC
4546

4647
cdef extern from "src/datetime/np_datetime.h":
4748
ctypedef struct pandas_timedeltastruct:

pandas/_libs/tslibs/timedeltas.pyx

+79-10
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,18 @@ PyDateTime_IMPORT
2424

2525
cimport pandas._libs.tslibs.util as util
2626
from pandas._libs.tslibs.base cimport ABCTimestamp
27-
from pandas._libs.tslibs.conversion cimport cast_from_unit
27+
from pandas._libs.tslibs.conversion cimport cast_from_unit, precision_from_unit
2828
from pandas._libs.tslibs.nattype cimport (
2929
NPY_NAT,
3030
c_NaT as NaT,
3131
c_nat_strings as nat_strings,
3232
checknull_with_nat,
3333
)
3434
from pandas._libs.tslibs.np_datetime cimport (
35+
NPY_DATETIMEUNIT,
3536
cmp_scalar,
37+
get_datetime64_unit,
38+
get_timedelta64_value,
3639
pandas_timedeltastruct,
3740
td64_to_tdstruct,
3841
)
@@ -156,7 +159,7 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
156159
if isinstance(delta, _Timedelta):
157160
delta = delta.value
158161
if is_timedelta64_object(delta):
159-
return delta.astype("timedelta64[ns]").item()
162+
return get_timedelta64_value(ensure_td64ns(delta))
160163
if is_integer_object(delta):
161164
return delta
162165
if PyDelta_Check(delta):
@@ -169,6 +172,72 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
169172
raise TypeError(type(delta))
170173

171174

175+
cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
176+
if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
177+
# generic -> default to nanoseconds
178+
return "ns"
179+
elif unit == NPY_DATETIMEUNIT.NPY_FR_us:
180+
return "us"
181+
elif unit == NPY_DATETIMEUNIT.NPY_FR_ms:
182+
return "ms"
183+
elif unit == NPY_DATETIMEUNIT.NPY_FR_s:
184+
return "s"
185+
elif unit == NPY_DATETIMEUNIT.NPY_FR_m:
186+
return "m"
187+
elif unit == NPY_DATETIMEUNIT.NPY_FR_h:
188+
return "h"
189+
elif unit == NPY_DATETIMEUNIT.NPY_FR_D:
190+
return "D"
191+
elif unit == NPY_DATETIMEUNIT.NPY_FR_W:
192+
return "W"
193+
elif unit == NPY_DATETIMEUNIT.NPY_FR_M:
194+
return "M"
195+
elif unit == NPY_DATETIMEUNIT.NPY_FR_Y:
196+
return "Y"
197+
else:
198+
raise NotImplementedError(unit)
199+
200+
201+
@cython.overflowcheck(True)
202+
cdef object ensure_td64ns(object ts):
203+
"""
204+
Overflow-safe implementation of td64.astype("m8[ns]")
205+
206+
Parameters
207+
----------
208+
ts : np.timedelta64
209+
210+
Returns
211+
-------
212+
np.timedelta64[ns]
213+
"""
214+
cdef:
215+
NPY_DATETIMEUNIT td64_unit
216+
int64_t td64_value, mult
217+
str unitstr
218+
219+
td64_unit = get_datetime64_unit(ts)
220+
if (
221+
td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns
222+
and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC
223+
):
224+
unitstr = npy_unit_to_abbrev(td64_unit)
225+
226+
td64_value = get_timedelta64_value(ts)
227+
228+
mult = precision_from_unit(unitstr)[0]
229+
try:
230+
# NB: cython#1381 this cannot be *=
231+
td64_value = td64_value * mult
232+
except OverflowError as err:
233+
from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta
234+
raise OutOfBoundsTimedelta(ts)
235+
236+
return np.timedelta64(td64_value, "ns")
237+
238+
return ts
239+
240+
172241
cdef convert_to_timedelta64(object ts, str unit):
173242
"""
174243
Convert an incoming object to a timedelta64 if possible.
@@ -184,37 +253,37 @@ cdef convert_to_timedelta64(object ts, str unit):
184253
Return an ns based int64
185254
"""
186255
if checknull_with_nat(ts):
187-
return np.timedelta64(NPY_NAT)
256+
return np.timedelta64(NPY_NAT, "ns")
188257
elif isinstance(ts, _Timedelta):
189258
# already in the proper format
190-
ts = np.timedelta64(ts.value)
259+
ts = np.timedelta64(ts.value, "ns")
191260
elif is_datetime64_object(ts):
192261
# only accept a NaT here
193262
if ts.astype('int64') == NPY_NAT:
194263
return np.timedelta64(NPY_NAT)
195264
elif is_timedelta64_object(ts):
196-
ts = ts.astype(f"m8[{unit.lower()}]")
265+
ts = ensure_td64ns(ts)
197266
elif is_integer_object(ts):
198267
if ts == NPY_NAT:
199-
return np.timedelta64(NPY_NAT)
268+
return np.timedelta64(NPY_NAT, "ns")
200269
else:
201270
if unit in ['Y', 'M', 'W']:
202271
ts = np.timedelta64(ts, unit)
203272
else:
204273
ts = cast_from_unit(ts, unit)
205-
ts = np.timedelta64(ts)
274+
ts = np.timedelta64(ts, "ns")
206275
elif is_float_object(ts):
207276
if unit in ['Y', 'M', 'W']:
208277
ts = np.timedelta64(int(ts), unit)
209278
else:
210279
ts = cast_from_unit(ts, unit)
211-
ts = np.timedelta64(ts)
280+
ts = np.timedelta64(ts, "ns")
212281
elif isinstance(ts, str):
213282
if len(ts) > 0 and ts[0] == 'P':
214283
ts = parse_iso_format_string(ts)
215284
else:
216285
ts = parse_timedelta_string(ts)
217-
ts = np.timedelta64(ts)
286+
ts = np.timedelta64(ts, "ns")
218287
elif is_tick_object(ts):
219288
ts = np.timedelta64(ts.nanos, 'ns')
220289

@@ -1196,7 +1265,7 @@ class Timedelta(_Timedelta):
11961265
elif is_timedelta64_object(value):
11971266
if unit is not None:
11981267
value = value.astype(f'timedelta64[{unit}]')
1199-
value = value.astype('timedelta64[ns]')
1268+
value = ensure_td64ns(value)
12001269
elif is_tick_object(value):
12011270
value = np.timedelta64(value.nanos, 'ns')
12021271
elif is_integer_object(value) or is_float_object(value):

pandas/core/dtypes/cast.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from pandas._libs.tslibs import (
2929
NaT,
3030
OutOfBoundsDatetime,
31+
OutOfBoundsTimedelta,
3132
Period,
3233
Timedelta,
3334
Timestamp,
@@ -743,8 +744,12 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
743744
val = val.value
744745

745746
elif isinstance(val, (np.timedelta64, timedelta)):
746-
val = Timedelta(val).value
747-
dtype = np.dtype("m8[ns]")
747+
try:
748+
val = Timedelta(val).value
749+
except (OutOfBoundsTimedelta, OverflowError):
750+
dtype = np.dtype(object)
751+
else:
752+
dtype = np.dtype("m8[ns]")
748753

749754
elif is_bool(val):
750755
dtype = np.dtype(np.bool_)
@@ -1386,7 +1391,7 @@ def try_timedelta(v):
13861391

13871392
try:
13881393
td_values = to_timedelta(v)
1389-
except ValueError:
1394+
except (ValueError, OverflowError):
13901395
return v.reshape(shape)
13911396
else:
13921397
return np.asarray(td_values).reshape(shape)
@@ -1618,8 +1623,16 @@ def construct_2d_arraylike_from_scalar(
16181623
value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool
16191624
) -> np.ndarray:
16201625

1626+
shape = (length, width)
1627+
16211628
if dtype.kind in ["m", "M"]:
16221629
value = maybe_unbox_datetimelike(value, dtype)
1630+
elif dtype == object:
1631+
if isinstance(value, (np.timedelta64, np.datetime64)):
1632+
# calling np.array below would cast to pytimedelta/pydatetime
1633+
out = np.empty(shape, dtype=object)
1634+
out.fill(value)
1635+
return out
16231636

16241637
# Attempt to coerce to a numpy array
16251638
try:
@@ -1632,7 +1645,6 @@ def construct_2d_arraylike_from_scalar(
16321645
if arr.ndim != 0:
16331646
raise ValueError("DataFrame constructor not properly called!")
16341647

1635-
shape = (length, width)
16361648
return np.full(shape, arr)
16371649

16381650

pandas/tests/frame/test_constructors.py

+17-11
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import pytest
1111
import pytz
1212

13-
from pandas.compat.numpy import _np_version_under1p19, _np_version_under1p20
13+
from pandas.compat.numpy import _np_version_under1p19
1414

1515
from pandas.core.dtypes.common import is_integer_dtype
1616
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype
@@ -2371,16 +2371,10 @@ def test_from_timedelta_scalar_preserves_nanos(self, constructor):
23712371
def test_from_timestamp_scalar_preserves_nanos(self, constructor):
23722372
ts = Timestamp.now() + Timedelta(1)
23732373

2374-
obj = Series(ts, index=range(1), dtype="M8[ns]")
2374+
obj = constructor(ts, dtype="M8[ns]")
23752375
assert get1(obj) == ts
23762376

2377-
def test_from_timedelta64_scalar_object(self, constructor, request):
2378-
if getattr(constructor, "func", None) is DataFrame and _np_version_under1p20:
2379-
# getattr check means we only xfail when box is None
2380-
mark = pytest.mark.xfail(
2381-
reason="np.array(td64, dtype=object) converts to int"
2382-
)
2383-
request.node.add_marker(mark)
2377+
def test_from_timedelta64_scalar_object(self, constructor):
23842378

23852379
td = Timedelta(1)
23862380
td64 = td.to_timedelta64()
@@ -2407,8 +2401,20 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request):
24072401
with pytest.raises(TypeError, match="Cannot cast"):
24082402
constructor(scalar, dtype=dtype)
24092403

2410-
def test_from_out_of_bounds_datetime(self, constructor):
2404+
@pytest.mark.parametrize("cls", [datetime, np.datetime64])
2405+
def test_from_out_of_bounds_datetime(self, constructor, cls):
24112406
scalar = datetime(9999, 1, 1)
2407+
if cls is np.datetime64:
2408+
scalar = np.datetime64(scalar, "D")
2409+
result = constructor(scalar)
2410+
2411+
assert type(get1(result)) is cls
2412+
2413+
@pytest.mark.parametrize("cls", [timedelta, np.timedelta64])
2414+
def test_from_out_of_bounds_timedelta(self, constructor, cls):
2415+
scalar = datetime(9999, 1, 1) - datetime(1970, 1, 1)
2416+
if cls is np.timedelta64:
2417+
scalar = np.timedelta64(scalar, "D")
24122418
result = constructor(scalar)
24132419

2414-
assert type(get1(result)) is datetime
2420+
assert type(get1(result)) is cls

pandas/tests/scalar/timedelta/test_constructors.py

+27
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas._libs.tslibs import OutOfBoundsTimedelta
7+
68
from pandas import Timedelta, offsets, to_timedelta
79

810

@@ -197,6 +199,31 @@ def test_overflow_on_construction():
197199
Timedelta(timedelta(days=13 * 19999))
198200

199201

202+
def test_construction_out_of_bounds_td64():
203+
# TODO: parametrize over units just above/below the implementation bounds
204+
# once GH#38964 is resolved
205+
206+
# Timedelta.max is just under 106752 days
207+
td64 = np.timedelta64(106752, "D")
208+
assert td64.astype("m8[ns]").view("i8") < 0 # i.e. naive astype will be wrong
209+
210+
msg = "106752 days"
211+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
212+
Timedelta(td64)
213+
214+
# But just back in bounds and we are OK
215+
assert Timedelta(td64 - 1) == td64 - 1
216+
217+
td64 *= -1
218+
assert td64.astype("m8[ns]").view("i8") > 0 # i.e. naive astype will be wrong
219+
220+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
221+
Timedelta(td64)
222+
223+
# But just back in bounds and we are OK
224+
assert Timedelta(td64 + 1) == td64 + 1
225+
226+
200227
@pytest.mark.parametrize(
201228
"fmt,exp",
202229
[

pandas/util/_exceptions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def rewrite_exception(old_name: str, new_name: str):
1010
try:
1111
yield
1212
except Exception as err:
13-
msg = err.args[0]
13+
msg = str(err.args[0])
1414
msg = msg.replace(old_name, new_name)
1515
args: Tuple[str, ...] = (msg,)
1616
if len(err.args) > 1:

0 commit comments

Comments
 (0)