Skip to content

Commit 4ea3efe

Browse files
authored
BUG: pd.Timedelta(big_int, unit=W) silent overflow (#47268)
* BUG: pd.Timedelta(big_int, unit=W) silent overflow * GH ref * note exception changes in whatsnew
1 parent 019574d commit 4ea3efe

File tree

7 files changed

+66
-40
lines changed

7 files changed

+66
-40
lines changed

doc/source/whatsnew/v1.5.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ Other API changes
436436
The ``auth_local_webserver = False`` option is planned to stop working in
437437
October 2022. (:issue:`46312`)
438438
- :func:`read_json` now raises ``FileNotFoundError`` (previously ``ValueError``) when input is a string ending in ``.json``, ``.json.gz``, ``.json.bz2``, etc. but no such file exists. (:issue:`29102`)
439+
- Operations with :class:`Timestamp` or :class:`Timedelta` that would previously raise ``OverflowError`` now raise ``OutOfBoundsDatetime`` or ``OutOfBoundsTimedelta`` instead, where appropriate (:issue:`47268`)
439440
-
440441

441442
.. ---------------------------------------------------------------------------
@@ -736,6 +737,7 @@ Timedelta
736737
^^^^^^^^^
737738
- Bug in :func:`astype_nansafe` where ``astype("timedelta64[ns]")`` fails when ``np.nan`` is included (:issue:`45798`)
738739
- Bug in constructing a :class:`Timedelta` with a ``np.timedelta64`` object and a ``unit`` sometimes silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`46827`)
740+
- Bug in constructing a :class:`Timedelta` from a large integer or float with ``unit="W"`` silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`47268`)
739741
-
740742

741743
Time Zones

pandas/_libs/tslibs/timedeltas.pyx

+34-28
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,8 @@ cdef convert_to_timedelta64(object ts, str unit):
316316
317317
Return an ns based int64
318318
"""
319+
# Caller is responsible for checking unit not in ["Y", "y", "M"]
320+
319321
if checknull_with_nat(ts):
320322
return np.timedelta64(NPY_NAT, "ns")
321323
elif isinstance(ts, _Timedelta):
@@ -329,17 +331,9 @@ cdef convert_to_timedelta64(object ts, str unit):
329331
if ts == NPY_NAT:
330332
return np.timedelta64(NPY_NAT, "ns")
331333
else:
332-
if unit in ["Y", "M", "W"]:
333-
ts = np.timedelta64(ts, unit)
334-
else:
335-
ts = cast_from_unit(ts, unit)
336-
ts = np.timedelta64(ts, "ns")
334+
ts = _maybe_cast_from_unit(ts, unit)
337335
elif is_float_object(ts):
338-
if unit in ["Y", "M", "W"]:
339-
ts = np.timedelta64(int(ts), unit)
340-
else:
341-
ts = cast_from_unit(ts, unit)
342-
ts = np.timedelta64(ts, "ns")
336+
ts = _maybe_cast_from_unit(ts, unit)
343337
elif isinstance(ts, str):
344338
if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"):
345339
ts = parse_iso_format_string(ts)
@@ -356,6 +350,20 @@ cdef convert_to_timedelta64(object ts, str unit):
356350
return ts.astype("timedelta64[ns]")
357351

358352

353+
cdef _maybe_cast_from_unit(ts, str unit):
354+
# caller is responsible for checking
355+
# assert unit not in ["Y", "y", "M"]
356+
try:
357+
ts = cast_from_unit(ts, unit)
358+
except OverflowError as err:
359+
raise OutOfBoundsTimedelta(
360+
f"Cannot cast {ts} from {unit} to 'ns' without overflow."
361+
) from err
362+
363+
ts = np.timedelta64(ts, "ns")
364+
return ts
365+
366+
359367
@cython.boundscheck(False)
360368
@cython.wraparound(False)
361369
def array_to_timedelta64(
@@ -370,6 +378,8 @@ def array_to_timedelta64(
370378
-------
371379
np.ndarray[timedelta64ns]
372380
"""
381+
# Caller is responsible for checking
382+
assert unit not in ["Y", "y", "M"]
373383

374384
cdef:
375385
Py_ssize_t i, n = values.size
@@ -652,24 +662,20 @@ cdef inline timedelta_from_spec(object number, object frac, object unit):
652662
cdef:
653663
str n
654664

655-
try:
656-
unit = ''.join(unit)
657-
658-
if unit in ["M", "Y", "y"]:
659-
warnings.warn(
660-
"Units 'M', 'Y' and 'y' do not represent unambiguous "
661-
"timedelta values and will be removed in a future version.",
662-
FutureWarning,
663-
stacklevel=2,
664-
)
665+
unit = ''.join(unit)
666+
if unit in ["M", "Y", "y"]:
667+
warnings.warn(
668+
"Units 'M', 'Y' and 'y' do not represent unambiguous "
669+
"timedelta values and will be removed in a future version.",
670+
FutureWarning,
671+
stacklevel=3,
672+
)
665673

666-
if unit == 'M':
667-
# To parse ISO 8601 string, 'M' should be treated as minute,
668-
# not month
669-
unit = 'm'
670-
unit = parse_timedelta_unit(unit)
671-
except KeyError:
672-
raise ValueError(f"invalid abbreviation: {unit}")
674+
if unit == 'M':
675+
# To parse ISO 8601 string, 'M' should be treated as minute,
676+
# not month
677+
unit = 'm'
678+
unit = parse_timedelta_unit(unit)
673679

674680
n = ''.join(number) + '.' + ''.join(frac)
675681
return cast_from_unit(float(n), unit)
@@ -696,7 +702,7 @@ cpdef inline str parse_timedelta_unit(str unit):
696702
return unit
697703
try:
698704
return timedelta_abbrevs[unit.lower()]
699-
except (KeyError, AttributeError):
705+
except KeyError:
700706
raise ValueError(f"invalid unit abbreviation: {unit}")
701707

702708
# ----------------------------------------------------------------------

pandas/_libs/tslibs/timestamps.pyx

+5-2
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,10 @@ from pandas._libs.tslibs.np_datetime cimport (
9090
pydatetime_to_dt64,
9191
)
9292

93-
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
93+
from pandas._libs.tslibs.np_datetime import (
94+
OutOfBoundsDatetime,
95+
OutOfBoundsTimedelta,
96+
)
9497

9598
from pandas._libs.tslibs.offsets cimport (
9699
BaseOffset,
@@ -455,7 +458,7 @@ cdef class _Timestamp(ABCTimestamp):
455458
# Timedelta
456459
try:
457460
return Timedelta(self.value - other.value)
458-
except (OverflowError, OutOfBoundsDatetime) as err:
461+
except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err:
459462
if isinstance(other, _Timestamp):
460463
if both_timestamps:
461464
raise OutOfBoundsDatetime(

pandas/core/arrays/timedeltas.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,8 @@ def _from_sequence_not_strict(
270270
if dtype:
271271
_validate_td64_dtype(dtype)
272272

273+
assert unit not in ["Y", "y", "M"] # caller is responsible for checking
274+
273275
explicit_none = freq is None
274276
freq = freq if freq is not lib.no_default else None
275277

@@ -923,6 +925,8 @@ def sequence_to_td64ns(
923925
errors to be ignored; they are caught and subsequently ignored at a
924926
higher level.
925927
"""
928+
assert unit not in ["Y", "y", "M"] # caller is responsible for checking
929+
926930
inferred_freq = None
927931
if unit is not None:
928932
unit = parse_timedelta_unit(unit)
@@ -954,7 +958,7 @@ def sequence_to_td64ns(
954958
# Convert whatever we have into timedelta64[ns] dtype
955959
if is_object_dtype(data.dtype) or is_string_dtype(data.dtype):
956960
# no need to make a copy, need to convert if string-dtyped
957-
data = objects_to_td64ns(data, unit=unit, errors=errors)
961+
data = _objects_to_td64ns(data, unit=unit, errors=errors)
958962
copy = False
959963

960964
elif is_integer_dtype(data.dtype):
@@ -1032,7 +1036,7 @@ def ints_to_td64ns(data, unit="ns"):
10321036
return data, copy_made
10331037

10341038

1035-
def objects_to_td64ns(data, unit=None, errors="raise"):
1039+
def _objects_to_td64ns(data, unit=None, errors="raise"):
10361040
"""
10371041
Convert an object-dtyped or string-dtyped array into a
10381042
timedelta64[ns]-dtyped array.

pandas/tests/scalar/timedelta/test_arithmetic.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ def test_td_add_datetimelike_scalar(self, op):
9999
assert result is NaT
100100

101101
def test_td_add_timestamp_overflow(self):
102-
msg = "int too (large|big) to convert"
103-
with pytest.raises(OverflowError, match=msg):
102+
msg = "Cannot cast 259987 from D to 'ns' without overflow"
103+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
104104
Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D")
105105

106106
msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"

pandas/tests/scalar/timedelta/test_constructors.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@
1313
)
1414

1515

16+
def test_construct_with_weeks_unit_overflow():
17+
# GH#47268 don't silently wrap around
18+
with pytest.raises(OutOfBoundsTimedelta, match="without overflow"):
19+
Timedelta(1000000000000000000, unit="W")
20+
21+
with pytest.raises(OutOfBoundsTimedelta, match="without overflow"):
22+
Timedelta(1000000000000000000.0, unit="W")
23+
24+
1625
def test_construct_from_td64_with_unit():
1726
# ignore the unit, as it may cause silent overflows leading to incorrect
1827
# results, and in non-overflow cases is irrelevant GH#46827
@@ -204,15 +213,15 @@ def test_td_from_repr_roundtrip(val):
204213

205214

206215
def test_overflow_on_construction():
207-
msg = "int too (large|big) to convert"
208-
209216
# GH#3374
210217
value = Timedelta("1day").value * 20169940
211-
with pytest.raises(OverflowError, match=msg):
218+
msg = "Cannot cast 1742682816000000000000 from ns to 'ns' without overflow"
219+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
212220
Timedelta(value)
213221

214222
# xref GH#17637
215-
with pytest.raises(OverflowError, match=msg):
223+
msg = "Cannot cast 139993 from D to 'ns' without overflow"
224+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
216225
Timedelta(7 * 19999, unit="D")
217226

218227
msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"

pandas/tests/scalar/timedelta/test_timedelta.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -744,10 +744,12 @@ def test_implementation_limits(self):
744744
td = Timedelta(min_td.value - 1, "ns")
745745
assert td is NaT
746746

747-
with pytest.raises(OverflowError, match=msg):
747+
msg = "Cannot cast -9223372036854775809 from ns to 'ns' without overflow"
748+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
748749
Timedelta(min_td.value - 2, "ns")
749750

750-
with pytest.raises(OverflowError, match=msg):
751+
msg = "Cannot cast 9223372036854775808 from ns to 'ns' without overflow"
752+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
751753
Timedelta(max_td.value + 1, "ns")
752754

753755
def test_total_seconds_precision(self):

0 commit comments

Comments
 (0)