diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index d5177075afda5..9ce10dacf33de 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -306,6 +306,7 @@ Timedelta - Bug in constructing :class:`Timedelta` from ``np.timedelta64`` objects with non-nanosecond units that are out of bounds for ``timedelta64[ns]`` (:issue:`38965`) - Bug in constructing a :class:`TimedeltaIndex` incorrectly accepting ``np.datetime64("NaT")`` objects (:issue:`39462`) - Bug in constructing :class:`Timedelta` from input string with only symbols and no digits failed to raise an error (:issue:`39710`) +- Bug in :class:`TimedeltaIndex` and :func:`to_timedelta` failing to raise when passed non-nanosecond ``timedelta64`` arrays that overflow when converting to ``timedelta64[ns]`` (:issue:`40008`) Timezones ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 0646c58fa84b6..536cb63cc6119 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -239,6 +239,11 @@ def ensure_datetime64ns(arr: ndarray, copy: bool=True): return result unit = get_datetime64_unit(arr.flat[0]) + if unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # without raising explicitly here, we end up with a SystemError + # built-in function ensure_datetime64ns returned a result with an error + raise ValueError("datetime64/timedelta64 must have a unit specified") + if unit == NPY_FR_ns: if copy: arr = arr.copy() diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 893644be23a0e..86d802fd90792 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -24,7 +24,10 @@ iNaT, to_offset, ) -from pandas._libs.tslibs.conversion import precision_from_unit +from pandas._libs.tslibs.conversion import ( + ensure_timedelta64ns, + precision_from_unit, +) from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import ( array_to_timedelta64, @@ -982,8 +985,7 @@ def sequence_to_td64ns(data, copy=False, unit=None, errors="raise"): elif is_timedelta64_dtype(data.dtype): if data.dtype != TD64NS_DTYPE: # non-nano unit - # TODO: watch out for overflows - data = data.astype(TD64NS_DTYPE) + data = ensure_timedelta64ns(data) copy = False else: @@ -1025,8 +1027,8 @@ def ints_to_td64ns(data, unit="ns"): dtype_str = f"timedelta64[{unit}]" data = data.view(dtype_str) - # TODO: watch out for overflows when converting from lower-resolution - data = data.astype("timedelta64[ns]") + data = ensure_timedelta64ns(data) + # the astype conversion makes a copy, so we can avoid re-copying later copy_made = True diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 2b689364c5002..248798408381e 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -765,7 +765,7 @@ def test_astype_datetime64_bad_dtype_raises(from_type, to_type): @pytest.mark.parametrize("from_type", [np.datetime64, np.timedelta64]) def test_astype_object_preserves_datetime_na(from_type): - arr = np.array([from_type("NaT")]) + arr = np.array([from_type("NaT", "ns")]) result = astype_nansafe(arr, dtype=np.dtype("object")) assert isna(result)[0] diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 4786b8c35a5b1..56326dd15bd9b 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -145,18 +145,28 @@ def test_bins_not_monotonic(): ), ), ( - [np.timedelta64(-1), np.timedelta64(0), np.timedelta64(1)], + [ + np.timedelta64(-1, "ns"), + np.timedelta64(0, "ns"), + np.timedelta64(1, "ns"), + ], np.array( [ - np.timedelta64(-np.iinfo(np.int64).max), - np.timedelta64(0), - np.timedelta64(np.iinfo(np.int64).max), + np.timedelta64(-np.iinfo(np.int64).max, "ns"), + np.timedelta64(0, "ns"), + np.timedelta64(np.iinfo(np.int64).max, "ns"), ] ), IntervalIndex.from_tuples( [ - (np.timedelta64(-np.iinfo(np.int64).max), np.timedelta64(0)), - (np.timedelta64(0), np.timedelta64(np.iinfo(np.int64).max)), + ( + np.timedelta64(-np.iinfo(np.int64).max, "ns"), + np.timedelta64(0, "ns"), + ), + ( + np.timedelta64(0, "ns"), + np.timedelta64(np.iinfo(np.int64).max, "ns"), + ), ] ), ), diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 6ff14087e6259..99ff4e8e6a8dd 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas.errors import OutOfBoundsTimedelta + import pandas as pd from pandas import ( Series, @@ -14,6 +16,7 @@ to_timedelta, ) import pandas._testing as tm +from pandas.core.arrays import TimedeltaArray class TestTimedeltas: @@ -75,6 +78,19 @@ def test_to_timedelta(self): expected = TimedeltaIndex([np.timedelta64(1, "D")] * 5) tm.assert_index_equal(result, expected) + def test_to_timedelta_oob_non_nano(self): + arr = np.array([pd.NaT.value + 1], dtype="timedelta64[s]") + + msg = r"Out of bounds for nanosecond timedelta64\[s\] -9223372036854775807" + with pytest.raises(OutOfBoundsTimedelta, match=msg): + to_timedelta(arr) + + with pytest.raises(OutOfBoundsTimedelta, match=msg): + TimedeltaIndex(arr) + + with pytest.raises(OutOfBoundsTimedelta, match=msg): + TimedeltaArray._from_sequence(arr) + def test_to_timedelta_dataframe(self): # GH 11776 arr = np.arange(10).reshape(2, 5)