diff --git a/doc/source/reference/general_utility_functions.rst b/doc/source/reference/general_utility_functions.rst index 72a84217323ab..c1759110b94ad 100644 --- a/doc/source/reference/general_utility_functions.rst +++ b/doc/source/reference/general_utility_functions.rst @@ -43,6 +43,7 @@ Exceptions and warnings errors.NullFrequencyError errors.NumbaUtilError errors.OutOfBoundsDatetime + errors.OutOfBoundsTimedelta errors.ParserError errors.ParserWarning errors.PerformanceWarning diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 0ae4cc97d07e3..7723140e3eab1 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -7,6 +7,7 @@ "nat_strings", "is_null_datetimelike", "OutOfBoundsDatetime", + "OutOfBoundsTimedelta", "IncompatibleFrequency", "Period", "Resolution", @@ -26,7 +27,7 @@ ] from . import dtypes -from .conversion import localize_pydatetime +from .conversion import OutOfBoundsTimedelta, localize_pydatetime from .dtypes import Resolution from .nattype import NaT, NaTType, iNaT, is_null_datetimelike, nat_strings from .np_datetime import OutOfBoundsDatetime diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 31d2d0e9572f5..85da7a60a029a 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -51,6 +51,15 @@ DT64NS_DTYPE = np.dtype('M8[ns]') TD64NS_DTYPE = np.dtype('m8[ns]') +class OutOfBoundsTimedelta(ValueError): + """ + Raised when encountering a timedelta value that cannot be represented + as a timedelta64[ns]. 
+ """ + # Timedelta analogue to OutOfBoundsDatetime + pass + + + # ---------------------------------------------------------------------- # Unit Conversion Helpers @@ -228,11 +237,34 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True): Returns ------- - result : ndarray with dtype timedelta64[ns] - + ndarray[timedelta64[ns]] """ - return arr.astype(TD64NS_DTYPE, copy=copy) - # TODO: check for overflows when going from a lower-resolution to nanos + assert arr.dtype.kind == "m", arr.dtype + + if arr.dtype == TD64NS_DTYPE: + return arr.copy() if copy else arr + + # Re-use the datetime64 machinery to do an overflow-safe `astype` + dtype = arr.dtype.str.replace("m8", "M8") + dummy = arr.view(dtype) + try: + dt64_result = ensure_datetime64ns(dummy, copy) + except OutOfBoundsDatetime as err: + # Re-write the exception in terms of timedelta64 instead of dt64 + + # Find the value that we are going to report as causing an overflow + tdmin = arr.min() + tdmax = arr.max() + if np.abs(tdmin) >= np.abs(tdmax): + bad_val = tdmin + else: + bad_val = tdmax + + raise OutOfBoundsTimedelta( + f"Out of bounds for nanosecond {arr.dtype.name} {bad_val}" + ) from err  # keep the datetime64 error attached as the explicit cause + + return dt64_result.view(TD64NS_DTYPE) # ---------------------------------------------------------------------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d8779dae7c384..6a4b3318d3aa7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2302,7 +2302,8 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): def __init__(self, values, placement, ndim=None): if values.dtype != TD64NS_DTYPE: - values = conversion.ensure_timedelta64ns(values) + # e.g. 
non-nano or int64 + values = TimedeltaArray._from_sequence(values)._data if isinstance(values, TimedeltaArray): values = values._data assert isinstance(values, np.ndarray), type(values) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index e3427d93f3d84..6ac3004d29996 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -6,7 +6,7 @@ from pandas._config.config import OptionError -from pandas._libs.tslibs import OutOfBoundsDatetime +from pandas._libs.tslibs import OutOfBoundsDatetime, OutOfBoundsTimedelta class NullFrequencyError(ValueError): diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 036037032031a..eca444c9ceb34 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -32,6 +32,7 @@ def test_namespace(): "is_null_datetimelike", "nat_strings", "OutOfBoundsDatetime", + "OutOfBoundsTimedelta", "Period", "IncompatibleFrequency", "Resolution", diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index b35940c6bb95b..4f184b78f34a1 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -4,7 +4,13 @@ import pytest from pytz import UTC -from pandas._libs.tslibs import conversion, iNaT, timezones, tzconversion +from pandas._libs.tslibs import ( + OutOfBoundsTimedelta, + conversion, + iNaT, + timezones, + tzconversion, +) from pandas import Timestamp, date_range import pandas._testing as tm @@ -89,6 +95,13 @@ def test_ensure_datetime64ns_bigendian(): tm.assert_numpy_array_equal(result, expected) +def test_ensure_timedelta64ns_overflows(): + year_deltas = np.arange(10).astype("m8[Y]") * 100 + expected_msg = r"Out of bounds for nanosecond timedelta64\[Y\] 900" + with pytest.raises(OutOfBoundsTimedelta, match=expected_msg): + conversion.ensure_timedelta64ns(year_deltas) + + class SubDatetime(datetime): pass