Skip to content

BUG: ensure_timedelta64ns overflows #34448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jul 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/reference/general_utility_functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ Exceptions and warnings
errors.NullFrequencyError
errors.NumbaUtilError
errors.OutOfBoundsDatetime
errors.OutOfBoundsTimedelta
errors.ParserError
errors.ParserWarning
errors.PerformanceWarning
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"nat_strings",
"is_null_datetimelike",
"OutOfBoundsDatetime",
"OutOfBoundsTimedelta",
"IncompatibleFrequency",
"Period",
"Resolution",
Expand All @@ -26,7 +27,7 @@
]

from . import dtypes
from .conversion import localize_pydatetime
from .conversion import OutOfBoundsTimedelta, localize_pydatetime
from .dtypes import Resolution
from .nattype import NaT, NaTType, iNaT, is_null_datetimelike, nat_strings
from .np_datetime import OutOfBoundsDatetime
Expand Down
40 changes: 36 additions & 4 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,15 @@ DT64NS_DTYPE = np.dtype('M8[ns]')
TD64NS_DTYPE = np.dtype('m8[ns]')


class OutOfBoundsTimedelta(ValueError):
    """
    Raised when encountering a timedelta value that cannot be represented
    as a timedelta64[ns].
    """
    # Timedelta analogue to OutOfBoundsDatetime


# ----------------------------------------------------------------------
# Unit Conversion Helpers

Expand Down Expand Up @@ -228,11 +237,34 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True):

Returns
-------
result : ndarray with dtype timedelta64[ns]

ndarray[timedelta64[ns]]
"""
return arr.astype(TD64NS_DTYPE, copy=copy)
# TODO: check for overflows when going from a lower-resolution to nanos
assert arr.dtype.kind == "m", arr.dtype

if arr.dtype == TD64NS_DTYPE:
return arr.copy() if copy else arr

# Re-use the datetime64 machinery to do an overflow-safe `astype`
dtype = arr.dtype.str.replace("m8", "M8")
dummy = arr.view(dtype)
try:
dt64_result = ensure_datetime64ns(dummy, copy)
except OutOfBoundsDatetime as err:
# Re-write the exception in terms of timedelta64 instead of dt64

# Find the value that we are going to report as causing an overflow
tdmin = arr.min()
tdmax = arr.max()
if np.abs(tdmin) >= np.abs(tdmax):
bad_val = tdmin
else:
bad_val = tdmax

raise OutOfBoundsTimedelta(
f"Out of bounds for nanosecond {arr.dtype.name} {bad_val}"
)

return dt64_result.view(TD64NS_DTYPE)


# ----------------------------------------------------------------------
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2302,7 +2302,8 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):

def __init__(self, values, placement, ndim=None):
if values.dtype != TD64NS_DTYPE:
values = conversion.ensure_timedelta64ns(values)
# e.g. non-nano or int64
values = TimedeltaArray._from_sequence(values)._data
if isinstance(values, TimedeltaArray):
values = values._data
assert isinstance(values, np.ndarray), type(values)
Expand Down
2 changes: 1 addition & 1 deletion pandas/errors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from pandas._config.config import OptionError

from pandas._libs.tslibs import OutOfBoundsDatetime
from pandas._libs.tslibs import OutOfBoundsDatetime, OutOfBoundsTimedelta


class NullFrequencyError(ValueError):
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/tslibs/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def test_namespace():
"is_null_datetimelike",
"nat_strings",
"OutOfBoundsDatetime",
"OutOfBoundsTimedelta",
"Period",
"IncompatibleFrequency",
"Resolution",
Expand Down
15 changes: 14 additions & 1 deletion pandas/tests/tslibs/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@
import pytest
from pytz import UTC

from pandas._libs.tslibs import conversion, iNaT, timezones, tzconversion
from pandas._libs.tslibs import (
OutOfBoundsTimedelta,
conversion,
iNaT,
timezones,
tzconversion,
)

from pandas import Timestamp, date_range
import pandas._testing as tm
Expand Down Expand Up @@ -89,6 +95,13 @@ def test_ensure_datetime64ns_bigendian():
tm.assert_numpy_array_equal(result, expected)


def test_ensure_timedelta64ns_overflows():
    # Year-resolution timedeltas of 0, 100, ..., 900; converting to
    # timedelta64[ns] is expected to raise, and the message is expected to
    # name 900, the entry with the largest magnitude.
    arr = np.arange(10).astype("m8[Y]") * 100
    msg = r"Out of bounds for nanosecond timedelta64\[Y\] 900"
    with pytest.raises(OutOfBoundsTimedelta, match=msg):
        conversion.ensure_timedelta64ns(arr)


class SubDatetime(datetime):
pass

Expand Down