From 005f5b41b866a7af271442f1967bdfd2daf254a9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 28 May 2020 17:39:18 -0700 Subject: [PATCH 1/6] BUG: ensure_timedelta64ns overflows --- pandas/_libs/tslibs/conversion.pyx | 15 +++++++++++---- pandas/core/internals/blocks.py | 3 ++- pandas/tests/tslibs/test_conversion.py | 14 +++++++++++++- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 8fd2f6b476e1c..7e520cfd5c1ef 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -217,11 +217,18 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True): Returns ------- - result : ndarray with dtype timedelta64[ns] - + ndarray[timedelta64[ns]] """ - return arr.astype(TD64NS_DTYPE, copy=copy) - # TODO: check for overflows when going from a lower-resolution to nanos + assert arr.dtype.kind == "m", arr.dtype + + if arr.dtype == TD64NS_DTYPE: + return arr.copy() if copy else arr + + # Re-use the datetime64 machinery to do an overflow-safe `astype` + dtype = arr.dtype.str.replace("m8", "M8") + dummy = arr.view(dtype) + dt64_result = ensure_datetime64ns(dummy, copy) + return dt64_result.view(TD64NS_DTYPE) # ---------------------------------------------------------------------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d880bd81bd947..a1d8ac67acb9e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2312,7 +2312,8 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): def __init__(self, values, placement, ndim=None): if values.dtype != TD64NS_DTYPE: - values = conversion.ensure_timedelta64ns(values) + # e.g. non-nano or int64 + values = TimedeltaArray._from_sequence(values)._data if isinstance(values, TimedeltaArray): values = values._data assert isinstance(values, np.ndarray), type(values) diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index fd8c9df026674..d8b9eb96d3264 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -4,7 +4,13 @@ import pytest from pytz import UTC -from pandas._libs.tslibs import conversion, iNaT, timezones, tzconversion +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + conversion, + iNaT, + timezones, + tzconversion, +) from pandas import Timestamp, date_range import pandas._testing as tm @@ -80,6 +86,12 @@ def test_ensure_datetime64ns_bigendian(): tm.assert_numpy_array_equal(result, expected) +def test_ensure_timedelta64ns_overflows(): + arr = np.arange(10).astype("m8[Y]") * 100 + with pytest.raises(OutOfBoundsDatetime, match="Out of bounds"): + conversion.ensure_timedelta64ns(arr) + + class SubDatetime(datetime): pass From 3356d907c97abb2a03112d40e59fe080b5ac37dd Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 7 Jun 2020 14:07:50 -0700 Subject: [PATCH 2/6] TST: fix error message to be about timedelta64 --- pandas/_libs/tslibs/conversion.pyx | 19 ++++++++++++++++++- pandas/tests/tslibs/test_conversion.py | 3 ++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 106b4e0d81991..9c447b89b8ae6 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -227,7 +227,24 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True): # Re-use the datetime64 machinery to do an overflow-safe `astype` dtype = arr.dtype.str.replace("m8", "M8") dummy = arr.view(dtype) - dt64_result = ensure_datetime64ns(dummy, copy) + try: + dt64_result = ensure_datetime64ns(dummy, copy) + except OutOfBoundsDatetime as err: + # Re-write the exception in terms of timedelta64 instead of dt64 + + # Find the value that we are going to report as causing an overflow + tdmin = arr.min() + tdmax = arr.max() + if np.abs(tdmin) >= np.abs(tdmax): + bad_val = tdmin + else: + bad_val = tdmax + + unit_str = arr.dtype.str.split("[")[-1][:-1] + raise OutOfBoundsDatetime( + f"Out of bounds for nanosecond timedelta {bad_val}[{unit_str}]" + ) + return dt64_result.view(TD64NS_DTYPE) diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index d8b9eb96d3264..3530d6300abac 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -88,7 +88,8 @@ def test_ensure_datetime64ns_bigendian(): def test_ensure_timedelta64ns_overflows(): arr = np.arange(10).astype("m8[Y]") * 100 - with pytest.raises(OutOfBoundsDatetime, match="Out of bounds"): + msg = r"Out of bounds for nanosecond timedelta 900\[Y\]" + with pytest.raises(OutOfBoundsDatetime, match=msg): conversion.ensure_timedelta64ns(arr) From a73f9c214bec558720d5e482a59dcc4aad5928d0 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 21 Jun 2020 14:34:35 -0700 Subject: [PATCH 3/6] implement OutOfBoundsTimedelta --- pandas/_libs/tslibs/__init__.py | 3 ++- pandas/_libs/tslibs/conversion.pyx | 7 ++++++- pandas/tests/tslibs/test_api.py | 1 + pandas/tests/tslibs/test_conversion.py | 4 ++-- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 6f173a4542bb0..7a243654b66ba 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -7,6 +7,7 @@ "nat_strings", "is_null_datetimelike", "OutOfBoundsDatetime", + "OutOfBoundsTimedelta", "IncompatibleFrequency", "Period", "Resolution", @@ -19,7 +20,7 @@ ] from . import dtypes -from .conversion import localize_pydatetime +from .conversion import OutOfBoundsTimedelta, localize_pydatetime from .nattype import NaT, NaTType, iNaT, is_null_datetimelike, nat_strings from .np_datetime import OutOfBoundsDatetime from .offsets import to_offset diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index b540bbe01a4e7..7c73975db5db6 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -52,6 +52,11 @@ DT64NS_DTYPE = np.dtype('M8[ns]') TD64NS_DTYPE = np.dtype('m8[ns]') +class OutOfBoundsTimedelta(ValueError): + # Timedelta analogue to OutOfBoundsDatetime + pass + + # ---------------------------------------------------------------------- # Unit Conversion Helpers @@ -242,7 +247,7 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True): bad_val = tdmax unit_str = arr.dtype.str.split("[")[-1][:-1] - raise OutOfBoundsDatetime( + raise OutOfBoundsTimedelta( f"Out of bounds for nanosecond timedelta {bad_val}[{unit_str}]" ) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index a119db6c68635..2788da19c1485 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -31,6 +31,7 @@ def test_namespace(): "is_null_datetimelike", "nat_strings", "OutOfBoundsDatetime", + "OutOfBoundsTimedelta", "Period", "IncompatibleFrequency", "Resolution", diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index 3530d6300abac..c35b39c00e751 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -5,7 +5,7 @@ from pytz import UTC from pandas._libs.tslibs import ( - OutOfBoundsDatetime, + OutOfBoundsTimedelta, conversion, iNaT, timezones, @@ -89,7 +89,7 @@ def test_ensure_datetime64ns_bigendian(): def test_ensure_timedelta64ns_overflows(): arr = np.arange(10).astype("m8[Y]") * 100 msg = r"Out of bounds for nanosecond timedelta 900\[Y\]" - with pytest.raises(OutOfBoundsDatetime, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=msg): conversion.ensure_timedelta64ns(arr) From 663108025400cfea3b88a4a582df3c08a3d7a516 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 Jun 2020 14:48:27 -0700 Subject: [PATCH 4/6] add OOBTD to pd.errors --- pandas/errors/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index e3427d93f3d84..6ac3004d29996 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -6,7 +6,7 @@ from pandas._config.config import OptionError -from pandas._libs.tslibs import OutOfBoundsDatetime +from pandas._libs.tslibs import OutOfBoundsDatetime, OutOfBoundsTimedelta class NullFrequencyError(ValueError): From 409e7e6be122593b3de2a930bc21afb73da249a8 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Jul 2020 10:29:32 -0700 Subject: [PATCH 5/6] docstring, add to general_utility_functions --- doc/source/reference/general_utility_functions.rst | 1 + pandas/_libs/tslibs/conversion.pyx | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/doc/source/reference/general_utility_functions.rst b/doc/source/reference/general_utility_functions.rst index 72a84217323ab..c1759110b94ad 100644 --- a/doc/source/reference/general_utility_functions.rst +++ b/doc/source/reference/general_utility_functions.rst @@ -43,6 +43,7 @@ Exceptions and warnings errors.NullFrequencyError errors.NumbaUtilError errors.OutOfBoundsDatetime + errors.OutOfBoundsTimedelta errors.ParserError errors.ParserWarning errors.PerformanceWarning diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 64ab649496805..218f6d3d09ff4 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -53,6 +53,10 @@ TD64NS_DTYPE = np.dtype('m8[ns]') class OutOfBoundsTimedelta(ValueError): + """ + Raised when encountering a timedelta value that cannot be represented + as a timedelta64[ns]. + """ # Timedelta analogue to OutOfBoundsDatetime pass From e87b5a2d337cbc75aea80e19756864b7c64031e6 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Jul 2020 21:05:39 -0700 Subject: [PATCH 6/6] update exception message --- pandas/_libs/tslibs/conversion.pyx | 3 +-- pandas/tests/tslibs/test_conversion.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 1b83343804ba0..85da7a60a029a 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -260,9 +260,8 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True): else: bad_val = tdmax - unit_str = arr.dtype.str.split("[")[-1][:-1] raise OutOfBoundsTimedelta( - f"Out of bounds for nanosecond timedelta {bad_val}[{unit_str}]" + f"Out of bounds for nanosecond {arr.dtype.name} {bad_val}" ) return dt64_result.view(TD64NS_DTYPE) diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index d88d4aa1d852d..4f184b78f34a1 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -97,7 +97,7 @@ def test_ensure_datetime64ns_bigendian(): def test_ensure_timedelta64ns_overflows(): arr = np.arange(10).astype("m8[Y]") * 100 - msg = r"Out of bounds for nanosecond timedelta 900\[Y\]" + msg = r"Out of bounds for nanosecond timedelta64\[Y\] 900" with pytest.raises(OutOfBoundsTimedelta, match=msg): conversion.ensure_timedelta64ns(arr)