From 44c3a107c2ea9a2d429daa8422de491a618dbe5f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 17 Feb 2018 10:52:41 -0800 Subject: [PATCH 1/6] Fix wraparound/overflow in date_range --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/indexes/datetimes.py | 23 +++++++++++++++++-- .../indexes/datetimes/test_date_range.py | 6 +++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a2198d9103528..df3029ef00903 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -716,7 +716,7 @@ Datetimelike - Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) - Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) -- +- Bug in :func:`date_range` where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:``) Timezones ^^^^^^^^^ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index cc9ce1f3fd5eb..e72fd0a1d636a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2137,11 +2137,30 @@ def _generate_regular_range(start, end, periods, offset): tz = start.tz elif start is not None: b = Timestamp(start).value - e = b + np.int64(periods) * stride + try: + with np.errstate(over='raise'): + # raise instead of incorrectly wrapping around + e = b + np.int64(periods) * stride + except (FloatingPointError, OverflowError): + raise libts.OutOfBoundsDatetime('Cannot generate range with ' + 'start={start} and ' + 'periods={periods}' + .format(start=start, + periods=periods)) + tz = start.tz elif end is not None: e = Timestamp(end).value + stride - b = e - np.int64(periods) * stride + try: + with np.errstate(over='raise'): + # raise instead of incorrectly wrapping around + b = e - np.int64(periods) * stride + except (FloatingPointError, OverflowError): + raise libts.OutOfBoundsDatetime('Cannot generate range with ' + 'start={start} and ' + 'periods={periods}' + .format(start=start, + periods=periods)) tz = end.tz else: raise ValueError("at least 'start' or 'end' should be specified " diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 3738398d017f8..66c18ddfe1f75 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -13,6 +13,7 @@ import pandas.util.testing as tm import pandas.util._test_decorators as td from pandas import compat +from pandas.errors import OutOfBoundsDatetime from pandas import date_range, bdate_range, offsets, DatetimeIndex, Timestamp from pandas.tseries.offsets import (generate_range, CDay, BDay, DateOffset, MonthEnd, prefix_mapping) @@ -78,6 +79,11 @@ def test_date_range_timestamp_equiv_preserve_frequency(self): class TestDateRanges(TestData): + def test_date_range_out_of_bounds(self): + with pytest.raises(OutOfBoundsDatetime): + date_range('2016-01-01', periods=100000, freq='D') + with pytest.raises(OutOfBoundsDatetime): + date_range(end='1763-10-12', periods=100000, freq='D') def test_date_range_gen_error(self): rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') From db37391cdd131fdff092f3939ae88692a2a5b506 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 17 Feb 2018 10:53:19 -0800 Subject: [PATCH 2/6] edit whatsnew --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index df3029ef00903..579ec35bc5c1b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -716,7 +716,7 @@ Datetimelike - Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) - Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) -- Bug in :func:`date_range` where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:``) +- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:``) Timezones ^^^^^^^^^ From b88aff0468a4f187b1143031c2dd212ad52bcdaa Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 17 Feb 2018 10:54:21 -0800 Subject: [PATCH 3/6] add GH reference --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/indexes/datetimes.py | 4 ++-- pandas/tests/indexes/datetimes/test_date_range.py | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 579ec35bc5c1b..fac007febd29f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -716,7 +716,7 @@ Datetimelike - Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) - Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) -- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:``) +- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`19740`) Timezones ^^^^^^^^^ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e72fd0a1d636a..37e2532ab2129 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2139,7 +2139,7 @@ def _generate_regular_range(start, end, periods, offset): b = Timestamp(start).value try: with np.errstate(over='raise'): - # raise instead of incorrectly wrapping around + # raise instead of incorrectly wrapping around GH#19740 e = b + np.int64(periods) * stride except (FloatingPointError, OverflowError): raise libts.OutOfBoundsDatetime('Cannot generate range with ' @@ -2153,7 +2153,7 @@ def _generate_regular_range(start, end, periods, offset): e = Timestamp(end).value + stride try: with np.errstate(over='raise'): - # raise instead of incorrectly wrapping around + # raise instead of incorrectly wrapping around GH#19740 b = e - np.int64(periods) * stride except (FloatingPointError, OverflowError): raise libts.OutOfBoundsDatetime('Cannot generate range with ' diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 66c18ddfe1f75..dda68335e03b2 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -80,6 +80,7 @@ def test_date_range_timestamp_equiv_preserve_frequency(self): class TestDateRanges(TestData): def test_date_range_out_of_bounds(self): + # GH#19740 with pytest.raises(OutOfBoundsDatetime): date_range('2016-01-01', periods=100000, freq='D') with pytest.raises(OutOfBoundsDatetime): From 54bd77598b88923e35c4eefc4214d404352eb942 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 18 Feb 2018 15:00:58 -0800 Subject: [PATCH 4/6] wrap checked_add_with_arr --- pandas/core/indexes/datetimes.py | 61 +++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 37e2532ab2129..0d86b32c719fe 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2137,30 +2137,11 @@ def _generate_regular_range(start, end, periods, offset): tz = start.tz elif start is not None: b = Timestamp(start).value - try: - with np.errstate(over='raise'): - # raise instead of incorrectly wrapping around GH#19740 - e = b + np.int64(periods) * stride - except (FloatingPointError, OverflowError): - raise libts.OutOfBoundsDatetime('Cannot generate range with ' - 'start={start} and ' - 'periods={periods}' - .format(start=start, - periods=periods)) - + e = _reraise_overflow_as_oob(b, periods, stride, side='start') tz = start.tz elif end is not None: e = Timestamp(end).value + stride - try: - with np.errstate(over='raise'): - # raise instead of incorrectly wrapping around GH#19740 - b = e - np.int64(periods) * stride - except (FloatingPointError, OverflowError): - raise libts.OutOfBoundsDatetime('Cannot generate range with ' - 'start={start} and ' - 'periods={periods}' - .format(start=start, - periods=periods)) + b = _reraise_overflow_as_oob(e, periods, stride, side='end') tz = end.tz else: raise ValueError("at least 'start' or 'end' should be specified " @@ -2185,6 +2166,44 @@ def _generate_regular_range(start, end, periods, offset): return data +def _reraise_overflow_as_oob(endpoint, periods, stride, side='start'): + """ + Calculate the second endpoint for passing to np.arange, checking + to avoid an integer overflow. Catch OverflowError and re-raise + as OutOfBoundsDatetime. + + Parameters + ---------- + endpoint : int + periods : int + stride : int + side : {'start', 'end'} + + Returns + ------- + other_end : int + + Raises + ------ + OutOfBoundsDatetime + """ + # GH#19740 raise instead of incorrectly wrapping around + assert side in ['start', 'end'] + if side == 'end': + stride *= -1 + + try: + other_end = checked_add_with_arr(np.int64(endpoint), + np.int64(periods) * stride) + except OverflowError: + raise libts.OutOfBoundsDatetime('Cannot generate range with ' + '{side}={endpoint} and ' + 'periods={periods}' + .format(side=side, endpoint=endpoint, + periods=periods)) + return other_end + + def date_range(start=None, end=None, periods=None, freq='D', tz=None, normalize=False, name=None, closed=None, **kwargs): """ From 25c79de0ff88e80cb9545360c776874bcacb7d95 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 25 Oct 2018 19:17:38 -0700 Subject: [PATCH 5/6] Fixup GH references --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/arrays/datetimes.py | 2 +- pandas/tests/indexes/datetimes/test_date_range.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index bd1b10eafe4fb..2d9db656b7c78 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1021,7 +1021,7 @@ Datetimelike - Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`) - Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`) - Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`) -- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`19740`) +- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 39d3d4119376d..e39589660aca6 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1393,7 +1393,7 @@ def _generate_range_overflow_safe(endpoint, periods, stride, side='start'): ------ OutOfBoundsDatetime """ - # GH#19740 raise instead of incorrectly wrapping around + # GH#14187 raise instead of incorrectly wrapping around assert side in ['start', 'end'] if side == 'end': stride *= -1 diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index a45b6a5bf296b..6496a38326acf 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -80,7 +80,7 @@ def test_date_range_timestamp_equiv_preserve_frequency(self): class TestDateRanges(TestData): def test_date_range_out_of_bounds(self): - # GH#19740 + # GH#14187 with pytest.raises(OutOfBoundsDatetime): date_range('2016-01-01', periods=100000, freq='D') with pytest.raises(OutOfBoundsDatetime): From 4fe5dad54e33b132974afa0adcabb46fd3ed0dda Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 27 Oct 2018 23:13:12 -0400 Subject: [PATCH 6/6] fixup --- pandas/tests/indexes/datetimes/test_date_range.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 577a3c4c022d0..450d7643bfbd5 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -15,6 +15,7 @@ from pandas import ( DatetimeIndex, Timestamp, bdate_range, compat, date_range, offsets ) +from pandas.errors import OutOfBoundsDatetime from pandas.tests.series.common import TestData from pandas.tseries.offsets import ( BDay, CDay, DateOffset, MonthEnd, generate_range, prefix_mapping