Skip to content

Commit 388d22c

Browse files
jbrockmendel authored and jreback committed
BUG: avoid overflow in Bday generate_range, closes #24252 (#26651)
1 parent 7f8dd72 commit 388d22c

File tree

7 files changed

+49
-6
lines changed

7 files changed

+49
-6
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ Datetimelike
600600
- Bug in :meth:`isin` for datetimelike indexes; :class:`DatetimeIndex`, :class:`TimedeltaIndex` and :class:`PeriodIndex` where the ``levels`` parameter was ignored. (:issue:`26675`)
601601
- Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'``
602602
- Bug when comparing a :class:`PeriodIndex` against a zero-dimensional numpy array (:issue:`26689`)
603+
- Bug in :func:`date_range` where an unnecessary ``OverflowError`` was raised for very large or very small dates (:issue:`26651`)
603604

604605
Timedelta
605606
^^^^^^^^^

pandas/_libs/tslibs/conversion.pyx

+9
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,10 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
275275
- iso8601 string object
276276
- python datetime object
277277
- another timestamp object
278+
279+
Raises
280+
------
281+
OutOfBoundsDatetime : ts cannot be converted within implementation bounds
278282
"""
279283
cdef:
280284
_TSObject obj
@@ -294,6 +298,11 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
294298
if obj.value != NPY_NAT:
295299
dt64_to_dtstruct(obj.value, &obj.dts)
296300
elif is_integer_object(ts):
301+
try:
302+
ts = <int64_t>ts
303+
except OverflowError:
304+
# GH#26651 re-raise as OutOfBoundsDatetime
305+
raise OutOfBoundsDatetime(ts)
297306
if ts == NPY_NAT:
298307
obj.value = NPY_NAT
299308
else:

pandas/tests/arithmetic/test_timedelta64.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
import numpy as np
66
import pytest
77

8-
from pandas.errors import NullFrequencyError, PerformanceWarning
8+
from pandas.errors import (
9+
NullFrequencyError, OutOfBoundsDatetime, PerformanceWarning)
910

1011
import pandas as pd
1112
from pandas import (
@@ -479,10 +480,10 @@ def test_tdi_add_timestamp_nat_masking(self):
479480

480481
def test_tdi_add_overflow(self):
481482
# See GH#14068
482-
msg = "too (big|large) to convert"
483-
with pytest.raises(OverflowError, match=msg):
483+
# preliminary test scalar analogue of vectorized tests below
484+
with pytest.raises(OutOfBoundsDatetime):
484485
pd.to_timedelta(106580, 'D') + Timestamp('2000')
485-
with pytest.raises(OverflowError, match=msg):
486+
with pytest.raises(OutOfBoundsDatetime):
486487
Timestamp('2000') + pd.to_timedelta(106580, 'D')
487488

488489
_NaT = int(pd.NaT) + 1

pandas/tests/indexes/datetimes/test_date_range.py

+13
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,19 @@ def test_bdays_and_open_boundaries(self, closed):
740740
expected = pd.date_range(bday_start, bday_end, freq='D')
741741
tm.assert_index_equal(result, expected)
742742

743+
def test_bday_near_overflow(self):
744+
# GH#24252 avoid doing unnecessary addition that _would_ overflow
745+
start = pd.Timestamp.max.floor("D").to_pydatetime()
746+
rng = pd.date_range(start, end=None, periods=1, freq='B')
747+
expected = pd.DatetimeIndex([start], freq='B')
748+
tm.assert_index_equal(rng, expected)
749+
750+
def test_bday_overflow_error(self):
751+
# GH#24252 check that we get OutOfBoundsDatetime and not OverflowError
752+
start = pd.Timestamp.max.floor("D").to_pydatetime()
753+
with pytest.raises(OutOfBoundsDatetime):
754+
pd.date_range(start, periods=2, freq='B')
755+
743756

744757
class TestCustomDateRange:
745758

pandas/tests/scalar/timestamp/test_timestamp.py

+7
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,13 @@ def test_invalid_date_kwarg_with_string_input(self, arg):
463463
with pytest.raises(ValueError):
464464
Timestamp('2010-10-10 12:59:59.999999999', **kwarg)
465465

466+
def test_out_of_bounds_integer_value(self):
467+
# GH#26651 check that we raise OutOfBoundsDatetime, not OverflowError
468+
with pytest.raises(OutOfBoundsDatetime):
469+
Timestamp(Timestamp.max.value * 2)
470+
with pytest.raises(OutOfBoundsDatetime):
471+
Timestamp(Timestamp.min.value * 2)
472+
466473
def test_out_of_bounds_value(self):
467474
one_us = np.timedelta64(1).astype('timedelta64[us]')
468475

pandas/tests/tseries/offsets/test_offsets.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def test_apply_out_of_range(self, tz_naive_fixture):
115115
assert t.tzinfo == result.tzinfo
116116

117117
except OutOfBoundsDatetime:
118-
raise
118+
pass
119119
except (ValueError, KeyError):
120120
# we are creating an invalid offset
121121
# so ignore

pandas/tseries/offsets.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ def wrapper(self, other):
9797
if tz is not None and result.tzinfo is None:
9898
result = conversion.localize_pydatetime(result, tz)
9999

100+
result = Timestamp(result)
101+
100102
return result
101103
return wrapper
102104

@@ -2330,7 +2332,7 @@ def apply(self, other):
23302332
# an exception, when we call using the + operator,
23312333
# we directly call the known method
23322334
result = other.__add__(self)
2333-
if result == NotImplemented:
2335+
if result is NotImplemented:
23342336
raise OverflowError
23352337
return result
23362338
elif isinstance(other, (datetime, np.datetime64, date)):
@@ -2467,6 +2469,11 @@ def generate_range(start=None, end=None, periods=None, offset=BDay()):
24672469
while cur <= end:
24682470
yield cur
24692471

2472+
if cur == end:
2473+
# GH#24252 avoid overflows by not performing the addition
2474+
# in offset.apply unless we have to
2475+
break
2476+
24702477
# faster than cur + offset
24712478
next_date = offset.apply(cur)
24722479
if next_date <= cur:
@@ -2477,6 +2484,11 @@ def generate_range(start=None, end=None, periods=None, offset=BDay()):
24772484
while cur >= end:
24782485
yield cur
24792486

2487+
if cur == end:
2488+
# GH#24252 avoid overflows by not performing the addition
2489+
# in offset.apply unless we have to
2490+
break
2491+
24802492
# faster than cur + offset
24812493
next_date = offset.apply(cur)
24822494
if next_date >= cur:

0 commit comments

Comments
 (0)