Skip to content

Commit db4da3b

Browse files
committed
Fix overflow bugs in date_range
1 parent d43ac97 commit db4da3b

File tree

2 files changed

+121
-2
lines changed

2 files changed

+121
-2
lines changed

pandas/core/arrays/datetimes.py

+96-2
Original file line numberDiff line numberDiff line change
@@ -1724,7 +1724,8 @@ def _generate_regular_range(cls, start, end, periods, freq):
17241724
return data
17251725

17261726

1727-
def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
1727+
def _generate_range_overflow_safe(endpoint, periods, stride,
1728+
side='start'):
17281729
"""
17291730
Calculate the second endpoint for passing to np.arange, checking
17301731
to avoid an integer overflow. Catch OverflowError and re-raise
@@ -1747,12 +1748,78 @@ def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
17471748
"""
17481749
# GH#14187 raise instead of incorrectly wrapping around
17491750
assert side in ['start', 'end']
1751+
1752+
i64max = np.iinfo(np.int64).max
1753+
msg = ('Cannot generate range with {side}={endpoint} and '
1754+
'periods={periods}'
1755+
.format(side=side, endpoint=endpoint, periods=periods))
1756+
1757+
with np.errstate(over="raise"):
1758+
# if periods * strides cannot be multiplied within the *uint64* bounds,
1759+
# we cannot salvage the operation by recursing, so raise
1760+
try:
1761+
addend = np.uint64(periods) * np.uint64(np.abs(stride))
1762+
except FloatingPointError:
1763+
raise tslib.OutOfBoundsDatetime(msg)
1764+
1765+
if np.abs(addend) <= i64max:
1766+
# relatively easy case without casting concerns
1767+
return _generate_range_overflow_safe_signed(
1768+
endpoint, periods, stride, side)
1769+
1770+
elif ((endpoint > 0 and side == 'start') or
1771+
(endpoint < 0 and side == 'end')):
1772+
# no chance of not-overflowing
1773+
raise tslib.OutOfBoundsDatetime(msg)
1774+
1775+
elif (side == 'end' and endpoint > i64max and endpoint - stride <= i64max):
1776+
# in _generate_regular_range we added `stride` thereby overflowing
1777+
# the bounds. Adjust to fix this.
1778+
return _generate_range_overflow_safe(endpoint - stride,
1779+
periods - 1, stride, side)
1780+
1781+
# split into smaller pieces
1782+
return _generate_range_recurse(endpoint, periods, stride, side)
1783+
1784+
1785+
def _generate_range_overflow_safe_signed(endpoint, periods, stride, side):
1786+
"""
1787+
A special case for _generate_range_overflow_safe where `periods * stride`
1788+
can be calculated without overflowing int64 bounds.
1789+
"""
1790+
assert side in ['start', 'end']
17501791
if side == 'end':
17511792
stride *= -1
17521793

1794+
with np.errstate(over="raise"):
1795+
addend = np.int64(periods) * np.int64(stride)
1796+
try:
1797+
# easy case with no overflows
1798+
return np.int64(endpoint) + addend
1799+
except (FloatingPointError, OverflowError):
1800+
# with endpoint negative and addend positive we risk
1801+
# FloatingPointError; with reversed signed we risk OverflowError
1802+
pass
1803+
1804+
if stride > 0:
1805+
# watch out for very special case in which we just slightly
1806+
# exceed implementation bounds, but when passing the result to
1807+
# np.arange will get a result slightly within the bounds
1808+
if endpoint >= 0:
1809+
result = np.uint64(endpoint) + np.uint64(addend)
1810+
i64max = np.uint64(np.iinfo(np.int64).max)
1811+
if result <= i64max + np.uint64(stride):
1812+
return result
1813+
else:
1814+
return _generate_range_recurse(endpoint, periods,
1815+
np.abs(stride), side)
1816+
elif stride < 0 and endpoint > 0:
1817+
return _generate_range_recurse(np.uint64(endpoint), periods,
1818+
np.abs(stride), side)
1819+
17531820
try:
17541821
other_end = checked_add_with_arr(np.int64(endpoint),
1755-
np.int64(periods) * stride)
1822+
addend)
17561823
except OverflowError:
17571824
raise tslib.OutOfBoundsDatetime('Cannot generate range with '
17581825
'{side}={endpoint} and '
@@ -1762,6 +1829,33 @@ def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
17621829
return other_end
17631830

17641831

1832+
def _generate_range_recurse(endpoint, periods, stride, side):
    """
    Avoid problems in int64/uint64 mismatch by splitting range generation into
    smaller pieces.

    The first ``periods // 2`` steps are taken from `endpoint` to obtain a
    midpoint; the remaining steps are then taken from that midpoint. Both
    hops are delegated to `_generate_range_overflow_safe`.

    Parameters
    ----------
    endpoint : int
    periods : int
    stride : int
    side : {'start', 'end'}

    Returns
    -------
    other_end : int
    """
    # split into smaller pieces
    mid_periods = periods // 2
    remaining = periods - mid_periods
    # The second piece must be strictly smaller than the whole; otherwise the
    # call below would be re-issued with the same `periods` and the
    # back-and-forth with _generate_range_overflow_safe would make no progress.
    assert 0 < remaining < periods, (remaining, periods, endpoint, stride)

    midpoint = _generate_range_overflow_safe(endpoint, mid_periods,
                                             stride, side)
    return _generate_range_overflow_safe(midpoint, remaining, stride, side)
1857+
1858+
17651859
# -------------------------------------------------------------------
17661860
# Validation and Inference
17671861

pandas/tests/indexes/datetimes/test_date_range.py

+25
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,31 @@ def test_date_range_timestamp_equiv_preserve_frequency(self):
8080

8181

8282
class TestDateRanges(TestData):
83+
def test_date_range_multiplication_overflow(self):
    """
    Overflows while calculating ``addend = periods * stride`` must be caught.
    """
    first_day = '1677-09-22'
    n_days = 213503

    # generating this range should not emit an overflow RuntimeWarning
    with tm.assert_produces_warning(None):
        index = date_range(start=first_day, periods=n_days, freq='D')

    assert index[0] == Timestamp(first_day)
    assert len(index) == n_days

    # a request that cannot be represented must raise instead
    with pytest.raises(OutOfBoundsDatetime,
                       match="Cannot generate range with"):
        date_range('1969-05-04', periods=200000000, freq='30000D')
96+
97+
def test_date_range_unsigned_overflow_handling(self):
    """
    Case where ``addend = periods * stride`` overflows int64 bounds but not
    uint64 bounds.
    """
    expected = date_range(start='1677-09-22', end='2262-04-11', freq='D')
    count = len(expected)

    # rebuild the same index anchored at its first element
    from_start = date_range(start=expected[0], periods=count, freq='D')
    assert from_start.equals(expected)

    # rebuild the same index anchored at its last element
    from_end = date_range(end=expected[-1], periods=count, freq='D')
    assert from_end.equals(expected)
107+
83108
def test_date_range_out_of_bounds(self):
84109
# GH#14187
85110
with pytest.raises(OutOfBoundsDatetime):

0 commit comments

Comments
 (0)