@@ -1724,7 +1724,8 @@ def _generate_regular_range(cls, start, end, periods, freq):
1724
1724
return data
1725
1725
1726
1726
1727
- def _generate_range_overflow_safe (endpoint , periods , stride , side = 'start' ):
1727
+ def _generate_range_overflow_safe (endpoint , periods , stride ,
1728
+ side = 'start' ):
1728
1729
"""
1729
1730
Calculate the second endpoint for passing to np.arange, checking
1730
1731
to avoid an integer overflow. Catch OverflowError and re-raise
@@ -1747,12 +1748,78 @@ def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
1747
1748
"""
1748
1749
# GH#14187 raise instead of incorrectly wrapping around
1749
1750
assert side in ['start' , 'end' ]
1751
+
1752
+ i64max = np .iinfo (np .int64 ).max
1753
+ msg = ('Cannot generate range with {side}={endpoint} and '
1754
+ 'periods={periods}'
1755
+ .format (side = side , endpoint = endpoint , periods = periods ))
1756
+
1757
+ with np .errstate (over = "raise" ):
1758
+ # if periods * stride cannot be multiplied within the *uint64* bounds,
1759
+ # we cannot salvage the operation by recursing, so raise
1760
+ try :
1761
+ addend = np .uint64 (periods ) * np .uint64 (np .abs (stride ))
1762
+ except FloatingPointError :
1763
+ raise tslib .OutOfBoundsDatetime (msg )
1764
+
1765
+ if np .abs (addend ) <= i64max :
1766
+ # relatively easy case without casting concerns
1767
+ return _generate_range_overflow_safe_signed (
1768
+ endpoint , periods , stride , side )
1769
+
1770
+ elif ((endpoint > 0 and side == 'start' ) or
1771
+ (endpoint < 0 and side == 'end' )):
1772
+ # no chance of not-overflowing
1773
+ raise tslib .OutOfBoundsDatetime (msg )
1774
+
1775
+ elif (side == 'end' and endpoint > i64max and endpoint - stride <= i64max ):
1776
+ # in _generate_regular_range we added `stride` thereby overflowing
1777
+ # the bounds. Adjust to fix this.
1778
+ return _generate_range_overflow_safe (endpoint - stride ,
1779
+ periods - 1 , stride , side )
1780
+
1781
+ # split into smaller pieces
1782
+ return _generate_range_recurse (endpoint , periods , stride , side )
1783
+
1784
+
1785
+ def _generate_range_overflow_safe_signed (endpoint , periods , stride , side ):
1786
+ """
1787
+ A special case for _generate_range_overflow_safe where `periods * stride`
1788
+ can be calculated without overflowing int64 bounds.
1789
+ """
1790
+ assert side in ['start' , 'end' ]
1750
1791
if side == 'end' :
1751
1792
stride *= - 1
1752
1793
1794
+ with np .errstate (over = "raise" ):
1795
+ addend = np .int64 (periods ) * np .int64 (stride )
1796
+ try :
1797
+ # easy case with no overflows
1798
+ return np .int64 (endpoint ) + addend
1799
+ except (FloatingPointError , OverflowError ):
1800
+ # with endpoint negative and addend positive we risk
1801
+ # FloatingPointError; with reversed signs we risk OverflowError
1802
+ pass
1803
+
1804
+ if stride > 0 :
1805
+ # watch out for very special case in which we just slightly
1806
+ # exceed implementation bounds, but when passing the result to
1807
+ # np.arange will get a result slightly within the bounds
1808
+ if endpoint >= 0 :
1809
+ result = np .uint64 (endpoint ) + np .uint64 (addend )
1810
+ i64max = np .uint64 (np .iinfo (np .int64 ).max )
1811
+ if result <= i64max + np .uint64 (stride ):
1812
+ return result
1813
+ else :
1814
+ return _generate_range_recurse (endpoint , periods ,
1815
+ np .abs (stride ), side )
1816
+ elif stride < 0 and endpoint > 0 :
1817
+ return _generate_range_recurse (np .uint64 (endpoint ), periods ,
1818
+ np .abs (stride ), side )
1819
+
1753
1820
try :
1754
1821
other_end = checked_add_with_arr (np .int64 (endpoint ),
1755
- np . int64 ( periods ) * stride )
1822
+ addend )
1756
1823
except OverflowError :
1757
1824
raise tslib .OutOfBoundsDatetime ('Cannot generate range with '
1758
1825
'{side}={endpoint} and '
@@ -1762,6 +1829,33 @@ def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
1762
1829
return other_end
1763
1830
1764
1831
1832
def _generate_range_recurse(endpoint, periods, stride, side):
    """
    Avoid problems in int64/uint64 mismatch by splitting range generation into
    smaller pieces.

    The requested number of periods is cut into two consecutive pieces.  The
    far end of the first piece is computed with
    ``_generate_range_overflow_safe`` and then used as the endpoint from
    which the second piece is computed.

    Parameters
    ----------
    endpoint : int
    periods : int
    stride : int
    side : {'start', 'end'}

    Returns
    -------
    other_end : int

    Raises
    ------
    AssertionError
        If ``periods`` cannot be split into two strictly smaller pieces,
        i.e. when ``periods <= 1``.
    """
    # split into smaller pieces
    mid_periods = periods // 2
    remaining = periods - mid_periods

    # Both pieces must be strictly smaller than the whole; otherwise the
    # second call below would be handed the same problem again.
    assert 0 < remaining < periods, (remaining, periods, endpoint, stride)

    # NOTE: no debugging output here -- this runs once per recursion step
    # and must not write to stdout from library code.
    midpoint = _generate_range_overflow_safe(endpoint, mid_periods,
                                             stride, side)
    return _generate_range_overflow_safe(midpoint, remaining, stride, side)
1857
+
1858
+
1765
1859
# -------------------------------------------------------------------
1766
1860
# Validation and Inference
1767
1861
0 commit comments