Skip to content

Commit 85ca49d

Browse files
committed
BUG: Catch overflow in both directions for checked add
1) Add checks to ensure that addition overflow is caught in both the positive and negative directions. 2) Add benchmarks to ensure that operations involving this checked add function are not significantly impacted.
1 parent 7cad3f1 commit 85ca49d

File tree

5 files changed

+81
-6
lines changed

5 files changed

+81
-6
lines changed

asv_bench/benchmarks/algorithms.py

+26
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@ def setup(self):
1515
self.int = pd.Int64Index(np.arange(N).repeat(5))
1616
self.float = pd.Float64Index(np.random.randn(N).repeat(5))
1717

18+
# Convenience naming.
19+
self.checked_add = pd.core.nanops._checked_add_with_arr
20+
21+
self.arr = np.arange(1000000)
22+
self.arrpos = np.arange(1000000)
23+
self.arrneg = np.arange(-1000000, 0)
24+
self.arrmixed = np.array([1, -1]).repeat(500000)
25+
1826
def time_int_factorize(self):
1927
self.int.factorize()
2028

@@ -29,3 +37,21 @@ def time_int_duplicated(self):
2937

3038
def time_float_duplicated(self):
3139
self.float.duplicated()
40+
41+
def time_add_overflow_pos_scalar(self):
42+
self.checked_add(self.arr, 1)
43+
44+
def time_add_overflow_neg_scalar(self):
45+
self.checked_add(self.arr, -1)
46+
47+
def time_add_overflow_zero_scalar(self):
48+
self.checked_add(self.arr, 0)
49+
50+
def time_add_overflow_pos_arr(self):
51+
self.checked_add(self.arr, self.arrpos)
52+
53+
def time_add_overflow_neg_arr(self):
54+
self.checked_add(self.arr, self.arrneg)
55+
56+
def time_add_overflow_mixed_arr(self):
57+
self.checked_add(self.arr, self.arrmixed)

asv_bench/benchmarks/timedelta.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .pandas_vb_common import *
2-
from pandas import to_timedelta
2+
from pandas import to_timedelta, Timestamp
33

44

55
class timedelta_convert_int(object):
@@ -47,3 +47,14 @@ def time_timedelta_convert_coerce(self):
4747

4848
def time_timedelta_convert_ignore(self):
4949
to_timedelta(self.arr, errors='ignore')
50+
51+
52+
class timedelta_add_overflow(object):
53+
goal_time = 0.2
54+
55+
def setup(self):
56+
self.td = to_timedelta(np.arange(1000000))
57+
self.ts = Timestamp('2000')
58+
59+
def test_add_td_ts(self):
60+
self.td + self.ts

pandas/core/nanops.py

+33-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import pandas.hashtable as _hash
1313
from pandas import compat, lib, algos, tslib
14+
from pandas.compat.numpy import _np_version_under1p10
1415
from pandas.types.common import (_ensure_int64, _ensure_object,
1516
_ensure_float64, _get_dtype,
1617
is_float, is_scalar,
@@ -829,9 +830,37 @@ def _checked_add_with_arr(arr, b):
829830
830831
Raises
831832
------
832-
OverflowError if any x + y exceeds the maximum int64 value.
833+
OverflowError if any x + y exceeds the maximum or minimum int64 value.
833834
"""
834-
if (np.iinfo(np.int64).max - b < arr).any():
835-
raise OverflowError("Python int too large to "
836-
"convert to C long")
835+
# For performance reasons, we broadcast 'b' to the new array 'b2'
836+
# so that it has the same size as 'arr'.
837+
if _np_version_under1p10:
838+
if lib.isscalar(b):
839+
b2 = np.empty(arr.shape)
840+
b2.fill(b)
841+
else:
842+
b2 = b
843+
else:
844+
b2 = np.broadcast_to(b, arr.shape)
845+
846+
# gh-14324: For each element in 'arr' and its corresponding element
847+
# in 'b2', we check the sign of the element in 'b2'. If it is positive,
848+
# we then check whether its sum with the element in 'arr' exceeds
849+
# np.iinfo(np.int64).max. If so, we have an overflow error. If it
850+
# is negative, we then check whether its sum with the element in
851+
# 'arr' falls below np.iinfo(np.int64).min. If so, we have an overflow
852+
# error as well.
853+
mask1 = b2 > 0
854+
mask2 = b2 < 0
855+
856+
if not mask1.any():
857+
to_raise = (np.iinfo(np.int64).min - b2 > arr).any()
858+
elif not mask2.any():
859+
to_raise = (np.iinfo(np.int64).max - b2 < arr).any()
860+
else:
861+
to_raise = ((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]).any() or
862+
(np.iinfo(np.int64).min - b2[mask2] > arr[mask2]).any())
863+
864+
if to_raise:
865+
raise OverflowError("Overflow in int64 addition")
837866
return arr + b

pandas/tests/test_nanops.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -1004,13 +1004,20 @@ def prng(self):
10041004

10051005
def test_int64_add_overflow():
10061006
# see gh-14068
1007-
msg = "too (big|large) to convert"
1007+
msg = "Overflow in int64 addition"
10081008
m = np.iinfo(np.int64).max
1009+
n = np.iinfo(np.int64).min
10091010

10101011
with tm.assertRaisesRegexp(OverflowError, msg):
10111012
nanops._checked_add_with_arr(np.array([m, m]), m)
10121013
with tm.assertRaisesRegexp(OverflowError, msg):
10131014
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]))
1015+
with tm.assertRaisesRegexp(OverflowError, msg):
1016+
nanops._checked_add_with_arr(np.array([n, n]), n)
1017+
with tm.assertRaisesRegexp(OverflowError, msg):
1018+
nanops._checked_add_with_arr(np.array([n, n]), np.array([n, n]))
1019+
with tm.assertRaisesRegexp(OverflowError, msg):
1020+
nanops._checked_add_with_arr(np.array([m, n]), np.array([n, n]))
10141021
with tm.assertRaisesRegexp(OverflowError, msg):
10151022
with tm.assert_produces_warning(RuntimeWarning):
10161023
nanops._checked_add_with_arr(np.array([m, m]),

pandas/tseries/tests/test_timedeltas.py

+2
Original file line numberDiff line numberDiff line change
@@ -1957,6 +1957,8 @@ def test_add_overflow(self):
19571957
to_timedelta(106580, 'D') + Timestamp('2000')
19581958
with tm.assertRaisesRegexp(OverflowError, msg):
19591959
Timestamp('2000') + to_timedelta(106580, 'D')
1960+
1961+
msg = "Overflow in int64 addition"
19601962
with tm.assertRaisesRegexp(OverflowError, msg):
19611963
to_timedelta([106580], 'D') + Timestamp('2000')
19621964
with tm.assertRaisesRegexp(OverflowError, msg):

0 commit comments

Comments
 (0)