Skip to content

Commit ce48455

Browse files
committed
BUG: Prevent addition overflow with TimedeltaIndex
Expands the checked-add array addition introduced in gh-14237 to cover all other addition cases (i.e. TimedeltaIndex and Timedelta). Follow-up to gh-14453.
1 parent b6de920 commit ce48455

File tree

5 files changed

+87
-8
lines changed

5 files changed

+87
-8
lines changed

asv_bench/benchmarks/algorithms.py

+13
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ def setup(self):
2424
self.arrneg = np.arange(-1000000, 0)
2525
self.arrmixed = np.array([1, -1]).repeat(500000)
2626

27+
self.arr_nan = np.random.choice([True, False], size=1000000)
28+
self.arrmixed_nan = np.random.choice([True, False], size=1000000)
29+
2730
# match
2831
self.uniques = tm.makeStringIndex(1000).values
2932
self.all = self.uniques.repeat(10)
@@ -64,6 +67,16 @@ def time_add_overflow_neg_arr(self):
6467
def time_add_overflow_mixed_arr(self):
6568
self.checked_add(self.arr, self.arrmixed)
6669

70+
def time_add_overflow_first_arg_nan(self):
71+
self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan)
72+
73+
def time_add_overflow_second_arg_nan(self):
    """Benchmark the checked add with a mask on the second addend only.

    Calls ``self.checked_add`` on ``self.arr`` and ``self.arrmixed`` and
    passes the boolean mask for the second addend as ``b_mask``.
    """
    # ``setup`` stores the second addend's mask as ``arrmixed_nan``; the
    # former ``arrmixed_arr_nan`` spelling is never assigned there and
    # would raise AttributeError when the benchmark runs.
    self.checked_add(self.arr, self.arrmixed, b_mask=self.arrmixed_nan)
75+
76+
def time_add_overflow_both_arg_nan(self):
    """Benchmark the checked add with masks on both addends.

    Calls ``self.checked_add`` on ``self.arr`` and ``self.arrmixed``,
    passing ``self.arr_nan`` as ``arr_mask`` and the second addend's mask
    as ``b_mask``.
    """
    # ``setup`` names the second mask ``arrmixed_nan``; the former
    # ``arrmixed_arr_nan`` spelling is never assigned there and would
    # raise AttributeError when the benchmark runs.
    self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan,
                     b_mask=self.arrmixed_nan)
79+
6780

6881
class Hashing(object):
6982
goal_time = 0.2

pandas/core/nanops.py

+27-6
Original file line numberDiff line numberDiff line change
@@ -812,15 +812,23 @@ def unique1d(values):
812812
return uniques
813813

814814

815-
def _checked_add_with_arr(arr, b):
815+
def _checked_add_with_arr(arr, b, arr_mask=None, b_mask=None):
816816
"""
817+
Perform array addition that checks for underflow and overflow.
818+
817819
Performs the addition of an int64 array and an int64 integer (or array)
818-
but checks that they do not result in overflow first.
820+
but checks that they do not result in overflow first. For elements that
821+
are indicated to be NaN, whether or not there is overflow for that element
822+
is automatically ignored.
819823
820824
Parameters
821825
----------
822826
arr : array addend.
823827
b : array or scalar addend.
828+
arr_mask : boolean array or None
829+
array indicating which elements to exclude from checking
830+
b_mask : boolean array or boolean or None
831+
array or scalar indicating which element(s) to exclude from checking
824832
825833
Returns
826834
-------
@@ -843,6 +851,17 @@ def _checked_add_with_arr(arr, b):
843851
else:
844852
b2 = np.broadcast_to(b, arr.shape)
845853

854+
# For elements that are NaN, regardless of their value, we should
855+
# ignore whether they overflow or not when doing the checked add.
856+
if arr_mask is not None and b_mask is not None:
857+
not_nan = np.logical_not(arr_mask | b_mask)
858+
elif arr_mask is not None:
859+
not_nan = np.logical_not(arr_mask)
860+
elif b_mask is not None:
861+
not_nan = np.logical_not(b_mask)
862+
else:
863+
not_nan = np.array([True])
864+
846865
# gh-14324: For each element in 'arr' and its corresponding element
847866
# in 'b2', we check the sign of the element in 'b2'. If it is positive,
848867
# we then check whether its sum with the element in 'arr' exceeds
@@ -854,12 +873,14 @@ def _checked_add_with_arr(arr, b):
854873
mask2 = b2 < 0
855874

856875
if not mask1.any():
857-
to_raise = (np.iinfo(np.int64).min - b2 > arr).any()
876+
to_raise = ((np.iinfo(np.int64).min - b2 > arr) & not_nan).any()
858877
elif not mask2.any():
859-
to_raise = (np.iinfo(np.int64).max - b2 < arr).any()
878+
to_raise = ((np.iinfo(np.int64).max - b2 < arr) & not_nan).any()
860879
else:
861-
to_raise = ((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]).any() or
862-
(np.iinfo(np.int64).min - b2[mask2] > arr[mask2]).any())
880+
to_raise = (((np.iinfo(np.int64).max -
881+
b2[mask1] < arr[mask1]) & not_nan[mask1]).any() or
882+
((np.iinfo(np.int64).min -
883+
b2[mask2] > arr[mask2]) & not_nan[mask2]).any())
863884

864885
if to_raise:
865886
raise OverflowError("Overflow in int64 addition")

pandas/tests/test_nanops.py

+27
Original file line numberDiff line numberDiff line change
@@ -1018,11 +1018,38 @@ def test_int64_add_overflow():
10181018
nanops._checked_add_with_arr(np.array([n, n]), np.array([n, n]))
10191019
with tm.assertRaisesRegexp(OverflowError, msg):
10201020
nanops._checked_add_with_arr(np.array([m, n]), np.array([n, n]))
1021+
with tm.assertRaisesRegexp(OverflowError, msg):
1022+
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
1023+
arr_mask=np.array([False, True]))
1024+
with tm.assertRaisesRegexp(OverflowError, msg):
1025+
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
1026+
b_mask=np.array([False, True]))
1027+
with tm.assertRaisesRegexp(OverflowError, msg):
1028+
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
1029+
arr_mask=np.array([False, True]),
1030+
b_mask=np.array([False, True]))
10211031
with tm.assertRaisesRegexp(OverflowError, msg):
10221032
with tm.assert_produces_warning(RuntimeWarning):
10231033
nanops._checked_add_with_arr(np.array([m, m]),
10241034
np.array([np.nan, m]))
10251035

1036+
# Check that the nan boolean arrays override whether or not
1037+
# the addition overflows. We don't check the result but just
1038+
# the fact that an OverflowError is not raised.
1039+
with tm.assertRaises(AssertionError):
1040+
with tm.assertRaisesRegexp(OverflowError, msg):
1041+
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
1042+
arr_mask=np.array([True, True]))
1043+
with tm.assertRaises(AssertionError):
1044+
with tm.assertRaisesRegexp(OverflowError, msg):
1045+
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
1046+
b_mask=np.array([True, True]))
1047+
with tm.assertRaises(AssertionError):
1048+
with tm.assertRaisesRegexp(OverflowError, msg):
1049+
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
1050+
arr_mask=np.array([True, False]),
1051+
b_mask=np.array([False, True]))
1052+
10261053

10271054
if __name__ == '__main__':
10281055
import nose

pandas/tseries/base.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pandas.types.missing import isnull
1818
from pandas.core import common as com, algorithms
1919
from pandas.core.common import AbstractMethodError
20+
from pandas.core.nanops import _checked_add_with_arr
2021

2122
import pandas.formats.printing as printing
2223
import pandas.tslib as tslib
@@ -684,7 +685,8 @@ def _add_delta_td(self, other):
684685
# return the i8 result view
685686

686687
inc = tslib._delta_to_nanoseconds(other)
687-
new_values = (self.asi8 + inc).view('i8')
688+
new_values = _checked_add_with_arr(self.asi8, inc,
689+
arr_mask=self._isnan).view('i8')
688690
if self.hasnans:
689691
new_values[self._isnan] = tslib.iNaT
690692
return new_values.view('i8')
@@ -699,7 +701,9 @@ def _add_delta_tdi(self, other):
699701

700702
self_i8 = self.asi8
701703
other_i8 = other.asi8
702-
new_values = self_i8 + other_i8
704+
new_values = _checked_add_with_arr(self_i8, other_i8,
705+
arr_mask=self._isnan,
706+
b_mask=other._isnan)
703707
if self.hasnans or other.hasnans:
704708
mask = (self._isnan) | (other._isnan)
705709
new_values[mask] = tslib.iNaT

pandas/tseries/tests/test_timedeltas.py

+14
Original file line numberDiff line numberDiff line change
@@ -1964,6 +1964,20 @@ def test_add_overflow(self):
19641964
with tm.assertRaisesRegexp(OverflowError, msg):
19651965
Timestamp('2000') + to_timedelta([106580], 'D')
19661966

1967+
# These should not overflow!
1968+
exp = TimedeltaIndex([pd.NaT])
1969+
result = to_timedelta([pd.NaT]) - Timedelta('1 days')
1970+
tm.assert_index_equal(result, exp)
1971+
1972+
exp = TimedeltaIndex(['4 days', pd.NaT])
1973+
result = to_timedelta(['5 days', pd.NaT]) - Timedelta('1 days')
1974+
tm.assert_index_equal(result, exp)
1975+
1976+
exp = TimedeltaIndex([pd.NaT, pd.NaT, '5 hours'])
1977+
result = (to_timedelta([pd.NaT, '5 days', '1 hours']) +
1978+
to_timedelta(['7 seconds', pd.NaT, '4 hours']))
1979+
tm.assert_index_equal(result, exp)
1980+
19671981
if __name__ == '__main__':
19681982
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
19691983
exit=False)

0 commit comments

Comments
 (0)