Skip to content

Commit 636ccf8

Browse files
jbrockmendel authored and No-Stream committed
Masking and overflow checks for datetimeindex and timedeltaindex ops (pandas-dev#18020)
closes pandas-dev#17991
1 parent 6d9064a commit 636ccf8

File tree

6 files changed

+63
-4
lines changed

6 files changed

+63
-4
lines changed

doc/source/whatsnew/v0.21.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ Documentation Changes
5757
Bug Fixes
5858
~~~~~~~~~
5959
- Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`)
60+
- Bug in :class:`TimedeltaIndex` addition with a datetimelike could incorrectly overflow when ``NaT`` is present (:issue:`17991`)
61+
- Bug in :class:`DatetimeIndex` where subtracting a datetimelike from a ``DatetimeIndex`` could fail to raise ``OverflowError`` when the result overflows (:issue:`18020`)
6062

6163
Conversion
6264
^^^^^^^^^^

pandas/core/indexes/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,7 @@ def __sub__(self, other):
679679
return self._add_delta(-other)
680680
elif is_integer(other):
681681
return self.shift(-other)
682-
elif isinstance(other, datetime):
682+
elif isinstance(other, (datetime, np.datetime64)):
683683
return self._sub_datelike(other)
684684
elif isinstance(other, Period):
685685
return self._sub_period(other)

pandas/core/indexes/datetimes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import pandas.core.dtypes.concat as _concat
3030
from pandas.errors import PerformanceWarning
3131
from pandas.core.common import _values_from_object, _maybe_box
32+
from pandas.core.algorithms import checked_add_with_arr
3233

3334
from pandas.core.indexes.base import Index, _index_shared_docs
3435
from pandas.core.indexes.numeric import Int64Index, Float64Index
@@ -767,7 +768,7 @@ def _sub_datelike(self, other):
767768
raise TypeError("DatetimeIndex subtraction must have the same "
768769
"timezones or no timezones")
769770
result = self._sub_datelike_dti(other)
770-
elif isinstance(other, datetime):
771+
elif isinstance(other, (datetime, np.datetime64)):
771772
other = Timestamp(other)
772773
if other is libts.NaT:
773774
result = self._nat_new(box=False)
@@ -777,7 +778,8 @@ def _sub_datelike(self, other):
777778
"timezones or no timezones")
778779
else:
779780
i8 = self.asi8
780-
result = i8 - other.value
781+
result = checked_add_with_arr(i8, -other.value,
782+
arr_mask=self._isnan)
781783
result = self._maybe_mask_results(result,
782784
fill_value=libts.iNaT)
783785
else:

pandas/core/indexes/timedeltas.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,8 @@ def _add_datelike(self, other):
361361
else:
362362
other = Timestamp(other)
363363
i8 = self.asi8
364-
result = checked_add_with_arr(i8, other.value)
364+
result = checked_add_with_arr(i8, other.value,
365+
arr_mask=self._isnan)
365366
result = self._maybe_mask_results(result, fill_value=iNaT)
366367
return DatetimeIndex(result, name=self.name, copy=False)
367368

pandas/tests/indexes/datetimes/test_arithmetic.py

+34
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,40 @@ def test_ufunc_coercions(self):
199199
tm.assert_index_equal(result, exp)
200200
assert result.freq == 'D'
201201

202+
def test_datetimeindex_sub_timestamp_overflow(self):
203+
dtimax = pd.to_datetime(['now', pd.Timestamp.max])
204+
dtimin = pd.to_datetime(['now', pd.Timestamp.min])
205+
206+
tsneg = Timestamp('1950-01-01')
207+
ts_neg_variants = [tsneg,
208+
tsneg.to_pydatetime(),
209+
tsneg.to_datetime64().astype('datetime64[ns]'),
210+
tsneg.to_datetime64().astype('datetime64[D]')]
211+
212+
tspos = Timestamp('1980-01-01')
213+
ts_pos_variants = [tspos,
214+
tspos.to_pydatetime(),
215+
tspos.to_datetime64().astype('datetime64[ns]'),
216+
tspos.to_datetime64().astype('datetime64[D]')]
217+
218+
for variant in ts_neg_variants:
219+
with pytest.raises(OverflowError):
220+
dtimax - variant
221+
222+
expected = pd.Timestamp.max.value - tspos.value
223+
for variant in ts_pos_variants:
224+
res = dtimax - variant
225+
assert res[1].value == expected
226+
227+
expected = pd.Timestamp.min.value - tsneg.value
228+
for variant in ts_neg_variants:
229+
res = dtimin - variant
230+
assert res[1].value == expected
231+
232+
for variant in ts_pos_variants:
233+
with pytest.raises(OverflowError):
234+
dtimin - variant
235+
202236

203237
# GH 10699
204238
@pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex],

pandas/tests/indexes/timedeltas/test_arithmetic.py

+20
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,26 @@ def test_add_overflow(self):
576576
to_timedelta(['7 seconds', pd.NaT, '4 hours']))
577577
tm.assert_index_equal(result, exp)
578578

579+
def test_timedeltaindex_add_timestamp_nat_masking(self):
580+
# GH17991 checking for overflow-masking with NaT
581+
tdinat = pd.to_timedelta(['24658 days 11:15:00', 'NaT'])
582+
583+
tsneg = Timestamp('1950-01-01')
584+
ts_neg_variants = [tsneg,
585+
tsneg.to_pydatetime(),
586+
tsneg.to_datetime64().astype('datetime64[ns]'),
587+
tsneg.to_datetime64().astype('datetime64[D]')]
588+
589+
tspos = Timestamp('1980-01-01')
590+
ts_pos_variants = [tspos,
591+
tspos.to_pydatetime(),
592+
tspos.to_datetime64().astype('datetime64[ns]'),
593+
tspos.to_datetime64().astype('datetime64[D]')]
594+
595+
for variant in ts_neg_variants + ts_pos_variants:
596+
res = tdinat + variant
597+
assert res[1] is pd.NaT
598+
579599
def test_tdi_ops_attributes(self):
580600
rng = timedelta_range('2 days', periods=5, freq='2D', name='x')
581601

0 commit comments

Comments
 (0)