Skip to content

Commit 8adc68f

Browse files
jbrockmendel, TomAugspurger
authored and committed
Masking and overflow checks for datetimeindex and timedeltaindex ops (pandas-dev#18020)
closes pandas-dev#17991 (cherry picked from commit 8388a47)
1 parent 8bede1a commit 8adc68f

File tree

6 files changed

+64
-4
lines changed

6 files changed

+64
-4
lines changed

doc/source/whatsnew/v0.21.1.txt

+3
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ Documentation Changes
8585

8686
Bug Fixes
8787
~~~~~~~~~
88+
- Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`)
89+
- Bug in :class:`TimedeltaIndex` subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17991`)
90+
- Bug in :class:`DatetimeIndex` where subtracting a datetimelike from a ``DatetimeIndex`` could fail to raise ``OverflowError`` on overflow (:issue:`18020`)
8891

8992
Conversion
9093
^^^^^^^^^^

pandas/core/indexes/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -681,7 +681,7 @@ def __sub__(self, other):
681681
return self._add_delta(-other)
682682
elif is_integer(other):
683683
return self.shift(-other)
684-
elif isinstance(other, datetime):
684+
elif isinstance(other, (datetime, np.datetime64)):
685685
return self._sub_datelike(other)
686686
elif isinstance(other, Period):
687687
return self._sub_period(other)

pandas/core/indexes/datetimes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import pandas.core.dtypes.concat as _concat
3232
from pandas.errors import PerformanceWarning
3333
from pandas.core.common import _values_from_object, _maybe_box
34+
from pandas.core.algorithms import checked_add_with_arr
3435

3536
from pandas.core.indexes.base import Index, _index_shared_docs
3637
from pandas.core.indexes.numeric import Int64Index, Float64Index
@@ -762,7 +763,7 @@ def _sub_datelike(self, other):
762763
raise TypeError("DatetimeIndex subtraction must have the same "
763764
"timezones or no timezones")
764765
result = self._sub_datelike_dti(other)
765-
elif isinstance(other, datetime):
766+
elif isinstance(other, (datetime, np.datetime64)):
766767
other = Timestamp(other)
767768
if other is libts.NaT:
768769
result = self._nat_new(box=False)
@@ -772,7 +773,8 @@ def _sub_datelike(self, other):
772773
"timezones or no timezones")
773774
else:
774775
i8 = self.asi8
775-
result = i8 - other.value
776+
result = checked_add_with_arr(i8, -other.value,
777+
arr_mask=self._isnan)
776778
result = self._maybe_mask_results(result,
777779
fill_value=libts.iNaT)
778780
else:

pandas/core/indexes/timedeltas.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,8 @@ def _add_datelike(self, other):
381381
else:
382382
other = Timestamp(other)
383383
i8 = self.asi8
384-
result = checked_add_with_arr(i8, other.value)
384+
result = checked_add_with_arr(i8, other.value,
385+
arr_mask=self._isnan)
385386
result = self._maybe_mask_results(result, fill_value=iNaT)
386387
return DatetimeIndex(result, name=self.name, copy=False)
387388

pandas/tests/indexes/datetimes/test_datetime.py

+34
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,40 @@ def test_ufunc_coercions(self):
211211
tm.assert_index_equal(result, exp)
212212
assert result.freq == 'D'
213213

214+
def test_datetimeindex_sub_timestamp_overflow(self):
215+
dtimax = pd.to_datetime(['now', pd.Timestamp.max])
216+
dtimin = pd.to_datetime(['now', pd.Timestamp.min])
217+
218+
tsneg = Timestamp('1950-01-01')
219+
ts_neg_variants = [tsneg,
220+
tsneg.to_pydatetime(),
221+
tsneg.to_datetime64().astype('datetime64[ns]'),
222+
tsneg.to_datetime64().astype('datetime64[D]')]
223+
224+
tspos = Timestamp('1980-01-01')
225+
ts_pos_variants = [tspos,
226+
tspos.to_pydatetime(),
227+
tspos.to_datetime64().astype('datetime64[ns]'),
228+
tspos.to_datetime64().astype('datetime64[D]')]
229+
230+
for variant in ts_neg_variants:
231+
with pytest.raises(OverflowError):
232+
dtimax - variant
233+
234+
expected = pd.Timestamp.max.value - tspos.value
235+
for variant in ts_pos_variants:
236+
res = dtimax - variant
237+
assert res[1].value == expected
238+
239+
expected = pd.Timestamp.min.value - tsneg.value
240+
for variant in ts_neg_variants:
241+
res = dtimin - variant
242+
assert res[1].value == expected
243+
244+
for variant in ts_pos_variants:
245+
with pytest.raises(OverflowError):
246+
dtimin - variant
247+
214248
def test_week_of_month_frequency(self):
215249
# GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
216250
d1 = date(2002, 9, 1)

pandas/tests/indexes/timedeltas/test_ops.py

+20
Original file line numberDiff line numberDiff line change
@@ -1282,3 +1282,23 @@ def test_add_overflow(self):
12821282
result = (to_timedelta([pd.NaT, '5 days', '1 hours']) +
12831283
to_timedelta(['7 seconds', pd.NaT, '4 hours']))
12841284
tm.assert_index_equal(result, exp)
1285+
1286+
def test_timedeltaindex_add_timestamp_nat_masking(self):
1287+
# GH17991 checking for overflow-masking with NaT
1288+
tdinat = pd.to_timedelta(['24658 days 11:15:00', 'NaT'])
1289+
1290+
tsneg = Timestamp('1950-01-01')
1291+
ts_neg_variants = [tsneg,
1292+
tsneg.to_pydatetime(),
1293+
tsneg.to_datetime64().astype('datetime64[ns]'),
1294+
tsneg.to_datetime64().astype('datetime64[D]')]
1295+
1296+
tspos = Timestamp('1980-01-01')
1297+
ts_pos_variants = [tspos,
1298+
tspos.to_pydatetime(),
1299+
tspos.to_datetime64().astype('datetime64[ns]'),
1300+
tspos.to_datetime64().astype('datetime64[D]')]
1301+
1302+
for variant in ts_neg_variants + ts_pos_variants:
1303+
res = tdinat + variant
1304+
assert res[1] is pd.NaT

0 commit comments

Comments
 (0)