Skip to content

Masking and overflow checks for datetimeindex and timedeltaindex ops #18020

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Nov 4, 2017
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import pandas.core.dtypes.concat as _concat
from pandas.errors import PerformanceWarning
from pandas.core.common import _values_from_object, _maybe_box
from pandas.core.algorithms import checked_add_with_arr

from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.core.indexes.numeric import Int64Index, Float64Index
Expand Down Expand Up @@ -777,7 +778,8 @@ def _sub_datelike(self, other):
"timezones or no timezones")
else:
i8 = self.asi8
result = i8 - other.value
result = checked_add_with_arr(i8, -other.value,
arr_mask=self._isnan)
result = self._maybe_mask_results(result,
fill_value=libts.iNaT)
else:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,8 @@ def _add_datelike(self, other):
else:
other = Timestamp(other)
i8 = self.asi8
result = checked_add_with_arr(i8, other.value)
result = checked_add_with_arr(i8, other.value,
arr_mask=self._isnan)
result = self._maybe_mask_results(result, fill_value=iNaT)
return DatetimeIndex(result, name=self.name, copy=False)

Expand Down
183 changes: 183 additions & 0 deletions pandas/tests/indexes/test_datetimelike_arithmetic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
""" Test Matrix for arithmetic operations on DatetimeIndex, TimedeltaIndex,
and PeriodIndex
"""

import pytest

import pandas as pd
from pandas import Timestamp, Timedelta, NaT


class TestDatetimeLikeIndexArithmetic(object):
    """Overflow and NaT-masking checks for datetime-like index arithmetic.

    Each test builds a two-element index whose second element is either
    NaT or an extreme value (``Timestamp.min/max``, ``Timedelta.min/max``)
    and combines it with a scalar.  Operations that stay within the int64
    range must give the exact expected ``.value``; operations that leave it
    must raise ``OverflowError``; NaT entries must stay NaT.
    """
    # GH17991 checking for overflows and NaT masking on arithmetic ops

    # TODO: Fill out the matrix of allowed arithmetic operations:
    #   - __rsub__, __radd__
    #   - ops with scalars boxed in Index/Series/DataFrame/np.array
    #   - ops with scalars:
    #              NaT, Timestamp.min/max, Timedelta.min/max
    #              datetime, timedelta, date(?),
    #              relativedelta,
    #              np.datetime64, np.timedelta64,
    #              DateOffset,
    #              Period
    #   - timezone-aware variants
    #   - object-dtype, categorical dtype
    #   - PeriodIndex
    #   - consistency with .map(...) ?

    def test_timedeltaindex_add_timestamp_nat_masking(self):
        """NaT in a TimedeltaIndex stays NaT after adding a Timestamp."""
        tdinat = pd.to_timedelta(['24658 days 11:15:00', 'NaT'])

        # tsneg.value < 0, tspos.value > 0
        tsneg = Timestamp('1950-01-01')
        tspos = Timestamp('1980-01-01')

        res1 = tdinat + tsneg
        assert res1[1] is NaT
        res2 = tdinat + tspos
        assert res2[1] is NaT

    def test_timedeltaindex_add_timestamp_overflow(self):
        """TimedeltaIndex + Timestamp raises only when leaving int64."""
        tdimax = pd.to_timedelta(['24658 days 11:15:00', Timedelta.max])
        tdimin = pd.to_timedelta(['24658 days 11:15:00', Timedelta.min])

        # tsneg.value < 0, tspos.value > 0
        tsneg = Timestamp('1950-01-01')
        tspos = Timestamp('1980-01-01')

        # Moving an extreme value back towards zero is representable.
        res1 = tdimax + tsneg
        assert res1[1].value == Timedelta.max.value + tsneg.value
        res2 = tdimin + tspos
        assert res2[1].value == Timedelta.min.value + tspos.value

        # Moving an extreme value further away from zero must overflow.
        with pytest.raises(OverflowError):
            tdimax + tspos

        with pytest.raises(OverflowError):
            tdimin + tsneg

    def test_timedeltaindex_add_timedelta_overflow(self):
        """TimedeltaIndex + Timedelta raises only when leaving int64."""
        tdimax = pd.to_timedelta(['24658 days 11:15:00', Timedelta.max])
        tdimin = pd.to_timedelta(['24658 days 11:15:00', Timedelta.min])

        # tdpos.value > 0, tdneg.value < 0
        tdpos = Timedelta('1h')
        tdneg = Timedelta('-1h')

        with pytest.raises(OverflowError):
            tdimax + tdpos

        res2 = tdimax + tdneg
        assert res2[1].value == Timedelta.max.value + tdneg.value
        res3 = tdimin + tdpos
        assert res3[1].value == Timedelta.min.value + tdpos.value

        with pytest.raises(OverflowError):
            tdimin + tdneg

    def test_timedeltaindex_sub_timedelta_overflow(self):
        """TimedeltaIndex - Timedelta raises only when leaving int64."""
        tdimax = pd.to_timedelta(['24658 days 11:15:00', Timedelta.max])
        tdimin = pd.to_timedelta(['24658 days 11:15:00', Timedelta.min])

        # tdpos.value > 0, tdneg.value < 0
        tdpos = Timedelta('1h')
        tdneg = Timedelta('-1h')

        res1 = tdimax - tdpos
        assert res1[1].value == Timedelta.max.value - tdpos.value

        with pytest.raises(OverflowError):
            tdimax - tdneg

        with pytest.raises(OverflowError):
            tdimin - tdpos

        res4 = tdimin - tdneg
        assert res4[1].value == Timedelta.min.value - tdneg.value

    def test_datetimeindex_add_nat_masking(self):
        """NaT in a DatetimeIndex stays NaT after adding a Timedelta."""
        # Checking for NaTs and checking that we don't get an OverflowError.
        # A fixed timestamp (rather than 'now') keeps the fixture
        # independent of the wall clock.
        dtinat = pd.to_datetime(['2017-11-04 12:00', 'NaT'])

        # tdpos.value > 0, tdneg.value < 0
        tdpos = Timedelta('1h')
        tdneg = Timedelta('-1h')

        res1 = dtinat + tdpos
        assert res1[1] is NaT
        res2 = dtinat + tdneg
        assert res2[1] is NaT

    def test_datetimeindex_sub_nat_masking(self):
        """NaT in a DatetimeIndex stays NaT after subtracting a Timedelta."""
        # Checking for NaTs and checking that we don't get an OverflowError.
        # A fixed timestamp (rather than 'now') keeps the fixture
        # independent of the wall clock.
        dtinat = pd.to_datetime(['2017-11-04 12:00', 'NaT'])

        # tdpos.value > 0, tdneg.value < 0
        tdpos = Timedelta('1h')
        tdneg = Timedelta('-1h')

        res1 = dtinat - tdpos
        assert res1[1] is NaT
        res2 = dtinat - tdneg
        assert res2[1] is NaT

    def test_datetimeindex_add_timedelta_overflow(self):
        """DatetimeIndex + Timedelta raises only when leaving int64."""
        # Fixed first element (rather than 'now') for reproducibility;
        # only the extreme second element drives the assertions.
        dtimax = pd.to_datetime(['2017-11-04 12:00', Timestamp.max])
        dtimin = pd.to_datetime(['2017-11-04 12:00', Timestamp.min])

        # tdpos.value > 0, tdneg.value < 0
        tdpos = Timedelta('1h')
        tdneg = Timedelta('-1h')

        with pytest.raises(OverflowError):
            dtimax + tdpos

        res2 = dtimax + tdneg
        assert res2[1].value == Timestamp.max.value + tdneg.value

        res3 = dtimin + tdpos
        assert res3[1].value == Timestamp.min.value + tdpos.value

        with pytest.raises(OverflowError):
            dtimin + tdneg

    def test_datetimeindex_sub_timedelta_overflow(self):
        """DatetimeIndex - Timedelta raises only when leaving int64."""
        # Fixed first element (rather than 'now') for reproducibility;
        # only the extreme second element drives the assertions.
        dtimax = pd.to_datetime(['2017-11-04 12:00', Timestamp.max])
        dtimin = pd.to_datetime(['2017-11-04 12:00', Timestamp.min])

        # tdpos.value > 0, tdneg.value < 0
        tdpos = Timedelta('1h')
        tdneg = Timedelta('-1h')

        res1 = dtimax - tdpos
        assert res1[1].value == Timestamp.max.value - tdpos.value

        with pytest.raises(OverflowError):
            dtimax - tdneg

        with pytest.raises(OverflowError):
            dtimin - tdpos

        res4 = dtimin - tdneg
        assert res4[1].value == Timestamp.min.value - tdneg.value

    def test_datetimeindex_sub_timestamp_overflow(self):
        """DatetimeIndex - Timestamp raises only when leaving int64."""
        # Fixed first element (rather than 'now') for reproducibility;
        # only the extreme second element drives the assertions.
        dtimax = pd.to_datetime(['2017-11-04 12:00', Timestamp.max])
        dtimin = pd.to_datetime(['2017-11-04 12:00', Timestamp.min])

        # tsneg.value < 0, tspos.value > 0
        tsneg = Timestamp('1950-01-01')
        tspos = Timestamp('1980-01-01')

        with pytest.raises(OverflowError):
            dtimax - tsneg

        res2 = dtimax - tspos
        assert res2[1].value == Timestamp.max.value - tspos.value

        res3 = dtimin - tsneg
        assert res3[1].value == Timestamp.min.value - tsneg.value

        with pytest.raises(OverflowError):
            dtimin - tspos