Skip to content

BUG: Timestamp subtraction of NaT with timezones #12241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,8 @@ Bug Fixes
- Bug in ``Series.resample`` using a frequency of ``Nano`` when the index is a ``DatetimeIndex`` and contains non-zero nanosecond parts (:issue:`12037`)


- Bug in ``NaT`` subtraction from ``Timestamp`` or ``DatetimeIndex`` with timezones (:issue:`11718`)

- Bug in ``Timedelta.round`` with negative values (:issue:`11690`)
- Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`)
- Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`)
Expand Down
25 changes: 23 additions & 2 deletions pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,27 @@ def inferred_freq(self):
except ValueError:
return None

def _nat_new(self, box=True):
    """
    Build an all-NaT container with the same length as the caller.

    Parameters
    ----------
    box : boolean, default True
        - If True, return an Index created through ``self._simple_new``
          with the caller's attributes.
        - If False, return the underlying ndarray of np.int64 filled
          with ``tslib.iNaT``.
    """
    nat_values = np.empty(len(self), dtype=np.int64)
    nat_values.fill(tslib.iNaT)

    if box:
        attrs = self._get_attributes_dict()
        # freq is reset to None unless the caller is a PeriodIndex,
        # whose attributes are passed through unchanged
        if not isinstance(self, com.ABCPeriodIndex):
            attrs['freq'] = None
        return self._simple_new(nat_values, **attrs)

    return nat_values

# Try to run function on index first, and then on elements of index
# Especially important for group-by functionality
def map(self, f):
Expand All @@ -224,8 +245,8 @@ def sort_values(self, return_indexer=False, ascending=True):
sorted_values = np.sort(self.values)
attribs = self._get_attributes_dict()
freq = attribs['freq']
from pandas.tseries.period import PeriodIndex
if freq is not None and not isinstance(self, PeriodIndex):

if freq is not None and not isinstance(self, com.ABCPeriodIndex):
if freq.n > 0 and not ascending:
freq = freq * -1
elif freq.n < 0 and ascending:
Expand Down
20 changes: 13 additions & 7 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,20 +740,26 @@ def __setstate__(self, state):
raise Exception("invalid pickle state")
_unpickle_compat = __setstate__

def _add_datelike(self, other):
    """
    Add a datetime-like scalar to this DatetimeIndex.

    Only ``NaT`` is accepted, in which case the result of
    ``self._nat_new(box=True)`` is returned; any other value raises
    TypeError.
    """
    if other is not tslib.NaT:
        raise TypeError("cannot add a datelike to a DatetimeIndex")
    return self._nat_new(box=True)

def _sub_datelike(self, other):
# subtract a datetime from myself, yielding a TimedeltaIndex

from pandas import TimedeltaIndex
other = Timestamp(other)

if other is tslib.NaT:
result = self._nat_new(box=False)
# require tz compat
if not self._has_same_tz(other):
elif not self._has_same_tz(other):
raise TypeError("Timestamp subtraction must have the same "
"timezones or no timezones")

i8 = self.asi8
result = i8 - other.value
result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
else:
i8 = self.asi8
result = i8 - other.value
result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
return TimedeltaIndex(result, name=self.name, copy=False)

def _maybe_update_attributes(self, attrs):
Expand Down
19 changes: 13 additions & 6 deletions pandas/tseries/tdi.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,17 +317,24 @@ def _evaluate_with_timedelta_like(self, other, op, opstr):
return NotImplemented

def _add_datelike(self, other):

# adding a timedeltaindex to a datetimelike
from pandas import Timestamp, DatetimeIndex
other = Timestamp(other)
i8 = self.asi8
result = i8 + other.value
result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
if other is tslib.NaT:
result = self._nat_new(box=False)
else:
other = Timestamp(other)
i8 = self.asi8
result = i8 + other.value
result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
return DatetimeIndex(result, name=self.name, copy=False)

def _sub_datelike(self, other):
raise TypeError("cannot subtract a datelike from a TimedeltaIndex")
from pandas import DatetimeIndex
if other is tslib.NaT:
result = self._nat_new(box=False)
else:
raise TypeError("cannot subtract a datelike from a TimedeltaIndex")
return DatetimeIndex(result, name=self.name, copy=False)

def _format_native_types(self, na_rep=u('NaT'),
date_format=None, **kwargs):
Expand Down
50 changes: 49 additions & 1 deletion pandas/tseries/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,14 @@ def test_add_iadd(self):
rng += 1
tm.assert_index_equal(rng, expected)

idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
msg = "cannot add a datelike to a DatetimeIndex"
with tm.assertRaisesRegexp(TypeError, msg):
idx + pd.Timestamp('2011-01-01')

with tm.assertRaisesRegexp(TypeError, msg):
pd.Timestamp('2011-01-01') + idx

def test_sub_isub(self):
for tz in self.tz:
# diff
Expand Down Expand Up @@ -598,6 +606,16 @@ def test_infer_freq(self):
tm.assert_index_equal(idx, result)
self.assertEqual(result.freq, freq)

def test_nat_new(self):
    # _nat_new on a DatetimeIndex: boxed result keeps the name,
    # unboxed result is a plain int64 array of iNaT
    n = 5
    index = pd.date_range('2011-01-01', freq='D', periods=n, name='x')

    boxed = index._nat_new()
    expected_index = pd.DatetimeIndex([pd.NaT] * n, name='x')
    tm.assert_index_equal(boxed, expected_index)

    raw = index._nat_new(box=False)
    expected_values = np.array([tslib.iNaT] * n, dtype=np.int64)
    tm.assert_numpy_array_equal(raw, expected_values)


class TestTimedeltaIndexOps(Ops):
def setUp(self):
Expand Down Expand Up @@ -777,7 +795,6 @@ def test_add_iadd(self):
tm.assert_index_equal(rng, expected)

def test_sub_isub(self):

# only test adding/sub offsets as - is now numeric

# offset
Expand All @@ -800,6 +817,15 @@ def test_sub_isub(self):
rng -= 1
tm.assert_index_equal(rng, expected)

idx = TimedeltaIndex(['1 day', '2 day'])
msg = "cannot subtract a datelike from a TimedeltaIndex"
with tm.assertRaisesRegexp(TypeError, msg):
idx - pd.Timestamp('2011-01-01')

result = Timestamp('2011-01-01') + idx
expected = DatetimeIndex(['2011-01-02', '2011-01-03'])
tm.assert_index_equal(result, expected)

def test_ops_compat(self):

offsets = [pd.offsets.Hour(2), timedelta(hours=2),
Expand Down Expand Up @@ -1252,6 +1278,17 @@ def test_infer_freq(self):
tm.assert_index_equal(idx, result)
self.assertEqual(result.freq, freq)

def test_nat_new(self):
    # _nat_new on a TimedeltaIndex: boxed result keeps the name,
    # unboxed result is a plain int64 array of iNaT
    n = 5
    index = pd.timedelta_range('1', freq='D', periods=n, name='x')

    boxed = index._nat_new()
    expected_index = pd.TimedeltaIndex([pd.NaT] * n, name='x')
    tm.assert_index_equal(boxed, expected_index)

    raw = index._nat_new(box=False)
    expected_values = np.array([tslib.iNaT] * n, dtype=np.int64)
    tm.assert_numpy_array_equal(raw, expected_values)


class TestPeriodIndexOps(Ops):
def setUp(self):
Expand Down Expand Up @@ -2053,3 +2090,14 @@ def test_take(self):
self.assert_index_equal(result, expected)
self.assertEqual(result.freq, expected.freq)
self.assertEqual(result.freq, 'D')

def test_nat_new(self):
    # _nat_new on a PeriodIndex: boxed result keeps name and freq,
    # unboxed result is a plain int64 array of iNaT
    n = 5
    index = pd.period_range('2011-01', freq='M', periods=n, name='x')

    boxed = index._nat_new()
    expected_index = pd.PeriodIndex([pd.NaT] * n, freq='M', name='x')
    tm.assert_index_equal(boxed, expected_index)

    raw = index._nat_new(box=False)
    expected_values = np.array([tslib.iNaT] * n, dtype=np.int64)
    tm.assert_numpy_array_equal(raw, expected_values)
92 changes: 71 additions & 21 deletions pandas/tseries/tests/test_tslib.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pandas._period as period
import datetime

import pandas as pd
from pandas.core.api import Timestamp, Series, Timedelta, Period, to_datetime
from pandas.tslib import get_timezone
from pandas._period import period_asfreq, period_ordinal
Expand All @@ -22,6 +23,7 @@


class TestTimestamp(tm.TestCase):

def test_constructor(self):
base_str = '2014-07-01 09:00'
base_dt = datetime.datetime(2014, 7, 1, 9)
Expand Down Expand Up @@ -915,37 +917,85 @@ def test_nanosecond_timestamp(self):

def test_nat_arithmetic(self):
# GH 6873
nat = tslib.NaT
t = Timestamp('2014-01-01')
dt = datetime.datetime(2014, 1, 1)
delta = datetime.timedelta(3600)
td = Timedelta('5s')
i = 2
f = 1.5

for (left, right) in [(nat, i), (nat, f), (nat, np.nan)]:
self.assertTrue((left / right) is nat)
self.assertTrue((left * right) is nat)
self.assertTrue((right * left) is nat)
for (left, right) in [(pd.NaT, i), (pd.NaT, f), (pd.NaT, np.nan)]:
self.assertIs(left / right, pd.NaT)
self.assertIs(left * right, pd.NaT)
self.assertIs(right * left, pd.NaT)
with tm.assertRaises(TypeError):
right / left

# Timestamp / datetime
for (left, right) in [(nat, nat), (nat, t), (nat, dt)]:
t = Timestamp('2014-01-01')
dt = datetime.datetime(2014, 1, 1)
for (left, right) in [(pd.NaT, pd.NaT), (pd.NaT, t), (pd.NaT, dt)]:
# NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT
self.assertTrue((right + left) is nat)
self.assertTrue((left + right) is nat)
self.assertTrue((left - right) is nat)
self.assertTrue((right - left) is nat)
self.assertIs(right + left, pd.NaT)
self.assertIs(left + right, pd.NaT)
self.assertIs(left - right, pd.NaT)
self.assertIs(right - left, pd.NaT)

# timedelta-like
# offsets are tested in test_offsets.py
for (left, right) in [(nat, delta), (nat, td)]:

delta = datetime.timedelta(3600)
td = Timedelta('5s')

for (left, right) in [(pd.NaT, delta), (pd.NaT, td)]:
# NaT + timedelta-like returns NaT
self.assertTrue((right + left) is nat)
self.assertTrue((left + right) is nat)
self.assertTrue((right - left) is nat)
self.assertTrue((left - right) is nat)
self.assertIs(right + left, pd.NaT)
self.assertIs(left + right, pd.NaT)
self.assertIs(right - left, pd.NaT)
self.assertIs(left - right, pd.NaT)

# GH 11718
tm._skip_if_no_pytz()
import pytz

t_utc = Timestamp('2014-01-01', tz='UTC')
t_tz = Timestamp('2014-01-01', tz='US/Eastern')
dt_tz = pytz.timezone('Asia/Tokyo').localize(dt)

for (left, right) in [(pd.NaT, t_utc), (pd.NaT, t_tz),
(pd.NaT, dt_tz)]:
# NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT
self.assertIs(right + left, pd.NaT)
self.assertIs(left + right, pd.NaT)
self.assertIs(left - right, pd.NaT)
self.assertIs(right - left, pd.NaT)

def test_nat_arithmetic_index(self):
    # GH 11718
    tm._skip_if_no_pytz()

    # naive DatetimeIndex + NaT (both operand orders)
    dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], name='x')
    nat_dti = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x')
    self.assert_index_equal(dti + pd.NaT, nat_dti)
    self.assert_index_equal(pd.NaT + dti, nat_dti)

    # tz-aware DatetimeIndex + NaT keeps the timezone
    dti_tz = pd.DatetimeIndex(['2011-01-01', '2011-01-02'],
                              tz='US/Eastern', name='x')
    nat_dti_tz = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x',
                                  tz='US/Eastern')
    self.assert_index_equal(dti_tz + pd.NaT, nat_dti_tz)
    self.assert_index_equal(pd.NaT + dti_tz, nat_dti_tz)

    # subtraction between NaT and a DatetimeIndex gives a TimedeltaIndex
    nat_tdi = pd.TimedeltaIndex([pd.NaT, pd.NaT], name='x')
    for index in (dti, dti_tz):
        self.assert_index_equal(pd.NaT - index, nat_tdi)
        self.assert_index_equal(index - pd.NaT, nat_tdi)

    # NaT combined with a TimedeltaIndex gives a DatetimeIndex
    tdi = pd.TimedeltaIndex(['1 day', '2 day'], name='x')
    expected = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x')
    self.assert_index_equal(pd.NaT + tdi, expected)
    self.assert_index_equal(tdi + pd.NaT, expected)
    self.assert_index_equal(pd.NaT - tdi, expected)
    self.assert_index_equal(tdi - pd.NaT, expected)


class TestTslib(tm.TestCase):
Expand Down Expand Up @@ -1173,8 +1223,8 @@ def test_resolution(self):
period.H_RESO, period.T_RESO,
period.S_RESO, period.MS_RESO,
period.US_RESO]):
for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Eastern'
]:
for tz in [None, 'Asia/Tokyo', 'US/Eastern',
'dateutil/US/Eastern']:
idx = date_range(start='2013-04-01', periods=30, freq=freq,
tz=tz)
result = period.resolution(idx.asi8, idx.tz)
Expand Down
20 changes: 7 additions & 13 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1055,16 +1055,12 @@ cdef class _Timestamp(datetime):
return self + neg_other

# a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex
elif getattr(other,'_typ',None) == 'datetimeindex':

# we may be passed reverse ops
if get_timezone(getattr(self,'tzinfo',None)) != get_timezone(other.tz):
raise TypeError("Timestamp subtraction must have the same timezones or no timezones")

elif getattr(other, '_typ', None) == 'datetimeindex':
# timezone comparison is performed in DatetimeIndex._sub_datelike
return -other.__sub__(self)

# a Timestamp-TimedeltaIndex -> yields a DatetimeIndex (negated TimedeltaIndex + Timestamp)
elif getattr(other,'_typ',None) == 'timedeltaindex':
elif getattr(other, '_typ', None) == 'timedeltaindex':
return (-other).__add__(self)

elif other is NaT:
Expand Down Expand Up @@ -1157,22 +1153,20 @@ cdef class _NaT(_Timestamp):
if isinstance(other, datetime):
return NaT
result = _Timestamp.__add__(self, other)
# Timestamp.__add__ doesn't return DatetimeIndex/TimedeltaIndex
if result is NotImplemented:
return result
except (OverflowError, OutOfBoundsDatetime):
pass
return NaT

def __sub__(self, other):

if other is NaT:
if isinstance(other, (datetime, timedelta)):
return NaT

if type(self) is datetime:
other, self = self, other
try:
result = _Timestamp.__sub__(self, other)
if result is NotImplemented:
# Timestamp.__sub__ may return DatetimeIndex/TimedeltaIndex
if result is NotImplemented or hasattr(result, '_typ'):
return result
except (OverflowError, OutOfBoundsDatetime):
pass
Expand Down