Skip to content

Commit 3b800b1

Browse files
committed
BUG: Timestamp subtraction of NaT with timezones
1 parent 4404d39 commit 3b800b1

File tree

7 files changed

+178
-50
lines changed

7 files changed

+178
-50
lines changed

doc/source/whatsnew/v0.18.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,8 @@ Bug Fixes
870870
- Bug in ``Series.resample`` using a frequency of ``Nano`` when the index is a ``DatetimeIndex`` and contains non-zero nanosecond parts (:issue:`12037`)
871871

872872

873+
- Bug in ``NaT`` subtraction from ``Timestamp`` or ``DatetimeIndex`` with timezones (:issue:`11718`)
874+
873875
- Bug in ``Timedelta.round`` with negative values (:issue:`11690`)
874876
- Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`)
875877
- Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`)

pandas/tseries/base.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,27 @@ def inferred_freq(self):
199199
except ValueError:
200200
return None
201201

202+
def _nat_new(self, box=True):
203+
"""
204+
Return Index or ndarray filled with NaT which has the same
205+
length as the caller.
206+
207+
Parameters
208+
----------
209+
box : boolean, default True
210+
- If True, returns an Index of the same type as the caller.
211+
- If False, returns an ndarray of np.int64.
212+
"""
213+
result = np.zeros(len(self), dtype=np.int64)
214+
result.fill(tslib.iNaT)
215+
if not box:
216+
return result
217+
218+
attribs = self._get_attributes_dict()
219+
if not isinstance(self, com.ABCPeriodIndex):
220+
attribs['freq'] = None
221+
return self._simple_new(result, **attribs)
222+
202223
# Try to run function on index first, and then on elements of index
203224
# Especially important for group-by functionality
204225
def map(self, f):
@@ -224,8 +245,8 @@ def sort_values(self, return_indexer=False, ascending=True):
224245
sorted_values = np.sort(self.values)
225246
attribs = self._get_attributes_dict()
226247
freq = attribs['freq']
227-
from pandas.tseries.period import PeriodIndex
228-
if freq is not None and not isinstance(self, PeriodIndex):
248+
249+
if freq is not None and not isinstance(self, com.ABCPeriodIndex):
229250
if freq.n > 0 and not ascending:
230251
freq = freq * -1
231252
elif freq.n < 0 and ascending:

pandas/tseries/index.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -740,20 +740,26 @@ def __setstate__(self, state):
740740
raise Exception("invalid pickle state")
741741
_unpickle_compat = __setstate__
742742

743+
def _add_datelike(self, other):
744+
# adding a datetimelike to a DatetimeIndex is only defined for NaT
745+
if other is tslib.NaT:
746+
return self._nat_new(box=True)
747+
raise TypeError("cannot add a datelike to a DatetimeIndex")
748+
743749
def _sub_datelike(self, other):
744750
# subtract a datetime from myself, yielding a TimedeltaIndex
745-
746751
from pandas import TimedeltaIndex
747752
other = Timestamp(other)
748-
753+
if other is tslib.NaT:
754+
result = self._nat_new(box=False)
749755
# require tz compat
750-
if not self._has_same_tz(other):
756+
elif not self._has_same_tz(other):
751757
raise TypeError("Timestamp subtraction must have the same "
752758
"timezones or no timezones")
753-
754-
i8 = self.asi8
755-
result = i8 - other.value
756-
result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
759+
else:
760+
i8 = self.asi8
761+
result = i8 - other.value
762+
result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
757763
return TimedeltaIndex(result, name=self.name, copy=False)
758764

759765
def _maybe_update_attributes(self, attrs):

pandas/tseries/tdi.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -317,17 +317,24 @@ def _evaluate_with_timedelta_like(self, other, op, opstr):
317317
return NotImplemented
318318

319319
def _add_datelike(self, other):
320-
321320
# adding a timedeltaindex to a datetimelike
322321
from pandas import Timestamp, DatetimeIndex
323-
other = Timestamp(other)
324-
i8 = self.asi8
325-
result = i8 + other.value
326-
result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
322+
if other is tslib.NaT:
323+
result = self._nat_new(box=False)
324+
else:
325+
other = Timestamp(other)
326+
i8 = self.asi8
327+
result = i8 + other.value
328+
result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
327329
return DatetimeIndex(result, name=self.name, copy=False)
328330

329331
def _sub_datelike(self, other):
330-
raise TypeError("cannot subtract a datelike from a TimedeltaIndex")
332+
from pandas import DatetimeIndex
333+
if other is tslib.NaT:
334+
result = self._nat_new(box=False)
335+
else:
336+
raise TypeError("cannot subtract a datelike from a TimedeltaIndex")
337+
return DatetimeIndex(result, name=self.name, copy=False)
331338

332339
def _format_native_types(self, na_rep=u('NaT'),
333340
date_format=None, **kwargs):

pandas/tseries/tests/test_base.py

+49-1
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,14 @@ def test_add_iadd(self):
341341
rng += 1
342342
tm.assert_index_equal(rng, expected)
343343

344+
idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
345+
msg = "cannot add a datelike to a DatetimeIndex"
346+
with tm.assertRaisesRegexp(TypeError, msg):
347+
idx + pd.Timestamp('2011-01-01')
348+
349+
with tm.assertRaisesRegexp(TypeError, msg):
350+
pd.Timestamp('2011-01-01') + idx
351+
344352
def test_sub_isub(self):
345353
for tz in self.tz:
346354
# diff
@@ -598,6 +606,16 @@ def test_infer_freq(self):
598606
tm.assert_index_equal(idx, result)
599607
self.assertEqual(result.freq, freq)
600608

609+
def test_nat_new(self):
610+
idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x')
611+
result = idx._nat_new()
612+
exp = pd.DatetimeIndex([pd.NaT] * 5, name='x')
613+
tm.assert_index_equal(result, exp)
614+
615+
result = idx._nat_new(box=False)
616+
exp = np.array([tslib.iNaT] * 5, dtype=np.int64)
617+
tm.assert_numpy_array_equal(result, exp)
618+
601619

602620
class TestTimedeltaIndexOps(Ops):
603621
def setUp(self):
@@ -777,7 +795,6 @@ def test_add_iadd(self):
777795
tm.assert_index_equal(rng, expected)
778796

779797
def test_sub_isub(self):
780-
781798
# only test adding/sub offsets as - is now numeric
782799

783800
# offset
@@ -800,6 +817,15 @@ def test_sub_isub(self):
800817
rng -= 1
801818
tm.assert_index_equal(rng, expected)
802819

820+
idx = TimedeltaIndex(['1 day', '2 day'])
821+
msg = "cannot subtract a datelike from a TimedeltaIndex"
822+
with tm.assertRaisesRegexp(TypeError, msg):
823+
idx - pd.Timestamp('2011-01-01')
824+
825+
result = Timestamp('2011-01-01') + idx
826+
expected = DatetimeIndex(['2011-01-02', '2011-01-03'])
827+
tm.assert_index_equal(result, expected)
828+
803829
def test_ops_compat(self):
804830

805831
offsets = [pd.offsets.Hour(2), timedelta(hours=2),
@@ -1252,6 +1278,17 @@ def test_infer_freq(self):
12521278
tm.assert_index_equal(idx, result)
12531279
self.assertEqual(result.freq, freq)
12541280

1281+
def test_nat_new(self):
1282+
1283+
idx = pd.timedelta_range('1', freq='D', periods=5, name='x')
1284+
result = idx._nat_new()
1285+
exp = pd.TimedeltaIndex([pd.NaT] * 5, name='x')
1286+
tm.assert_index_equal(result, exp)
1287+
1288+
result = idx._nat_new(box=False)
1289+
exp = np.array([tslib.iNaT] * 5, dtype=np.int64)
1290+
tm.assert_numpy_array_equal(result, exp)
1291+
12551292

12561293
class TestPeriodIndexOps(Ops):
12571294
def setUp(self):
@@ -2053,3 +2090,14 @@ def test_take(self):
20532090
self.assert_index_equal(result, expected)
20542091
self.assertEqual(result.freq, expected.freq)
20552092
self.assertEqual(result.freq, 'D')
2093+
2094+
def test_nat_new(self):
2095+
2096+
idx = pd.period_range('2011-01', freq='M', periods=5, name='x')
2097+
result = idx._nat_new()
2098+
exp = pd.PeriodIndex([pd.NaT] * 5, freq='M', name='x')
2099+
tm.assert_index_equal(result, exp)
2100+
2101+
result = idx._nat_new(box=False)
2102+
exp = np.array([tslib.iNaT] * 5, dtype=np.int64)
2103+
tm.assert_numpy_array_equal(result, exp)

pandas/tseries/tests/test_tslib.py

+71-21
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pandas._period as period
77
import datetime
88

9+
import pandas as pd
910
from pandas.core.api import Timestamp, Series, Timedelta, Period, to_datetime
1011
from pandas.tslib import get_timezone
1112
from pandas._period import period_asfreq, period_ordinal
@@ -22,6 +23,7 @@
2223

2324

2425
class TestTimestamp(tm.TestCase):
26+
2527
def test_constructor(self):
2628
base_str = '2014-07-01 09:00'
2729
base_dt = datetime.datetime(2014, 7, 1, 9)
@@ -915,37 +917,85 @@ def test_nanosecond_timestamp(self):
915917

916918
def test_nat_arithmetic(self):
917919
# GH 6873
918-
nat = tslib.NaT
919-
t = Timestamp('2014-01-01')
920-
dt = datetime.datetime(2014, 1, 1)
921-
delta = datetime.timedelta(3600)
922-
td = Timedelta('5s')
923920
i = 2
924921
f = 1.5
925922

926-
for (left, right) in [(nat, i), (nat, f), (nat, np.nan)]:
927-
self.assertTrue((left / right) is nat)
928-
self.assertTrue((left * right) is nat)
929-
self.assertTrue((right * left) is nat)
923+
for (left, right) in [(pd.NaT, i), (pd.NaT, f), (pd.NaT, np.nan)]:
924+
self.assertIs(left / right, pd.NaT)
925+
self.assertIs(left * right, pd.NaT)
926+
self.assertIs(right * left, pd.NaT)
930927
with tm.assertRaises(TypeError):
931928
right / left
932929

933930
# Timestamp / datetime
934-
for (left, right) in [(nat, nat), (nat, t), (nat, dt)]:
931+
t = Timestamp('2014-01-01')
932+
dt = datetime.datetime(2014, 1, 1)
933+
for (left, right) in [(pd.NaT, pd.NaT), (pd.NaT, t), (pd.NaT, dt)]:
935934
# NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT
936-
self.assertTrue((right + left) is nat)
937-
self.assertTrue((left + right) is nat)
938-
self.assertTrue((left - right) is nat)
939-
self.assertTrue((right - left) is nat)
935+
self.assertIs(right + left, pd.NaT)
936+
self.assertIs(left + right, pd.NaT)
937+
self.assertIs(left - right, pd.NaT)
938+
self.assertIs(right - left, pd.NaT)
940939

941940
# timedelta-like
942941
# offsets are tested in test_offsets.py
943-
for (left, right) in [(nat, delta), (nat, td)]:
942+
943+
delta = datetime.timedelta(3600)
944+
td = Timedelta('5s')
945+
946+
for (left, right) in [(pd.NaT, delta), (pd.NaT, td)]:
944947
# NaT + timedelta-like returns NaT
945-
self.assertTrue((right + left) is nat)
946-
self.assertTrue((left + right) is nat)
947-
self.assertTrue((right - left) is nat)
948-
self.assertTrue((left - right) is nat)
948+
self.assertIs(right + left, pd.NaT)
949+
self.assertIs(left + right, pd.NaT)
950+
self.assertIs(right - left, pd.NaT)
951+
self.assertIs(left - right, pd.NaT)
952+
953+
# GH 11718
954+
tm._skip_if_no_pytz()
955+
import pytz
956+
957+
t_utc = Timestamp('2014-01-01', tz='UTC')
958+
t_tz = Timestamp('2014-01-01', tz='US/Eastern')
959+
dt_tz = pytz.timezone('Asia/Tokyo').localize(dt)
960+
961+
for (left, right) in [(pd.NaT, t_utc), (pd.NaT, t_tz),
962+
(pd.NaT, dt_tz)]:
963+
# NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT
964+
self.assertIs(right + left, pd.NaT)
965+
self.assertIs(left + right, pd.NaT)
966+
self.assertIs(left - right, pd.NaT)
967+
self.assertIs(right - left, pd.NaT)
968+
969+
def test_nat_arithmetic_index(self):
970+
# GH 11718
971+
972+
# datetime
973+
tm._skip_if_no_pytz()
974+
975+
dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], name='x')
976+
exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x')
977+
self.assert_index_equal(dti + pd.NaT, exp)
978+
self.assert_index_equal(pd.NaT + dti, exp)
979+
980+
dti_tz = pd.DatetimeIndex(['2011-01-01', '2011-01-02'],
981+
tz='US/Eastern', name='x')
982+
exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x', tz='US/Eastern')
983+
self.assert_index_equal(dti_tz + pd.NaT, exp)
984+
self.assert_index_equal(pd.NaT + dti_tz, exp)
985+
986+
exp = pd.TimedeltaIndex([pd.NaT, pd.NaT], name='x')
987+
for (left, right) in [(pd.NaT, dti), (pd.NaT, dti_tz)]:
988+
self.assert_index_equal(left - right, exp)
989+
self.assert_index_equal(right - left, exp)
990+
991+
# timedelta
992+
tdi = pd.TimedeltaIndex(['1 day', '2 day'], name='x')
993+
exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x')
994+
for (left, right) in [(pd.NaT, tdi)]:
995+
self.assert_index_equal(left + right, exp)
996+
self.assert_index_equal(right + left, exp)
997+
self.assert_index_equal(left - right, exp)
998+
self.assert_index_equal(right - left, exp)
949999

9501000

9511001
class TestTslib(tm.TestCase):
@@ -1173,8 +1223,8 @@ def test_resolution(self):
11731223
period.H_RESO, period.T_RESO,
11741224
period.S_RESO, period.MS_RESO,
11751225
period.US_RESO]):
1176-
for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Eastern'
1177-
]:
1226+
for tz in [None, 'Asia/Tokyo', 'US/Eastern',
1227+
'dateutil/US/Eastern']:
11781228
idx = date_range(start='2013-04-01', periods=30, freq=freq,
11791229
tz=tz)
11801230
result = period.resolution(idx.asi8, idx.tz)

pandas/tslib.pyx

+7-13
Original file line numberDiff line numberDiff line change
@@ -1055,16 +1055,12 @@ cdef class _Timestamp(datetime):
10551055
return self + neg_other
10561056

10571057
# a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex
1058-
elif getattr(other,'_typ',None) == 'datetimeindex':
1059-
1060-
# we may be passed reverse ops
1061-
if get_timezone(getattr(self,'tzinfo',None)) != get_timezone(other.tz):
1062-
raise TypeError("Timestamp subtraction must have the same timezones or no timezones")
1063-
1058+
elif getattr(other, '_typ', None) == 'datetimeindex':
1059+
# timezone comparison is performed in DatetimeIndex._sub_datelike
10641060
return -other.__sub__(self)
10651061

10661062
# a Timestamp-TimedeltaIndex -> yields a DatetimeIndex
1067-
elif getattr(other,'_typ',None) == 'timedeltaindex':
1063+
elif getattr(other, '_typ', None) == 'timedeltaindex':
10681064
return (-other).__add__(self)
10691065

10701066
elif other is NaT:
@@ -1157,22 +1153,20 @@ cdef class _NaT(_Timestamp):
11571153
if isinstance(other, datetime):
11581154
return NaT
11591155
result = _Timestamp.__add__(self, other)
1156+
# Timestamp.__add__ doesn't return DatetimeIndex/TimedeltaIndex
11601157
if result is NotImplemented:
11611158
return result
11621159
except (OverflowError, OutOfBoundsDatetime):
11631160
pass
11641161
return NaT
11651162

11661163
def __sub__(self, other):
1167-
1168-
if other is NaT:
1164+
if isinstance(other, (datetime, timedelta)):
11691165
return NaT
1170-
1171-
if type(self) is datetime:
1172-
other, self = self, other
11731166
try:
11741167
result = _Timestamp.__sub__(self, other)
1175-
if result is NotImplemented:
1168+
# Timestamp.__sub__ may return DatetimeIndex/TimedeltaIndex
1169+
if result is NotImplemented or hasattr(result, '_typ'):
11761170
return result
11771171
except (OverflowError, OutOfBoundsDatetime):
11781172
pass

0 commit comments

Comments
 (0)