From 3b800b198700c3c4eceae321b00a953598ac53f9 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 6 Feb 2016 12:02:41 +0900 Subject: [PATCH] BUG: Timestamp subtraction of NaT with timezones --- doc/source/whatsnew/v0.18.0.txt | 2 + pandas/tseries/base.py | 25 +++++++- pandas/tseries/index.py | 20 ++++--- pandas/tseries/tdi.py | 19 ++++-- pandas/tseries/tests/test_base.py | 50 +++++++++++++++- pandas/tseries/tests/test_tslib.py | 92 +++++++++++++++++++++++------- pandas/tslib.pyx | 20 +++---- 7 files changed, 178 insertions(+), 50 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index d52e0e3098b98..5ffc2fc40701d 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -870,6 +870,8 @@ Bug Fixes - Bug in ``Series.resample`` using a frequency of ``Nano`` when the index is a ``DatetimeIndex`` and contains non-zero nanosecond parts (:issue:`12037`) +- Bug in ``NaT`` subtraction from ``Timestamp`` or ``DatetimeIndex`` with timezones (:issue:`11718`) + - Bug in ``Timedelta.round`` with negative values (:issue:`11690`) - Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`) - Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 4b8192edc56ce..d82a229f48de8 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -199,6 +199,27 @@ def inferred_freq(self): except ValueError: return None + def _nat_new(self, box=True): + """ + Return Index or ndarray filled with NaT which has the same + length as the caller. + + Parameters + ---------- + box : boolean, default True + - If True, returns an Index of the same type as the caller. + - If False, returns an ndarray of np.int64. 
+ """ + result = np.zeros(len(self), dtype=np.int64) + result.fill(tslib.iNaT) + if not box: + return result + + attribs = self._get_attributes_dict() + if not isinstance(self, com.ABCPeriodIndex): + attribs['freq'] = None + return self._simple_new(result, **attribs) + # Try to run function on index first, and then on elements of index # Especially important for group-by functionality def map(self, f): @@ -224,8 +245,8 @@ def sort_values(self, return_indexer=False, ascending=True): sorted_values = np.sort(self.values) attribs = self._get_attributes_dict() freq = attribs['freq'] - from pandas.tseries.period import PeriodIndex - if freq is not None and not isinstance(self, PeriodIndex): + + if freq is not None and not isinstance(self, com.ABCPeriodIndex): if freq.n > 0 and not ascending: freq = freq * -1 elif freq.n < 0 and ascending: diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 77aa05bc1189d..9faf6f174115c 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -740,20 +740,26 @@ def __setstate__(self, state): raise Exception("invalid pickle state") _unpickle_compat = __setstate__ + def _add_datelike(self, other): + # adding a datelike to a DatetimeIndex is only defined for NaT (all-NaT result); anything else raises + if other is tslib.NaT: + return self._nat_new(box=True) + raise TypeError("cannot add a datelike to a DatetimeIndex") + def _sub_datelike(self, other): # subtract a datetime from myself, yielding a TimedeltaIndex - from pandas import TimedeltaIndex other = Timestamp(other) - + if other is tslib.NaT: + result = self._nat_new(box=False) # require tz compat - if not self._has_same_tz(other): + elif not self._has_same_tz(other): raise TypeError("Timestamp subtraction must have the same " "timezones or no timezones") - - i8 = self.asi8 - result = i8 - other.value - result = self._maybe_mask_results(result, fill_value=tslib.iNaT) + else: + i8 = self.asi8 + result = i8 - other.value + result = self._maybe_mask_results(result, fill_value=tslib.iNaT) return TimedeltaIndex(result, 
name=self.name, copy=False) def _maybe_update_attributes(self, attrs): diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 9129a156848a9..e74879602fa64 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -317,17 +317,24 @@ def _evaluate_with_timedelta_like(self, other, op, opstr): return NotImplemented def _add_datelike(self, other): - # adding a timedeltaindex to a datetimelike from pandas import Timestamp, DatetimeIndex - other = Timestamp(other) - i8 = self.asi8 - result = i8 + other.value - result = self._maybe_mask_results(result, fill_value=tslib.iNaT) + if other is tslib.NaT: + result = self._nat_new(box=False) + else: + other = Timestamp(other) + i8 = self.asi8 + result = i8 + other.value + result = self._maybe_mask_results(result, fill_value=tslib.iNaT) return DatetimeIndex(result, name=self.name, copy=False) def _sub_datelike(self, other): - raise TypeError("cannot subtract a datelike from a TimedeltaIndex") + from pandas import DatetimeIndex + if other is tslib.NaT: + result = self._nat_new(box=False) + else: + raise TypeError("cannot subtract a datelike from a TimedeltaIndex") + return DatetimeIndex(result, name=self.name, copy=False) def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs): diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 2f28c55ae520f..7ddf3354324f9 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -341,6 +341,14 @@ def test_add_iadd(self): rng += 1 tm.assert_index_equal(rng, expected) + idx = DatetimeIndex(['2011-01-01', '2011-01-02']) + msg = "cannot add a datelike to a DatetimeIndex" + with tm.assertRaisesRegexp(TypeError, msg): + idx + pd.Timestamp('2011-01-01') + + with tm.assertRaisesRegexp(TypeError, msg): + pd.Timestamp('2011-01-01') + idx + def test_sub_isub(self): for tz in self.tz: # diff @@ -598,6 +606,16 @@ def test_infer_freq(self): tm.assert_index_equal(idx, result) self.assertEqual(result.freq, 
freq) + def test_nat_new(self): + idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x') + result = idx._nat_new() + exp = pd.DatetimeIndex([pd.NaT] * 5, name='x') + tm.assert_index_equal(result, exp) + + result = idx._nat_new(box=False) + exp = np.array([tslib.iNaT] * 5, dtype=np.int64) + tm.assert_numpy_array_equal(result, exp) + class TestTimedeltaIndexOps(Ops): def setUp(self): @@ -777,7 +795,6 @@ def test_add_iadd(self): tm.assert_index_equal(rng, expected) def test_sub_isub(self): - # only test adding/sub offsets as - is now numeric # offset @@ -800,6 +817,15 @@ def test_sub_isub(self): rng -= 1 tm.assert_index_equal(rng, expected) + idx = TimedeltaIndex(['1 day', '2 day']) + msg = "cannot subtract a datelike from a TimedeltaIndex" + with tm.assertRaisesRegexp(TypeError, msg): + idx - pd.Timestamp('2011-01-01') + + result = Timestamp('2011-01-01') + idx + expected = DatetimeIndex(['2011-01-02', '2011-01-03']) + tm.assert_index_equal(result, expected) + def test_ops_compat(self): offsets = [pd.offsets.Hour(2), timedelta(hours=2), @@ -1252,6 +1278,17 @@ def test_infer_freq(self): tm.assert_index_equal(idx, result) self.assertEqual(result.freq, freq) + def test_nat_new(self): + + idx = pd.timedelta_range('1', freq='D', periods=5, name='x') + result = idx._nat_new() + exp = pd.TimedeltaIndex([pd.NaT] * 5, name='x') + tm.assert_index_equal(result, exp) + + result = idx._nat_new(box=False) + exp = np.array([tslib.iNaT] * 5, dtype=np.int64) + tm.assert_numpy_array_equal(result, exp) + class TestPeriodIndexOps(Ops): def setUp(self): @@ -2053,3 +2090,14 @@ def test_take(self): self.assert_index_equal(result, expected) self.assertEqual(result.freq, expected.freq) self.assertEqual(result.freq, 'D') + + def test_nat_new(self): + + idx = pd.period_range('2011-01', freq='M', periods=5, name='x') + result = idx._nat_new() + exp = pd.PeriodIndex([pd.NaT] * 5, freq='M', name='x') + tm.assert_index_equal(result, exp) + + result = idx._nat_new(box=False) + exp = 
np.array([tslib.iNaT] * 5, dtype=np.int64) + tm.assert_numpy_array_equal(result, exp) diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 4c6ec91ad1f18..381b106b17eb0 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -6,6 +6,7 @@ import pandas._period as period import datetime +import pandas as pd from pandas.core.api import Timestamp, Series, Timedelta, Period, to_datetime from pandas.tslib import get_timezone from pandas._period import period_asfreq, period_ordinal @@ -22,6 +23,7 @@ class TestTimestamp(tm.TestCase): + def test_constructor(self): base_str = '2014-07-01 09:00' base_dt = datetime.datetime(2014, 7, 1, 9) @@ -915,37 +917,85 @@ def test_nanosecond_timestamp(self): def test_nat_arithmetic(self): # GH 6873 - nat = tslib.NaT - t = Timestamp('2014-01-01') - dt = datetime.datetime(2014, 1, 1) - delta = datetime.timedelta(3600) - td = Timedelta('5s') i = 2 f = 1.5 - for (left, right) in [(nat, i), (nat, f), (nat, np.nan)]: - self.assertTrue((left / right) is nat) - self.assertTrue((left * right) is nat) - self.assertTrue((right * left) is nat) + for (left, right) in [(pd.NaT, i), (pd.NaT, f), (pd.NaT, np.nan)]: + self.assertIs(left / right, pd.NaT) + self.assertIs(left * right, pd.NaT) + self.assertIs(right * left, pd.NaT) with tm.assertRaises(TypeError): right / left # Timestamp / datetime - for (left, right) in [(nat, nat), (nat, t), (nat, dt)]: + t = Timestamp('2014-01-01') + dt = datetime.datetime(2014, 1, 1) + for (left, right) in [(pd.NaT, pd.NaT), (pd.NaT, t), (pd.NaT, dt)]: # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT - self.assertTrue((right + left) is nat) - self.assertTrue((left + right) is nat) - self.assertTrue((left - right) is nat) - self.assertTrue((right - left) is nat) + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(left - right, pd.NaT) + self.assertIs(right - left, pd.NaT) # timedelta-like # 
offsets are tested in test_offsets.py - for (left, right) in [(nat, delta), (nat, td)]: + + delta = datetime.timedelta(3600) + td = Timedelta('5s') + + for (left, right) in [(pd.NaT, delta), (pd.NaT, td)]: # NaT + timedelta-like returns NaT - self.assertTrue((right + left) is nat) - self.assertTrue((left + right) is nat) - self.assertTrue((right - left) is nat) - self.assertTrue((left - right) is nat) + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(right - left, pd.NaT) + self.assertIs(left - right, pd.NaT) + + # GH 11718 + tm._skip_if_no_pytz() + import pytz + + t_utc = Timestamp('2014-01-01', tz='UTC') + t_tz = Timestamp('2014-01-01', tz='US/Eastern') + dt_tz = pytz.timezone('Asia/Tokyo').localize(dt) + + for (left, right) in [(pd.NaT, t_utc), (pd.NaT, t_tz), + (pd.NaT, dt_tz)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(left - right, pd.NaT) + self.assertIs(right - left, pd.NaT) + + def test_nat_arithmetic_index(self): + # GH 11718 + + # datetime + tm._skip_if_no_pytz() + + dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], name='x') + exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') + self.assert_index_equal(dti + pd.NaT, exp) + self.assert_index_equal(pd.NaT + dti, exp) + + dti_tz = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], + tz='US/Eastern', name='x') + exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x', tz='US/Eastern') + self.assert_index_equal(dti_tz + pd.NaT, exp) + self.assert_index_equal(pd.NaT + dti_tz, exp) + + exp = pd.TimedeltaIndex([pd.NaT, pd.NaT], name='x') + for (left, right) in [(pd.NaT, dti), (pd.NaT, dti_tz)]: + self.assert_index_equal(left - right, exp) + self.assert_index_equal(right - left, exp) + + # timedelta + tdi = pd.TimedeltaIndex(['1 day', '2 day'], name='x') + exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') + for (left, right) in [(pd.NaT, tdi)]: + 
self.assert_index_equal(left + right, exp) + self.assert_index_equal(right + left, exp) + self.assert_index_equal(left - right, exp) + self.assert_index_equal(right - left, exp) class TestTslib(tm.TestCase): @@ -1173,8 +1223,8 @@ def test_resolution(self): period.H_RESO, period.T_RESO, period.S_RESO, period.MS_RESO, period.US_RESO]): - for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Eastern' - ]: + for tz in [None, 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Eastern']: idx = date_range(start='2013-04-01', periods=30, freq=freq, tz=tz) result = period.resolution(idx.asi8, idx.tz) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 49b8f2c19700c..fe5d06e520cbf 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1055,16 +1055,12 @@ cdef class _Timestamp(datetime): return self + neg_other # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex - elif getattr(other,'_typ',None) == 'datetimeindex': - - # we may be passed reverse ops - if get_timezone(getattr(self,'tzinfo',None)) != get_timezone(other.tz): - raise TypeError("Timestamp subtraction must have the same timezones or no timezones") - + elif getattr(other, '_typ', None) == 'datetimeindex': + # timezone comparison is performed in DatetimeIndex._sub_datelike return -other.__sub__(self) # a Timestamp-TimedeltaIndex -> yields a negative TimedeltaIndex - elif getattr(other,'_typ',None) == 'timedeltaindex': + elif getattr(other, '_typ', None) == 'timedeltaindex': return (-other).__add__(self) elif other is NaT: @@ -1157,6 +1153,7 @@ cdef class _NaT(_Timestamp): if isinstance(other, datetime): return NaT result = _Timestamp.__add__(self, other) + # Timestamp.__add__ doesn't return DatetimeIndex/TimedeltaIndex if result is NotImplemented: return result except (OverflowError, OutOfBoundsDatetime): @@ -1164,15 +1161,12 @@ cdef class _NaT(_Timestamp): return NaT def __sub__(self, other): - - if other is NaT: + if isinstance(other, (datetime, timedelta)): return NaT - - if type(self) is datetime: - 
other, self = self, other try: result = _Timestamp.__sub__(self, other) - if result is NotImplemented: + # Timestamp.__sub__ may return DatetimeIndex/TimedeltaIndex + if result is NotImplemented or hasattr(result, '_typ'): return result except (OverflowError, OutOfBoundsDatetime): pass