diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 06c93541a7783..086c24246918d 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -198,6 +198,10 @@ Bug Fixes - Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`) - Bug in ``HDFStore.select_column()`` not preserving UTC timezone info when selecting a DatetimeIndex (:issue:`7777`) +- Bug in ``DatetimeIndex`` and ``PeriodIndex`` where in-place addition and subtraction gave different results from the normal operations (:issue:`6527`) +- Bug where adding and subtracting ``PeriodIndex`` with ``PeriodIndex`` raised ``TypeError`` (:issue:`7741`) +- Bug in ``combine_first`` with ``PeriodIndex`` data raising ``TypeError`` (:issue:`3367`) + - Bug in pickles contains ``DateOffset`` may raise ``AttributeError`` when ``normalize`` attribute is reffered internally (:issue:`7748`) diff --git a/pandas/core/base.py b/pandas/core/base.py index 4035627b98458..243e34e35784a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1,6 +1,8 @@ """ Base and utility classes for pandas objects.
""" +import datetime + from pandas import compat import numpy as np from pandas.core import common as com @@ -511,4 +513,34 @@ def resolution(self): from pandas.tseries.frequencies import get_reso_string return get_reso_string(self._resolution) + def __add__(self, other): # Index: union; offset or timedelta: _add_delta; integer: shift + from pandas.core.index import Index + from pandas.tseries.offsets import DateOffset + if isinstance(other, Index): + return self.union(other) + elif isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)): + return self._add_delta(other) + elif com.is_integer(other): + return self.shift(other) + else: # pragma: no cover + return NotImplemented + + def __sub__(self, other): # Index: diff; offset or timedelta: _add_delta of the negated value; integer: shift by the negated count + from pandas.core.index import Index + from pandas.tseries.offsets import DateOffset + if isinstance(other, Index): + return self.diff(other) + elif isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)): + return self._add_delta(-other) + elif com.is_integer(other): + return self.shift(-other) + else: # pragma: no cover + return NotImplemented + + __iadd__ = __add__ # in-place forms reuse the binary operators, so both give the same result (GH 6527) + __isub__ = __sub__ + + def _add_delta(self, other): # default for classes that define no delta arithmetic of their own + return NotImplemented # handed back unchanged by __add__ and __sub__, so Python tries the reflected operand and raises TypeError if that fails too + diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 761d79a288df3..1b7db1451f6cf 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -481,6 +481,8 @@ def test_factorize(self): class TestDatetimeIndexOps(Ops): _allowed = '_allow_datetime_index_ops' + tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/Asia/Singapore', 'dateutil/US/Pacific'] def setUp(self): super(TestDatetimeIndexOps, self).setUp() @@ -545,7 +547,7 @@ def test_asobject_tolist(self): self.assertEqual(idx.tolist(), expected_list) def test_minmax(self): - for tz in [None, 'Asia/Tokyo', 'US/Eastern']: + for tz in self.tz: # monotonic idx1 = pd.DatetimeIndex([pd.NaT, '2011-01-01', '2011-01-02', '2011-01-03'], tz=tz) @@ -613,6 +615,100 @@ def test_resolution(self): idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, tz=tz)
self.assertEqual(idx.resolution, expected) + def test_add_iadd(self): + for tz in self.tz: + # union + rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) + expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz) + + rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) + expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz) + + rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), + (rng3, other3, expected3)]: + result_add = rng + other + result_union = rng.union(other) + + tm.assert_index_equal(result_add, expected) + tm.assert_index_equal(result_union, expected) + rng += other + tm.assert_index_equal(rng, expected) + + # offset + if _np_version_under1p7: + offsets = [pd.offsets.Hour(2), timedelta(hours=2)] + else: + offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h')] + + for delta in offsets: + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + result = rng + delta + expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) + tm.assert_index_equal(result, expected) + rng += delta + tm.assert_index_equal(rng, expected) + + # int + rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, tz=tz) + result = rng + 1 + expected = pd.date_range('2000-01-01 10:00', freq='H', periods=10, tz=tz) + tm.assert_index_equal(result, expected) + rng += 1 + tm.assert_index_equal(rng, expected) + + def test_sub_isub(self): + for tz in self.tz: + # diff + rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) + expected1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + rng2 = 
pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) + expected2 = pd.date_range('1/1/2000', freq='D', periods=3, tz=tz) + + rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), + (rng3, other3, expected3)]: + result_add = rng - other + result_union = rng.diff(other) + + tm.assert_index_equal(result_add, expected) + tm.assert_index_equal(result_union, expected) + rng -= other + tm.assert_index_equal(rng, expected) + + # offset + if _np_version_under1p7: + offsets = [pd.offsets.Hour(2), timedelta(hours=2)] + else: + offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h')] + + for delta in offsets: + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + result = rng - delta + expected = pd.date_range('1999-12-31 22:00', '2000-01-31 22:00', tz=tz) + tm.assert_index_equal(result, expected) + rng -= delta + tm.assert_index_equal(rng, expected) + + # int + rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, tz=tz) + result = rng - 1 + expected = pd.date_range('2000-01-01 08:00', freq='H', periods=10, tz=tz) + tm.assert_index_equal(result, expected) + rng -= 1 + tm.assert_index_equal(rng, expected) + class TestPeriodIndexOps(Ops): _allowed = '_allow_period_index_ops' @@ -745,6 +841,133 @@ def test_resolution(self): idx = pd.period_range(start='2013-04-01', periods=30, freq=freq) self.assertEqual(idx.resolution, expected) + def test_add_iadd(self): + # union + rng1 = pd.period_range('1/1/2000', freq='D', periods=5) + other1 = pd.period_range('1/6/2000', freq='D', periods=5) + expected1 = pd.period_range('1/1/2000', freq='D', periods=10) + + rng2 = pd.period_range('1/1/2000', freq='D', periods=5) + other2 = pd.period_range('1/4/2000', freq='D', periods=5) + expected2 = 
pd.period_range('1/1/2000', freq='D', periods=8) + + rng3 = pd.period_range('1/1/2000', freq='D', periods=5) + other3 = pd.PeriodIndex([], freq='D') + expected3 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) + other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) + expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00', + '2000-01-01 11:00', '2000-01-01 12:00', + '2000-01-01 13:00', '2000-01-02 09:00', + '2000-01-02 10:00', '2000-01-02 11:00', + '2000-01-02 12:00', '2000-01-02 13:00'], + freq='H') + + rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05'], freq='T') + other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05', + '2000-01-01 09:08'], freq='T') + expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05', '2000-01-01 09:08'], + freq='T') + + rng6 = pd.period_range('2000-01-01', freq='M', periods=7) + other6 = pd.period_range('2000-04-01', freq='M', periods=7) + expected6 = pd.period_range('2000-01-01', freq='M', periods=10) + + rng7 = pd.period_range('2003-01-01', freq='A', periods=5) + other7 = pd.period_range('1998-01-01', freq='A', periods=8) + expected7 = pd.period_range('1998-01-01', freq='A', periods=10) + + for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), + (rng3, other3, expected3), (rng4, other4, expected4), + (rng5, other5, expected5), (rng6, other6, expected6), + (rng7, other7, expected7)]: + + result_add = rng + other + result_union = rng.union(other) + + tm.assert_index_equal(result_add, expected) + tm.assert_index_equal(result_union, expected) + # GH 6527 + rng += other + tm.assert_index_equal(rng, expected) + + # offset + for delta in [pd.offsets.Hour(2), timedelta(hours=2)]: + rng = pd.period_range('2000-01-01', '2000-02-01') + with tm.assertRaisesRegexp(TypeError, r'unsupported operand type\(s\)'): + result = rng + delta + with
tm.assertRaisesRegexp(TypeError, r'unsupported operand type\(s\)'): + rng += delta + + # int + rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10) + result = rng + 1 + expected = pd.period_range('2000-01-01 10:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng += 1 + tm.assert_index_equal(rng, expected) + + def test_sub_isub(self): + # diff + rng1 = pd.period_range('1/1/2000', freq='D', periods=5) + other1 = pd.period_range('1/6/2000', freq='D', periods=5) + expected1 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng2 = pd.period_range('1/1/2000', freq='D', periods=5) + other2 = pd.period_range('1/4/2000', freq='D', periods=5) + expected2 = pd.period_range('1/1/2000', freq='D', periods=3) + + rng3 = pd.period_range('1/1/2000', freq='D', periods=5) + other3 = pd.PeriodIndex([], freq='D') + expected3 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) + other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) + expected4 = rng4 + + rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05'], freq='T') + other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05'], freq='T') + expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T') + + rng6 = pd.period_range('2000-01-01', freq='M', periods=7) + other6 = pd.period_range('2000-04-01', freq='M', periods=7) + expected6 = pd.period_range('2000-01-01', freq='M', periods=3) + + rng7 = pd.period_range('2003-01-01', freq='A', periods=5) + other7 = pd.period_range('1998-01-01', freq='A', periods=8) + expected7 = pd.period_range('2006-01-01', freq='A', periods=2) + + for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), + (rng3, other3, expected3), (rng4, other4, expected4), + (rng5, other5, expected5), (rng6, other6, expected6), + (rng7, other7, expected7),]: + result_add = rng - other + result_union = rng.diff(other) + +
tm.assert_index_equal(result_add, expected) + tm.assert_index_equal(result_union, expected) + rng -= other + tm.assert_index_equal(rng, expected) + + # offset + for delta in [pd.offsets.Hour(2), timedelta(hours=2)]: + with tm.assertRaisesRegexp(TypeError, r'unsupported operand type\(s\)'): + result = rng - delta + with tm.assertRaisesRegexp(TypeError, r'unsupported operand type\(s\)'): + rng -= delta + + # int + rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10) + result = rng - 1 + expected = pd.period_range('2000-01-01 08:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng -= 1 + tm.assert_index_equal(rng, expected) + if __name__ == '__main__': import nose diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 9423037844e74..2a3c53135a644 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -595,30 +595,6 @@ def __setstate__(self, state): else: # pragma: no cover np.ndarray.__setstate__(self, state) - def __add__(self, other): - if isinstance(other, Index): - return self.union(other) - elif isinstance(other, (DateOffset, timedelta)): - return self._add_delta(other) - elif isinstance(other, np.timedelta64): - return self._add_delta(other) - elif com.is_integer(other): - return self.shift(other) - else: # pragma: no cover - raise TypeError(other) - - def __sub__(self, other): - if isinstance(other, Index): - return self.diff(other) - elif isinstance(other, (DateOffset, timedelta)): - return self._add_delta(-other) - elif isinstance(other, np.timedelta64): - return self._add_delta(-other) - elif com.is_integer(other): - return self.shift(-other) - else: # pragma: no cover - raise TypeError(other) - def _add_delta(self, delta): if isinstance(delta, (Tick, timedelta)): inc = offsets._delta_to_nanoseconds(delta) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 8c4bb2f5adc5e..887bf806dd4e4 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -872,19 +872,6 @@ def
shift(self, n): values[mask] = tslib.iNaT return PeriodIndex(data=values, name=self.name, freq=self.freq) - def __add__(self, other): - try: - return self.shift(other) - except TypeError: - # self.values + other raises TypeError for invalid input - return NotImplemented - - def __sub__(self, other): - try: - return self.shift(-other) - except TypeError: - return NotImplemented - @property def inferred_type(self): # b/c data is represented as ints make sure we can't have ambiguous diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 53375b4d07796..f5f66a49c29d4 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -2450,6 +2450,20 @@ def test_recreate_from_data(self): idx = PeriodIndex(org.values, freq=o) self.assertTrue(idx.equals(org)) + def test_combine_first(self): + # GH 3367 + didx = pd.DatetimeIndex(start='1950-01-31', end='1950-07-31', freq='M') + pidx = pd.PeriodIndex(start=pd.Period('1950-1'), end=pd.Period('1950-7'), freq='M') + # check to be consistent with DatetimeIndex + for idx in [didx, pidx]: + a = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx) + b = pd.Series([9, 9, 9, 9, 9, 9, 9], index=idx) + + result = a.combine_first(b) + expected = pd.Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64) + tm.assert_series_equal(result, expected) + + def _permute(obj): return obj.take(np.random.permutation(len(obj))) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index b6761426edc5d..f2bc66f156c75 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1235,13 +1235,6 @@ def test_last_subset(self): result = ts[:0].last('3M') assert_series_equal(result, ts[:0]) - def test_add_offset(self): - rng = date_range('1/1/2000', '2/1/2000') - - result = rng + offsets.Hour(2) - expected = date_range('1/1/2000 02:00', '2/1/2000 02:00') - self.assertTrue(result.equals(expected)) - def 
test_format_pre_1900_dates(self): rng = date_range('1/1/1850', '1/1/1950', freq='A-DEC') rng.format() @@ -2314,14 +2307,6 @@ def test_map(self): exp = [f(x) for x in rng] self.assert_numpy_array_equal(result, exp) - def test_add_union(self): - rng = date_range('1/1/2000', periods=5) - rng2 = date_range('1/6/2000', periods=5) - - result = rng + rng2 - expected = rng.union(rng2) - self.assertTrue(result.equals(expected)) - def test_misc_coverage(self): rng = date_range('1/1/2000', periods=5) result = rng.groupby(rng.day)