Skip to content

ENH/BUG: DatetimeIndex and PeriodIndex in-place ops behaves incorrectly #7741

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 23, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ Bug Fixes
- Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`)
- Bug in ``HDFStore.select_column()`` not preserving UTC timezone info when selecting a DatetimeIndex (:issue:`7777`)

- Bug in ``DatetimeIndex`` and ``PeriodIndex`` where in-place addition and subtraction gave different results from the regular operations (:issue:`6527`)
- Bug in adding and subtracting ``PeriodIndex`` with ``PeriodIndex`` raising ``TypeError`` (:issue:`7741`)
- Bug in ``combine_first`` with ``PeriodIndex`` data raising ``TypeError`` (:issue:`3367`)


- Bug where pickles containing ``DateOffset`` may raise ``AttributeError`` when the ``normalize`` attribute is referred to internally (:issue:`7748`)

Expand Down
32 changes: 32 additions & 0 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
Base and utility classes for pandas objects.
"""
import datetime

from pandas import compat
import numpy as np
from pandas.core import common as com
Expand Down Expand Up @@ -511,4 +513,34 @@ def resolution(self):
from pandas.tseries.frequencies import get_reso_string
return get_reso_string(self._resolution)

def __add__(self, other):
    """
    Dispatch ``self + other`` on the type of ``other``.

    * an ``Index`` is combined via ``self.union(other)``
    * a ``DateOffset``, ``datetime.timedelta`` or ``np.timedelta64`` is
      forwarded to ``self._add_delta(other)``
    * an integer is forwarded to ``self.shift(other)``

    Any other operand yields ``NotImplemented`` so that Python can try the
    reflected operation or raise ``TypeError`` itself.
    """
    # imported inside the method rather than at module level
    # (presumably to avoid a circular import -- confirm)
    from pandas.core.index import Index
    from pandas.tseries.offsets import DateOffset

    if isinstance(other, Index):
        return self.union(other)

    delta_types = (DateOffset, datetime.timedelta, np.timedelta64)
    if isinstance(other, delta_types):
        return self._add_delta(other)

    if com.is_integer(other):
        return self.shift(other)

    return NotImplemented  # pragma: no cover

def __sub__(self, other):
    """
    Dispatch ``self - other`` on the type of ``other``.

    * an ``Index`` is combined via ``self.diff(other)``
    * a ``DateOffset``, ``datetime.timedelta`` or ``np.timedelta64`` is
      negated and forwarded to ``self._add_delta``
    * an integer is negated and forwarded to ``self.shift``

    Any other operand yields ``NotImplemented`` so that Python can try the
    reflected operation or raise ``TypeError`` itself.
    """
    # imported inside the method rather than at module level
    # (presumably to avoid a circular import -- confirm)
    from pandas.core.index import Index
    from pandas.tseries.offsets import DateOffset

    if isinstance(other, Index):
        return self.diff(other)

    delta_types = (DateOffset, datetime.timedelta, np.timedelta64)
    if isinstance(other, delta_types):
        return self._add_delta(-other)

    if com.is_integer(other):
        return self.shift(-other)

    return NotImplemented  # pragma: no cover

# The in-place operators are plain aliases of the regular ones, so
# ``idx += x`` rebinds ``idx`` to whatever ``idx + x`` returns (likewise
# for ``-=``) and both spellings give the same result.
__iadd__ = __add__
__isub__ = __sub__

def _add_delta(self, other):
    """
    Default delta handler: no delta arithmetic is supported.

    Always returns ``NotImplemented``; ``other`` is ignored.  ``__add__`` and
    ``__sub__`` return this method's result unchanged for offset/timedelta
    operands, so the sentinel propagates back to the operator machinery.
    """
    return NotImplemented


225 changes: 224 additions & 1 deletion pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,8 @@ def test_factorize(self):

class TestDatetimeIndexOps(Ops):
_allowed = '_allow_datetime_index_ops'
tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern',
'dateutil/Asia/Singapore', 'dateutil/US/Pacific']

def setUp(self):
super(TestDatetimeIndexOps, self).setUp()
Expand Down Expand Up @@ -545,7 +547,7 @@ def test_asobject_tolist(self):
self.assertEqual(idx.tolist(), expected_list)

def test_minmax(self):
for tz in [None, 'Asia/Tokyo', 'US/Eastern']:
for tz in self.tz:
# monotonic
idx1 = pd.DatetimeIndex([pd.NaT, '2011-01-01', '2011-01-02',
'2011-01-03'], tz=tz)
Expand Down Expand Up @@ -613,6 +615,100 @@ def test_resolution(self):
idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, tz=tz)
self.assertEqual(idx.resolution, expected)

def test_add_iadd(self):
    """``+`` and ``+=`` on a DatetimeIndex agree, for every tz in ``self.tz``."""
    for tz in self.tz:
        # union: disjoint, overlapping and empty right-hand operands
        union_cases = [
            (pd.date_range('1/1/2000', freq='D', periods=5, tz=tz),
             pd.date_range('1/6/2000', freq='D', periods=5, tz=tz),
             pd.date_range('1/1/2000', freq='D', periods=10, tz=tz)),
            (pd.date_range('1/1/2000', freq='D', periods=5, tz=tz),
             pd.date_range('1/4/2000', freq='D', periods=5, tz=tz),
             pd.date_range('1/1/2000', freq='D', periods=8, tz=tz)),
            (pd.date_range('1/1/2000', freq='D', periods=5, tz=tz),
             pd.DatetimeIndex([], tz=tz),
             pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)),
        ]
        for rng, other, expected in union_cases:
            tm.assert_index_equal(rng + other, expected)
            tm.assert_index_equal(rng.union(other), expected)
            rng += other
            tm.assert_index_equal(rng, expected)

        # offset: np.timedelta64 is only exercised on numpy >= 1.7
        offsets = [pd.offsets.Hour(2), timedelta(hours=2)]
        if not _np_version_under1p7:
            offsets.append(np.timedelta64(2, 'h'))

        for delta in offsets:
            rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
            expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz)
            tm.assert_index_equal(rng + delta, expected)
            rng += delta
            tm.assert_index_equal(rng, expected)

        # int
        rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, tz=tz)
        expected = pd.date_range('2000-01-01 10:00', freq='H', periods=10, tz=tz)
        tm.assert_index_equal(rng + 1, expected)
        rng += 1
        tm.assert_index_equal(rng, expected)

def test_sub_isub(self):
    """``-`` and ``-=`` on a DatetimeIndex agree, for every tz in ``self.tz``."""
    for tz in self.tz:
        # diff: disjoint, overlapping and empty right-hand operands
        diff_cases = [
            (pd.date_range('1/1/2000', freq='D', periods=5, tz=tz),
             pd.date_range('1/6/2000', freq='D', periods=5, tz=tz),
             pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)),
            (pd.date_range('1/1/2000', freq='D', periods=5, tz=tz),
             pd.date_range('1/4/2000', freq='D', periods=5, tz=tz),
             pd.date_range('1/1/2000', freq='D', periods=3, tz=tz)),
            (pd.date_range('1/1/2000', freq='D', periods=5, tz=tz),
             pd.DatetimeIndex([], tz=tz),
             pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)),
        ]
        for rng, other, expected in diff_cases:
            tm.assert_index_equal(rng - other, expected)
            tm.assert_index_equal(rng.diff(other), expected)
            rng -= other
            tm.assert_index_equal(rng, expected)

        # offset: np.timedelta64 is only exercised on numpy >= 1.7
        offsets = [pd.offsets.Hour(2), timedelta(hours=2)]
        if not _np_version_under1p7:
            offsets.append(np.timedelta64(2, 'h'))

        for delta in offsets:
            rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
            expected = pd.date_range('1999-12-31 22:00', '2000-01-31 22:00', tz=tz)
            tm.assert_index_equal(rng - delta, expected)
            rng -= delta
            tm.assert_index_equal(rng, expected)

        # int
        rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, tz=tz)
        expected = pd.date_range('2000-01-01 08:00', freq='H', periods=10, tz=tz)
        tm.assert_index_equal(rng - 1, expected)
        rng -= 1
        tm.assert_index_equal(rng, expected)


class TestPeriodIndexOps(Ops):
_allowed = '_allow_period_index_ops'
Expand Down Expand Up @@ -745,6 +841,133 @@ def test_resolution(self):
idx = pd.period_range(start='2013-04-01', periods=30, freq=freq)
self.assertEqual(idx.resolution, expected)

def test_add_iadd(self):
    """
    Check ``+`` and ``+=`` on a PeriodIndex for each kind of operand.

    * PeriodIndex operand: the result equals ``union`` (also in place, GH 6527)
    * ``DateOffset`` / ``timedelta`` operand: ``TypeError`` is raised
    * integer operand: the index is shifted
    """
    # union
    rng1 = pd.period_range('1/1/2000', freq='D', periods=5)
    other1 = pd.period_range('1/6/2000', freq='D', periods=5)
    expected1 = pd.period_range('1/1/2000', freq='D', periods=10)

    rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
    other2 = pd.period_range('1/4/2000', freq='D', periods=5)
    expected2 = pd.period_range('1/1/2000', freq='D', periods=8)

    rng3 = pd.period_range('1/1/2000', freq='D', periods=5)
    other3 = pd.PeriodIndex([], freq='D')
    expected3 = pd.period_range('1/1/2000', freq='D', periods=5)

    rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5)
    other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
    expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00',
                                '2000-01-01 11:00', '2000-01-01 12:00',
                                '2000-01-01 13:00', '2000-01-02 09:00',
                                '2000-01-02 10:00', '2000-01-02 11:00',
                                '2000-01-02 12:00', '2000-01-02 13:00'],
                               freq='H')

    rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
                           '2000-01-01 09:05'], freq='T')
    # The comma after '09:05' was missing, which silently concatenated the
    # last two literals into a single malformed string.
    other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05',
                             '2000-01-01 09:08'], freq='T')
    expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
                                '2000-01-01 09:05', '2000-01-01 09:08'],
                               freq='T')

    rng6 = pd.period_range('2000-01-01', freq='M', periods=7)
    other6 = pd.period_range('2000-04-01', freq='M', periods=7)
    expected6 = pd.period_range('2000-01-01', freq='M', periods=10)

    rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
    other7 = pd.period_range('1998-01-01', freq='A', periods=8)
    expected7 = pd.period_range('1998-01-01', freq='A', periods=10)

    cases = [(rng1, other1, expected1), (rng2, other2, expected2),
             (rng3, other3, expected3), (rng4, other4, expected4),
             (rng5, other5, expected5), (rng6, other6, expected6),
             (rng7, other7, expected7)]
    for rng, other, expected in cases:
        result_add = rng + other
        result_union = rng.union(other)

        tm.assert_index_equal(result_add, expected)
        tm.assert_index_equal(result_union, expected)
        # GH 6527
        rng += other
        tm.assert_index_equal(rng, expected)

    # offset
    for delta in [pd.offsets.Hour(2), timedelta(hours=2)]:
        rng = pd.period_range('2000-01-01', '2000-02-01')
        # raw strings keep ``\(`` a regex escape, not a string escape
        with tm.assertRaisesRegexp(TypeError, r'unsupported operand type\(s\)'):
            rng + delta
        with tm.assertRaisesRegexp(TypeError, r'unsupported operand type\(s\)'):
            rng += delta

    # int
    rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
    result = rng + 1
    expected = pd.period_range('2000-01-01 10:00', freq='H', periods=10)
    tm.assert_index_equal(result, expected)
    rng += 1
    tm.assert_index_equal(rng, expected)

def test_sub_isub(self):
    """
    Check ``-`` and ``-=`` on a PeriodIndex for each kind of operand.

    * PeriodIndex operand: the result equals ``diff`` (also in place)
    * ``DateOffset`` / ``timedelta`` operand: ``TypeError`` is raised
    * integer operand: the index is shifted backwards
    """
    # diff
    rng1 = pd.period_range('1/1/2000', freq='D', periods=5)
    other1 = pd.period_range('1/6/2000', freq='D', periods=5)
    expected1 = pd.period_range('1/1/2000', freq='D', periods=5)

    rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
    other2 = pd.period_range('1/4/2000', freq='D', periods=5)
    expected2 = pd.period_range('1/1/2000', freq='D', periods=3)

    rng3 = pd.period_range('1/1/2000', freq='D', periods=5)
    other3 = pd.PeriodIndex([], freq='D')
    expected3 = pd.period_range('1/1/2000', freq='D', periods=5)

    rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5)
    other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
    expected4 = rng4

    rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
                           '2000-01-01 09:05'], freq='T')
    other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05'], freq='T')
    expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T')

    rng6 = pd.period_range('2000-01-01', freq='M', periods=7)
    other6 = pd.period_range('2000-04-01', freq='M', periods=7)
    expected6 = pd.period_range('2000-01-01', freq='M', periods=3)

    rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
    other7 = pd.period_range('1998-01-01', freq='A', periods=8)
    expected7 = pd.period_range('2006-01-01', freq='A', periods=2)

    cases = [(rng1, other1, expected1), (rng2, other2, expected2),
             (rng3, other3, expected3), (rng4, other4, expected4),
             (rng5, other5, expected5), (rng6, other6, expected6),
             (rng7, other7, expected7)]
    for rng, other, expected in cases:
        result_sub = rng - other
        result_diff = rng.diff(other)

        tm.assert_index_equal(result_sub, expected)
        tm.assert_index_equal(result_diff, expected)
        rng -= other
        tm.assert_index_equal(rng, expected)

    # offset
    # This section used to exercise ``+`` / ``+=`` on whatever ``rng`` was
    # left over from the loop above; it now tests subtraction on a fresh
    # index.
    # NOTE(review): assumes subtraction reports the same
    # 'unsupported operand type(s)' message as addition -- confirm.
    for delta in [pd.offsets.Hour(2), timedelta(hours=2)]:
        rng = pd.period_range('2000-01-01', '2000-02-01')
        # raw strings keep ``\(`` a regex escape, not a string escape
        with tm.assertRaisesRegexp(TypeError, r'unsupported operand type\(s\)'):
            rng - delta
        with tm.assertRaisesRegexp(TypeError, r'unsupported operand type\(s\)'):
            rng -= delta

    # int
    rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
    result = rng - 1
    expected = pd.period_range('2000-01-01 08:00', freq='H', periods=10)
    tm.assert_index_equal(result, expected)
    rng -= 1
    tm.assert_index_equal(rng, expected)


if __name__ == '__main__':
import nose
Expand Down
24 changes: 0 additions & 24 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,30 +595,6 @@ def __setstate__(self, state):
else: # pragma: no cover
np.ndarray.__setstate__(self, state)

def __add__(self, other):
    """
    Dispatch ``self + other`` on the type of ``other``.

    An ``Index`` goes to ``union``, a ``DateOffset`` / ``timedelta`` /
    ``np.timedelta64`` goes to ``_add_delta`` and an integer goes to
    ``shift``.

    Raises
    ------
    TypeError
        For any other operand; the operand itself is the exception argument.
    """
    if isinstance(other, Index):
        return self.union(other)
    if isinstance(other, (DateOffset, timedelta, np.timedelta64)):
        return self._add_delta(other)
    if com.is_integer(other):
        return self.shift(other)
    raise TypeError(other)  # pragma: no cover

def __sub__(self, other):
    """
    Dispatch ``self - other`` on the type of ``other``.

    An ``Index`` goes to ``diff``; a ``DateOffset`` / ``timedelta`` /
    ``np.timedelta64`` is negated and passed to ``_add_delta``; an integer
    is negated and passed to ``shift``.

    Raises
    ------
    TypeError
        For any other operand; the operand itself is the exception argument.
    """
    if isinstance(other, Index):
        return self.diff(other)
    if isinstance(other, (DateOffset, timedelta, np.timedelta64)):
        return self._add_delta(-other)
    if com.is_integer(other):
        return self.shift(-other)
    raise TypeError(other)  # pragma: no cover

def _add_delta(self, delta):
if isinstance(delta, (Tick, timedelta)):
inc = offsets._delta_to_nanoseconds(delta)
Expand Down
13 changes: 0 additions & 13 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,19 +872,6 @@ def shift(self, n):
values[mask] = tslib.iNaT
return PeriodIndex(data=values, name=self.name, freq=self.freq)

def __add__(self, other):
    """
    Return ``self.shift(other)``.

    If ``shift`` raises ``TypeError`` the method returns ``NotImplemented``
    instead of propagating the error.
    """
    try:
        shifted = self.shift(other)
    except TypeError:
        # self.values + other raises TypeError for invalid input
        return NotImplemented
    return shifted

def __sub__(self, other):
    """
    Return ``self.shift(-other)``.

    If negating ``other`` or the ``shift`` call raises ``TypeError`` the
    method returns ``NotImplemented`` instead of propagating the error.
    """
    try:
        # the negation stays inside the ``try``: it can raise TypeError too
        shifted = self.shift(-other)
    except TypeError:
        return NotImplemented
    return shifted

@property
def inferred_type(self):
# b/c data is represented as ints make sure we can't have ambiguous
Expand Down
14 changes: 14 additions & 0 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -2450,6 +2450,20 @@ def test_recreate_from_data(self):
idx = PeriodIndex(org.values, freq=o)
self.assertTrue(idx.equals(org))

def test_combine_first(self):
    """``Series.combine_first`` fills NaNs the same way on both index types."""
    # GH 3367
    datetime_idx = pd.DatetimeIndex(start='1950-01-31', end='1950-07-31', freq='M')
    period_idx = pd.PeriodIndex(start=pd.Period('1950-1'), end=pd.Period('1950-7'), freq='M')

    # PeriodIndex is expected to behave exactly like DatetimeIndex here
    for idx in (datetime_idx, period_idx):
        sparse = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx)
        filler = pd.Series([9, 9, 9, 9, 9, 9, 9], index=idx)
        expected = pd.Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64)

        tm.assert_series_equal(sparse.combine_first(filler), expected)


def _permute(obj):
    """Return the elements of ``obj`` taken in a random order."""
    shuffled_positions = np.random.permutation(len(obj))
    return obj.take(shuffled_positions)

Expand Down
Loading