Skip to content

BUG: DatetimeIndex + arraylike of DateOffsets #18849

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Dec 29, 2017
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ Conversion
- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`)
- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`)
- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`)
- Bug in :class:`DatetimeIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18224`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use this PR's number here (18224 is a very general reference). Is there an issue for this one specifically? (Don't create one; only reference it if one is already open.)



Indexing
Expand Down
29 changes: 23 additions & 6 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pandas.core.dtypes.common import (
is_integer, is_float,
is_bool_dtype, _ensure_int64,
is_scalar, is_dtype_equal,
is_scalar, is_dtype_equal, is_offsetlike,
is_list_like, is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCIndex, ABCSeries,
Expand Down Expand Up @@ -640,6 +640,14 @@ def _sub_datelike(self, other):
def _sub_period(self, other):
return NotImplemented

def _add_offset_array(self, other):
# Array/Index of DateOffset objects
# Default hook used by __add__ when `other` is offset-like: this
# implementation does not perform the addition and simply returns
# NotImplemented.
return NotImplemented

def _sub_offset_array(self, other):
# Array/Index of DateOffset objects
# Default hook used by __sub__ when `other` is offset-like: this
# implementation does not perform the subtraction and simply returns
# NotImplemented.
return NotImplemented

@classmethod
def _add_datetimelike_methods(cls):
"""
Expand All @@ -662,7 +670,12 @@ def __add__(self, other):
return self._add_delta(other)
elif is_integer(other):
return self.shift(other)
elif isinstance(other, (Index, datetime, np.datetime64)):
elif isinstance(other, (datetime, np.datetime64)):
return self._add_datelike(other)
elif is_offsetlike(other):
# Array/Index of DateOffset objects
return self._add_offset_array(other)
elif isinstance(other, Index):
return self._add_datelike(other)
else: # pragma: no cover
return NotImplemented
Expand All @@ -683,10 +696,6 @@ def __sub__(self, other):
return self._add_delta(-other)
elif isinstance(other, DatetimeIndex):
return self._sub_datelike(other)
elif isinstance(other, Index):
raise TypeError("cannot subtract {typ1} and {typ2}"
.format(typ1=type(self).__name__,
typ2=type(other).__name__))
elif isinstance(other, (DateOffset, timedelta)):
return self._add_delta(-other)
elif is_integer(other):
Expand All @@ -695,6 +704,14 @@ def __sub__(self, other):
return self._sub_datelike(other)
elif isinstance(other, Period):
return self._sub_period(other)
elif is_offsetlike(other):
# Array/Index of DateOffset objects
return self._sub_offset_array(other)
elif isinstance(other, Index):
raise TypeError("cannot subtract {typ1} and {typ2}"
.format(typ1=type(self).__name__,
typ2=type(other).__name__))

else: # pragma: no cover
return NotImplemented
cls.__sub__ = __sub__
Expand Down
26 changes: 26 additions & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,6 +894,32 @@ def _add_offset(self, offset):
"or DatetimeIndex", PerformanceWarning)
return self.astype('O') + offset

def _add_offset_array(self, other):
# Array/Index of DateOffset objects
if isinstance(other, ABCSeries):
return NotImplemented
elif len(other) == 1:
return self + other[0]
else:
warnings.warn("Adding/subtracting array of DateOffsets to "
"{} not vectorized".format(type(self)),
PerformanceWarning)
return self.astype('O') + np.array(other)
# TODO: This works for __add__ but loses dtype in __sub__

def _sub_offset_array(self, other):
# Array/Index of DateOffset objects
if isinstance(other, ABCSeries):
return NotImplemented
elif len(other) == 1:
return self - other[0]
else:
warnings.warn("Adding/subtracting array of DateOffsets to "
"{} not vectorized".format(type(self)),
PerformanceWarning)
res_values = self.astype('O').values - np.array(other)
return self.__class__(res_values, freq='infer')

def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
from pandas.io.formats.format import _get_format_datetime64_from_values
format = _get_format_datetime64_from_values(self, date_format)
Expand Down
42 changes: 42 additions & 0 deletions pandas/tests/indexes/datetimes/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,48 @@ def test_datetimeindex_sub_timestamp_overflow(self):
with pytest.raises(OverflowError):
dtimin - variant

@pytest.mark.parametrize('box', [np.array, pd.Index])
def test_dti_add_offset_array(self, tz, box):
dti = pd.date_range('2017-01-01', periods=2, tz=tz)
# TODO: check that `name` propagates correctly
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reference issue number below all of your new tests.

other = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
res = dti + other
expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))],
name=dti.name, freq='infer')
tm.assert_index_equal(res, expected)

res2 = other + dti
tm.assert_index_equal(res2, expected)

@pytest.mark.parametrize('box', [np.array, pd.Index])
def test_dti_sub_offset_array(self, tz, box):
    """Subtracting boxed DateOffsets from a DatetimeIndex is elementwise."""
    # GH#18824
    index = pd.date_range('2017-01-01', periods=2, tz=tz)
    offsets = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])

    result = index - offsets

    # Build the expected index by pairing each timestamp with its offset.
    pairwise = [stamp - offset for stamp, offset in zip(index, offsets)]
    expected = DatetimeIndex(pairwise, name=index.name, freq='infer')
    tm.assert_index_equal(result, expected)

def test_dti_with_offset_series(self, tz):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you parametrize this with names (None, same name, other name) to make sure they are propagated correctly?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call. Looks like Series.op(Index) was always taking on the name of the Series.

# GH#18824
dti = pd.date_range('2017-01-01', periods=2, tz=tz)
other = pd.Series([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)],
name='foo')

expected_add = pd.Series([dti[n] + other[n] for n in range(len(dti))],
name='foo')
res = dti + other
tm.assert_series_equal(res, expected_add)
res2 = other + dti
tm.assert_series_equal(res2, expected_add)

expected_sub = pd.Series([dti[n] - other[n] for n in range(len(dti))],
name='foo')

res3 = dti - other
tm.assert_series_equal(res3, expected_sub)


# GH 10699
@pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex],
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/indexes/period/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,30 @@


class TestPeriodIndexArithmetic(object):
def test_pi_add_offset_array(self):
    """
    Check PeriodIndex + ndarray of DateOffsets.

    QuarterEnd offsets are expected to shift each period elementwise;
    Hour/Minute offsets are expected to raise IncompatibleFrequency,
    and the reflected operation with them is expected to raise
    TypeError.
    """
    periods = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')])

    quarter_offsets = np.array([
        pd.offsets.QuarterEnd(n=1, startingMonth=12),
        pd.offsets.QuarterEnd(n=-2, startingMonth=12),
    ])
    shifted = periods + quarter_offsets
    expected = pd.PeriodIndex([pd.Period('2015Q2'), pd.Period('2015Q4')])
    tm.assert_index_equal(shifted, expected)

    tick_offsets = np.array([pd.offsets.Hour(n=1),
                             pd.offsets.Minute(n=-2)])
    with pytest.raises(period.IncompatibleFrequency):
        periods + tick_offsets
    with pytest.raises(TypeError):
        tick_offsets + periods

@pytest.mark.xfail(reason='GH#18824 radd doesnt implement this case')
def test_pi_radd_offset_array(self):
    """Check ndarray of QuarterEnd offsets + PeriodIndex (reflected add)."""
    periods = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')])
    quarter_offsets = np.array([
        pd.offsets.QuarterEnd(n=1, startingMonth=12),
        pd.offsets.QuarterEnd(n=-2, startingMonth=12),
    ])

    shifted = quarter_offsets + periods

    expected = pd.PeriodIndex([pd.Period('2015Q2'), pd.Period('2015Q4')])
    tm.assert_index_equal(shifted, expected)

def test_add_iadd(self):
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = pd.period_range('1/6/2000', freq='D', periods=5)
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/indexes/timedeltas/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,23 @@ def freq(request):
class TestTimedeltaIndexArithmetic(object):
_holder = TimedeltaIndex

@pytest.mark.xfail(reason='GH#18824 ufunc add cannot use operands...')
def test_tdi_with_offset_array(self):
    """
    Check TimedeltaIndex + ndarray of DateOffsets in both operand orders.

    Hour/Minute offsets are expected to be added elementwise; an array
    of QuarterEnd/Week offsets is expected to raise TypeError.
    """
    tdi = pd.TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'])
    offs = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)])
    # Minute(n=-2) moves the second element two minutes *back*, so the
    # expected value is 03:58:00 (the previous 04:02:00 was a sign error
    # masked by the xfail marker).
    expected = pd.TimedeltaIndex(['1 days 01:00:00', '3 days 03:58:00'])

    res = tdi + offs
    tm.assert_index_equal(res, expected)

    res2 = offs + tdi
    tm.assert_index_equal(res2, expected)

    anchored = np.array([pd.offsets.QuarterEnd(),
                         pd.offsets.Week(weekday=2)])
    with pytest.raises(TypeError):
        tdi + anchored

# TODO: Split by ops, better name
def test_numeric_compat(self):
idx = self._holder(np.arange(5, dtype='int64'))
Expand Down