Skip to content

Fix TimedeltaIndex +/- offset array #19095

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 7, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ Conversion
- Bug in :class:`WeekOfMonth` and class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`,:issue:`18672`,:issue:`18864`)
- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`)
- Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`)
- Bug in :class:`DatetimeIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`)
- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`)
- Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`)
- Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`)
- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`)
Expand Down
20 changes: 10 additions & 10 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,20 +675,20 @@ def __add__(self, other):
return NotImplemented
elif is_timedelta64_dtype(other):
return self._add_delta(other)
elif isinstance(other, (DateOffset, timedelta)):
return self._add_delta(other)
elif is_offsetlike(other):
# Array/Index of DateOffset objects
return self._add_offset_array(other)
elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
if hasattr(other, '_add_delta'):
return other._add_delta(self)
raise TypeError("cannot add TimedeltaIndex and {typ}"
.format(typ=type(other)))
elif isinstance(other, (DateOffset, timedelta)):
return self._add_delta(other)
elif is_integer(other):
return self.shift(other)
elif isinstance(other, (datetime, np.datetime64)):
return self._add_datelike(other)
elif is_offsetlike(other):
# Array/Index of DateOffset objects
return self._add_offset_array(other)
elif isinstance(other, Index):
return self._add_datelike(other)
else: # pragma: no cover
Expand All @@ -708,24 +708,24 @@ def __sub__(self, other):
return NotImplemented
elif is_timedelta64_dtype(other):
return self._add_delta(-other)
elif isinstance(other, (DateOffset, timedelta)):
return self._add_delta(-other)
elif is_offsetlike(other):
# Array/Index of DateOffset objects
return self._sub_offset_array(other)
elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
if not isinstance(other, TimedeltaIndex):
raise TypeError("cannot subtract TimedeltaIndex and {typ}"
.format(typ=type(other).__name__))
return self._add_delta(-other)
elif isinstance(other, DatetimeIndex):
return self._sub_datelike(other)
elif isinstance(other, (DateOffset, timedelta)):
return self._add_delta(-other)
elif is_integer(other):
return self.shift(-other)
elif isinstance(other, (datetime, np.datetime64)):
return self._sub_datelike(other)
elif isinstance(other, Period):
return self._sub_period(other)
elif is_offsetlike(other):
# Array/Index of DateOffset objects
return self._sub_offset_array(other)
elif isinstance(other, Index):
raise TypeError("cannot subtract {typ1} and {typ2}"
.format(typ1=type(self).__name__,
Expand Down
47 changes: 45 additions & 2 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
""" implement the TimedeltaIndex """

from datetime import timedelta
import warnings

import numpy as np
from pandas.core.dtypes.common import (
_TD_DTYPE,
Expand Down Expand Up @@ -364,8 +366,8 @@ def _add_delta(self, delta):
# update name when delta is index
name = com._maybe_match_name(self, delta)
else:
raise ValueError("cannot add the type {0} to a TimedeltaIndex"
.format(type(delta)))
raise TypeError("cannot add the type {0} to a TimedeltaIndex"
.format(type(delta)))

result = TimedeltaIndex(new_values, freq='infer', name=name)
return result
Expand Down Expand Up @@ -411,6 +413,47 @@ def _sub_datelike(self, other):
raise TypeError("cannot subtract a datelike from a TimedeltaIndex")
return DatetimeIndex(result, name=self.name, copy=False)

def _add_offset_array(self, other):
# Array/Index of DateOffset objects
try:
# TimedeltaIndex can only operate with a subset of DateOffset
# subclasses. Incompatible classes will raise AttributeError,
# which we re-raise as TypeError
if isinstance(other, ABCSeries):
return NotImplemented
elif len(other) == 1:
return self + other[0]
else:
from pandas.errors import PerformanceWarning
warnings.warn("Adding/subtracting array of DateOffsets to "
"{} not vectorized".format(type(self)),
PerformanceWarning)
return self.astype('O') + np.array(other)
# TODO: This works for __add__ but loses dtype in __sub__
except AttributeError:
raise TypeError("Cannot add non-tick DateOffset to TimedeltaIndex")

def _sub_offset_array(self, other):
# Array/Index of DateOffset objects
try:
# TimedeltaIndex can only operate with a subset of DateOffset
# subclasses. Incompatible classes will raise AttributeError,
# which we re-raise as TypeError
if isinstance(other, ABCSeries):
return NotImplemented
elif len(other) == 1:
return self - other[0]
else:
from pandas.errors import PerformanceWarning
warnings.warn("Adding/subtracting array of DateOffsets to "
"{} not vectorized".format(type(self)),
PerformanceWarning)
res_values = self.astype('O').values - np.array(other)
return self.__class__(res_values, freq='infer')
except AttributeError:
raise TypeError("Cannot subtrack non-tick DateOffset from"
" TimedeltaIndex")

def _format_native_types(self, na_rep=u('NaT'),
date_format=None, **kwargs):
from pandas.io.formats.format import Timedelta64Formatter
Expand Down
102 changes: 92 additions & 10 deletions pandas/tests/indexes/timedeltas/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
to_timedelta, timedelta_range, date_range,
Series,
Timestamp, Timedelta)
from pandas.errors import PerformanceWarning


@pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2),
Expand All @@ -28,23 +29,104 @@ def freq(request):
class TestTimedeltaIndexArithmetic(object):
_holder = TimedeltaIndex

@pytest.mark.xfail(reason='GH#18824 ufunc add cannot use operands...')
def test_tdi_with_offset_array(self):
@pytest.mark.parametrize('box', [np.array, pd.Index])
def test_tdi_add_offset_array(self, box):
# GH#18849
tdi = pd.TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'])
offs = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)])
expected = pd.TimedeltaIndex(['1 days 01:00:00', '3 days 04:02:00'])
tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'])
other = box([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)])

res = tdi + offs
expected = TimedeltaIndex([tdi[n] + other[n] for n in range(len(tdi))],
freq='infer')

with tm.assert_produces_warning(PerformanceWarning):
res = tdi + other
tm.assert_index_equal(res, expected)

res2 = offs + tdi
with tm.assert_produces_warning(PerformanceWarning):
res2 = other + tdi
tm.assert_index_equal(res2, expected)

anchored = np.array([pd.offsets.QuarterEnd(),
pd.offsets.Week(weekday=2)])
anchored = box([pd.offsets.QuarterEnd(),
pd.offsets.Week(weekday=2)])

# addition/subtraction ops with anchored offsets should issue
# a PerformanceWarning and _then_ raise a TypeError.
with pytest.raises(TypeError):
with tm.assert_produces_warning(PerformanceWarning):
tdi + anchored
with pytest.raises(TypeError):
with tm.assert_produces_warning(PerformanceWarning):
anchored + tdi

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally could parameterize some of these for index, ndarray & Series as well (you can leave them here is ok). I believe you have some duplication in test_base

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yah, part of it is tricky because parametrizing with box and names at the same time is cumbersome. My thought at the moment is to get the bugs fixed and tested first and take on refactoring later (when there's not a constant flow of rebasing to be done).

@pytest.mark.parametrize('box', [np.array, pd.Index])
def test_tdi_sub_offset_array(self, box):
# GH#18824
tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'])
other = box([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)])

expected = TimedeltaIndex([tdi[n] - other[n] for n in range(len(tdi))],
freq='infer')

with tm.assert_produces_warning(PerformanceWarning):
res = tdi - other
tm.assert_index_equal(res, expected)

anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])

# addition/subtraction ops with anchored offsets should issue
# a PerformanceWarning and _then_ raise a TypeError.
with pytest.raises(TypeError):
with tm.assert_produces_warning(PerformanceWarning):
tdi - anchored
with pytest.raises(TypeError):
with tm.assert_produces_warning(PerformanceWarning):
anchored - tdi

@pytest.mark.parametrize('names', [(None, None, None),
('foo', 'bar', None),
('foo', 'foo', 'foo')])
def test_tdi_with_offset_series(self, names):
# GH#18849
tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'],
name=names[0])
other = Series([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)],
name=names[1])

expected_add = Series([tdi[n] + other[n] for n in range(len(tdi))],
name=names[2])

with tm.assert_produces_warning(PerformanceWarning):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would put a comment on before these sub-section of tests, hard to follow your flow others, IOW enumerate the cases. only looking for a comment at lines 89, 100, 107

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do.

res = tdi + other
tm.assert_series_equal(res, expected_add)

with tm.assert_produces_warning(PerformanceWarning):
res2 = other + tdi
tm.assert_series_equal(res2, expected_add)

expected_sub = Series([tdi[n] - other[n] for n in range(len(tdi))],
name=names[2])

with tm.assert_produces_warning(PerformanceWarning):
res3 = tdi - other
tm.assert_series_equal(res3, expected_sub)

anchored = Series([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)],
name=names[1])

# addition/subtraction ops with anchored offsets should issue
# a PerformanceWarning and _then_ raise a TypeError.
with pytest.raises(TypeError):
with tm.assert_produces_warning(PerformanceWarning):
tdi + anchored
with pytest.raises(TypeError):
with tm.assert_produces_warning(PerformanceWarning):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

some duplication here

anchored + tdi
with pytest.raises(TypeError):
with tm.assert_produces_warning(PerformanceWarning):
tdi - anchored
with pytest.raises(TypeError):
tdi + anchored
with tm.assert_produces_warning(PerformanceWarning):
anchored - tdi

# TODO: Split by ops, better name
def test_numeric_compat(self):
Expand Down