Skip to content

Fix name setting in DTI/TDI __add__ and __sub__ #19744

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Feb 21, 2018
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,7 @@ Datetimelike
- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`)
- Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`)
- Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`)
- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where the name of the returned object was not always set consistently (:issue:`19744`)
-

Timezones
Expand Down
51 changes: 33 additions & 18 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from pandas.core.dtypes.generic import (
ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass)
from pandas.core.dtypes.missing import isna
from pandas.core import common as com, algorithms
from pandas.core import common as com, algorithms, ops
from pandas.core.algorithms import checked_add_with_arr
from pandas.errors import NullFrequencyError
import pandas.io.formats.printing as printing
Expand Down Expand Up @@ -661,29 +661,37 @@ def __add__(self, other):
if isinstance(other, ABCSeries):
return NotImplemented
elif is_timedelta64_dtype(other):
return self._add_delta(other)
result = self._add_delta(other)
elif isinstance(other, (DateOffset, timedelta)):
return self._add_delta(other)
result = self._add_delta(other)
elif is_offsetlike(other):
# Array/Index of DateOffset objects
return self._add_offset_array(other)
result = self._add_offset_array(other)
elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
if hasattr(other, '_add_delta'):
return other._add_delta(self)
raise TypeError("cannot add TimedeltaIndex and {typ}"
.format(typ=type(other)))
result = other._add_delta(self)
else:
raise TypeError("cannot add TimedeltaIndex and {typ}"
.format(typ=type(other)))
elif is_integer(other):
return self.shift(other)
# This check must come after the check for timedelta64_dtype
# or else it will incorrectly catch np.timedelta64 objects
result = self.shift(other)
elif isinstance(other, (datetime, np.datetime64)):
return self._add_datelike(other)
result = self._add_datelike(other)
elif isinstance(other, Index):
return self._add_datelike(other)
result = self._add_datelike(other)
elif is_integer_dtype(other) and self.freq is None:
# GH#19123
raise NullFrequencyError("Cannot shift with no freq")
else: # pragma: no cover
return NotImplemented

if result is not NotImplemented:
res_name = ops._get_series_op_result_name(self, other)
result = result.rename(name=res_name)
return result

cls.__add__ = __add__
cls.__radd__ = __add__

Expand All @@ -697,25 +705,27 @@ def __sub__(self, other):
if isinstance(other, ABCSeries):
return NotImplemented
elif is_timedelta64_dtype(other):
return self._add_delta(-other)
result = self._add_delta(-other)
elif isinstance(other, (DateOffset, timedelta)):
return self._add_delta(-other)
result = self._add_delta(-other)
elif is_offsetlike(other):
# Array/Index of DateOffset objects
return self._sub_offset_array(other)
result = self._sub_offset_array(other)
elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
if not isinstance(other, TimedeltaIndex):
raise TypeError("cannot subtract TimedeltaIndex and {typ}"
.format(typ=type(other).__name__))
return self._add_delta(-other)
result = self._add_delta(-other)
elif isinstance(other, DatetimeIndex):
return self._sub_datelike(other)
result = self._sub_datelike(other)
elif is_integer(other):
return self.shift(-other)
# This check must come after the check for timedelta64_dtype
# or else it will incorrectly catch np.timedelta64 objects
result = self.shift(-other)
elif isinstance(other, (datetime, np.datetime64)):
return self._sub_datelike(other)
result = self._sub_datelike(other)
elif isinstance(other, Period):
return self._sub_period(other)
result = self._sub_period(other)
elif isinstance(other, Index):
raise TypeError("cannot subtract {typ1} and {typ2}"
.format(typ1=type(self).__name__,
Expand All @@ -726,6 +736,11 @@ def __sub__(self, other):
else: # pragma: no cover
return NotImplemented

if result is not NotImplemented:
res_name = ops._get_series_op_result_name(self, other)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe rename this to ``get_op_result_name``.

result = result.rename(name=res_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This copies the result again; just set the name on it directly.

return result

cls.__sub__ = __sub__

def __rsub__(self, other):
Expand Down
20 changes: 5 additions & 15 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -886,7 +886,7 @@ def _sub_datelike(self, other):
else:
raise TypeError("cannot subtract DatetimeIndex and {typ}"
.format(typ=type(other).__name__))
return TimedeltaIndex(result, name=self.name, copy=False)
return TimedeltaIndex(result)

def _sub_datelike_dti(self, other):
"""subtraction of two DatetimeIndexes"""
Expand All @@ -910,28 +910,21 @@ def _maybe_update_attributes(self, attrs):
return attrs

def _add_delta(self, delta):
if isinstance(delta, ABCSeries):
return NotImplemented

from pandas import TimedeltaIndex
name = self.name

if isinstance(delta, (Tick, timedelta, np.timedelta64)):
new_values = self._add_delta_td(delta)
elif is_timedelta64_dtype(delta):
if not isinstance(delta, TimedeltaIndex):
delta = TimedeltaIndex(delta)
else:
# update name when delta is Index
name = com._maybe_match_name(self, delta)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's move ``_maybe_match_name`` to ``ops`` and put it next to ``get_op_result_name``. (We should eventually remove it in favor of ``get_op_result_name``, but that might be slightly tricky.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. While I'm at it, I'm going to move ``get_op_result_name`` up to sit with the utility functions near the top of the file, instead of leaving it in the arithmetic-specific section where it is now.

new_values = self._add_delta_tdi(delta)
elif isinstance(delta, DateOffset):
new_values = self._add_offset(delta).asi8
else:
new_values = self.astype('O') + delta

tz = 'UTC' if self.tz is not None else None
result = DatetimeIndex(new_values, tz=tz, name=name, freq='infer')
result = DatetimeIndex(new_values, tz=tz, freq='infer')
if self.tz is not None and self.tz is not utc:
result = result.tz_convert(self.tz)
return result
Expand All @@ -954,22 +947,19 @@ def _add_offset(self, offset):

def _add_offset_array(self, other):
# Array/Index of DateOffset objects
if isinstance(other, ABCSeries):
return NotImplemented
elif len(other) == 1:
if len(other) == 1:
return self + other[0]
else:
warnings.warn("Adding/subtracting array of DateOffsets to "
"{} not vectorized".format(type(self)),
PerformanceWarning)
return self.astype('O') + np.array(other)
# TODO: pass freq='infer' like we do in _sub_offset_array?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the purpose of these comments (especially here, after the ``return``)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a) to make sure it gets seen during review, b) as a note to myself or the next pass

# TODO: This works for __add__ but loses dtype in __sub__

def _sub_offset_array(self, other):
# Array/Index of DateOffset objects
if isinstance(other, ABCSeries):
return NotImplemented
elif len(other) == 1:
if len(other) == 1:
return self - other[0]
else:
warnings.warn("Adding/subtracting array of DateOffsets to "
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,7 @@ def _sub_datelike(self, other):
if other is tslib.NaT:
new_data = np.empty(len(self), dtype=np.int64)
new_data.fill(tslib.iNaT)
return TimedeltaIndex(new_data, name=self.name)
return TimedeltaIndex(new_data)
return NotImplemented

def _sub_period(self, other):
Expand All @@ -744,7 +744,7 @@ def _sub_period(self, other):
new_data = new_data.astype(np.float64)
new_data[self._isnan] = np.nan
# result must be Int64Index or Float64Index
return Index(new_data, name=self.name)
return Index(new_data)

def shift(self, n):
"""
Expand Down
17 changes: 5 additions & 12 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,17 +358,13 @@ def _maybe_update_attributes(self, attrs):
def _add_delta(self, delta):
if isinstance(delta, (Tick, timedelta, np.timedelta64)):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably also add docstrings to these methods, stating the guarantee that they do not set the name on the result.

new_values = self._add_delta_td(delta)
name = self.name
elif isinstance(delta, TimedeltaIndex):
new_values = self._add_delta_tdi(delta)
# update name when delta is index
name = com._maybe_match_name(self, delta)
else:
raise TypeError("cannot add the type {0} to a TimedeltaIndex"
.format(type(delta)))

result = TimedeltaIndex(new_values, freq='infer', name=name)
return result
return TimedeltaIndex(new_values, freq='infer')

def _evaluate_with_timedelta_like(self, other, op, opstr, reversed=False):
if isinstance(other, ABCSeries):
Expand Down Expand Up @@ -409,7 +405,7 @@ def _add_datelike(self, other):
result = checked_add_with_arr(i8, other.value,
arr_mask=self._isnan)
result = self._maybe_mask_results(result, fill_value=iNaT)
return DatetimeIndex(result, name=self.name, copy=False)
return DatetimeIndex(result)

def _sub_datelike(self, other):
# GH#19124 Timedelta - datetime is not in general well-defined.
Expand All @@ -426,16 +422,15 @@ def _add_offset_array(self, other):
# TimedeltaIndex can only operate with a subset of DateOffset
# subclasses. Incompatible classes will raise AttributeError,
# which we re-raise as TypeError
if isinstance(other, ABCSeries):
return NotImplemented
elif len(other) == 1:
if len(other) == 1:
return self + other[0]
else:
from pandas.errors import PerformanceWarning
warnings.warn("Adding/subtracting array of DateOffsets to "
"{} not vectorized".format(type(self)),
PerformanceWarning)
return self.astype('O') + np.array(other)
# TODO: pass freq='infer' like we do in _sub_offset_array?
# TODO: This works for __add__ but loses dtype in __sub__
except AttributeError:
raise TypeError("Cannot add non-tick DateOffset to TimedeltaIndex")
Expand All @@ -446,9 +441,7 @@ def _sub_offset_array(self, other):
# TimedeltaIndex can only operate with a subset of DateOffset
# subclasses. Incompatible classes will raise AttributeError,
# which we re-raise as TypeError
if isinstance(other, ABCSeries):
return NotImplemented
elif len(other) == 1:
if len(other) == 1:
return self - other[0]
else:
from pandas.errors import PerformanceWarning
Expand Down
44 changes: 38 additions & 6 deletions pandas/tests/indexes/datetimes/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,11 +721,10 @@ def test_dti_add_series(self, tz, names):
result4 = index + ser.values
tm.assert_index_equal(result4, expected)

@pytest.mark.parametrize('box', [np.array, pd.Index])
def test_dti_add_offset_array(self, tz, box):
def test_dti_add_offset_array(self, tz):
# GH#18849
dti = pd.date_range('2017-01-01', periods=2, tz=tz)
other = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])

with tm.assert_produces_warning(PerformanceWarning):
res = dti + other
Expand All @@ -737,18 +736,51 @@ def test_dti_add_offset_array(self, tz, box):
res2 = other + dti
tm.assert_index_equal(res2, expected)

@pytest.mark.parametrize('box', [np.array, pd.Index])
def test_dti_sub_offset_array(self, tz, box):
@pytest.mark.parametrize('names', [(None, None, None),
('foo', 'bar', None),
('foo', 'foo', 'foo')])
def test_dti_add_offset_index(self, tz, names):
# GH#18849, GH#19744
dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0])
other = pd.Index([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)],
name=names[1])

with tm.assert_produces_warning(PerformanceWarning):
res = dti + other
expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))],
name=names[2], freq='infer')
tm.assert_index_equal(res, expected)

with tm.assert_produces_warning(PerformanceWarning):
res2 = other + dti
tm.assert_index_equal(res2, expected)

def test_dti_sub_offset_array(self, tz):
# GH#18824
dti = pd.date_range('2017-01-01', periods=2, tz=tz)
other = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are you removing all of the boxing? Do these cases not work?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

box is parametrized over [np.array, pd.Index], but the pd.Index case is being separated out into its own test that also checks names.

other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])

with tm.assert_produces_warning(PerformanceWarning):
res = dti - other
expected = DatetimeIndex([dti[n] - other[n] for n in range(len(dti))],
name=dti.name, freq='infer')
tm.assert_index_equal(res, expected)

@pytest.mark.parametrize('names', [(None, None, None),
('foo', 'bar', None),
('foo', 'foo', 'foo')])
def test_dti_sub_offset_index(self, tz, names):
# GH#18824, GH#19744
dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0])
other = pd.Index([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)],
name=names[1])

with tm.assert_produces_warning(PerformanceWarning):
res = dti - other
expected = DatetimeIndex([dti[n] - other[n] for n in range(len(dti))],
name=names[2], freq='infer')
tm.assert_index_equal(res, expected)

@pytest.mark.parametrize('names', [(None, None, None),
('foo', 'bar', None),
('foo', 'foo', 'foo')])
Expand Down
Loading