Skip to content

delegate (most) datetimelike Series arithmetic ops to DatetimeIndex #18817

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
15 changes: 14 additions & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import numpy as np
from pandas.core.dtypes.common import (
is_integer, is_float,
is_integer, is_float, is_integer_dtype,
is_bool_dtype, _ensure_int64,
is_scalar, is_dtype_equal,
is_list_like, is_timedelta64_dtype)
Expand Down Expand Up @@ -650,6 +650,7 @@ def _add_datetimelike_methods(cls):
def __add__(self, other):
from pandas.core.index import Index
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.tseries.offsets import DateOffset
if is_timedelta64_dtype(other):
return self._add_delta(other)
Expand All @@ -664,6 +665,12 @@ def __add__(self, other):
return self.shift(other)
elif isinstance(other, (Index, datetime, np.datetime64)):
return self._add_datelike(other)
elif (isinstance(self, DatetimeIndex) and
isinstance(other, np.ndarray) and other.size == 1 and
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

huh? I believe we already check is_integer_dtype(other). this check is much too specific

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Under the status quo adding np.array(1, dtype=np.int64) to a DatetimeIndex behaves like adding Timedelta(nanosecond=1). Series raises (and a Series test fails unless this is caught here).

I'm open to ideas for how to make this less hacky.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An alternative would be to -- for the time being -- check for this case in the Series op and avoid dispatching to DatetimeIndex.

is_integer_dtype(other)):
# TODO: Should this be allowed if self.freq is not None?
raise TypeError("cannot add {cls} and {typ}"
.format(cls=type(cls), typ=type(other)))
else: # pragma: no cover
return NotImplemented
cls.__add__ = __add__
Expand Down Expand Up @@ -695,6 +702,12 @@ def __sub__(self, other):
return self._sub_datelike(other)
elif isinstance(other, Period):
return self._sub_period(other)
elif (isinstance(self, DatetimeIndex) and
isinstance(other, np.ndarray) and other.size == 1 and
is_integer_dtype(other)):
# TODO: Should this be allowed if self.freq is not None?
raise TypeError("cannot add {cls} and {typ}"
.format(cls=type(cls), typ=type(other)))
else: # pragma: no cover
return NotImplemented
cls.__sub__ = __sub__
Expand Down
56 changes: 45 additions & 11 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def __init__(self, left, right, name, na_op):
rvalues = self._convert_to_array(right, name=name, other=lvalues)

# left
self.is_offset_lhs = self._is_offset(left)
self.is_offset_lhs = _is_offset(left)
self.is_timedelta_lhs = is_timedelta64_dtype(lvalues)
self.is_datetime64_lhs = is_datetime64_dtype(lvalues)
self.is_datetime64tz_lhs = is_datetime64tz_dtype(lvalues)
Expand All @@ -373,7 +373,7 @@ def __init__(self, left, right, name, na_op):
self.is_floating_lhs = left.dtype.kind == 'f'

# right
self.is_offset_rhs = self._is_offset(right)
self.is_offset_rhs = _is_offset(right)
self.is_datetime64_rhs = is_datetime64_dtype(rvalues)
self.is_datetime64tz_rhs = is_datetime64tz_dtype(rvalues)
self.is_datetime_rhs = (self.is_datetime64_rhs or
Expand Down Expand Up @@ -489,6 +489,7 @@ def _convert_to_array(self, values, name=None, other=None):
# datetime with tz
elif (isinstance(ovalues, datetime.datetime) and
hasattr(ovalues, 'tzinfo')):
# TODO: does this mean to say `ovalues.tzinfo is not None`?
values = pd.DatetimeIndex(values)
# datetime array with tz
elif is_datetimetz(values):
Expand All @@ -515,7 +516,7 @@ def _convert_to_array(self, values, name=None, other=None):
values = np.empty(values.shape, dtype=other.dtype)
values[:] = iNaT
return values
elif self._is_offset(values):
elif _is_offset(values):
return values
else:
raise TypeError("incompatible type [{dtype}] for a "
Expand Down Expand Up @@ -618,14 +619,15 @@ def f(x):

return lvalues, rvalues

def _is_offset(self, arr_or_obj):
""" check if obj or all elements of list-like is DateOffset """
if isinstance(arr_or_obj, ABCDateOffset):
return True
elif (is_list_like(arr_or_obj) and len(arr_or_obj) and
is_object_dtype(arr_or_obj)):
return all(isinstance(x, ABCDateOffset) for x in arr_or_obj)
return False

def _is_offset(arr_or_obj):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better to move this to pandas.core.dtypes.common and de-privatize

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and add tests

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed on all three points.

""" check if obj or all elements of list-like is DateOffset """
if isinstance(arr_or_obj, ABCDateOffset):
return True
elif (is_list_like(arr_or_obj) and len(arr_or_obj) and
is_object_dtype(arr_or_obj)):
return all(isinstance(x, ABCDateOffset) for x in arr_or_obj)
return False


def _align_method_SERIES(left, right, align_asobject=False):
Expand Down Expand Up @@ -663,6 +665,15 @@ def _construct_divmod_result(left, result, index, name, dtype):
)


def _get_series_result_name(left, rvalues):
# TODO: Can we just use right instead of rvalues?
if isinstance(rvalues, ABCSeries):
name = _maybe_match_name(left, rvalues)
else:
name = left.name
return name


def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, default_axis=None,
construct_result=_construct_result, **eval_kwargs):
"""
Expand Down Expand Up @@ -715,6 +726,29 @@ def wrapper(left, right, name=name, na_op=na_op):
if isinstance(right, ABCDataFrame):
return NotImplemented

if _is_offset(right):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this logic does not belong here. this is what happens in Timeop.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this logic does not belong here. this is what happens in Timeop.

Yes. This is preventing this case from being dispatched to DatetimeIndex so that it continues to be handled by TimeOp for now. DatetimeIndex makes a mess of this case (try it...), but I decided to fix that separately.

# special handling for alignment
pass
elif isinstance(right, pd.PeriodIndex):
# not supported for DatetimeIndex
pass
elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
# Dispatch to DatetimeIndex method
if right is pd.NaT:
# DatetimeIndex and Series handle this differently, so
# until that is resolved we need to special-case here
return construct_result(left, pd.NaT, index=left.index,
name=left.name, dtype=left.dtype)
# TODO: double-check that the tz part of the dtype
# is supposed to be retained
left, right = _align_method_SERIES(left, right)
name = _get_series_result_name(left, right)
result = op(pd.DatetimeIndex(left), right)
result.name = name # Needs to be overriden if name is None
return construct_result(left, result,
index=left.index, name=name,
dtype=result.dtype)

left, right = _align_method_SERIES(left, right)

converted = _Op.get_op(left, right, name, na_op)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/indexes/datetimes/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,20 @@ def test_datetimeindex_sub_timestamp_overflow(self):
with pytest.raises(OverflowError):
dtimin - variant

def test_dti_add_intarray(self, tz):
rng = pd.date_range('2000-01-01 09:00', freq='H',
periods=10, tz=tz)
other = np.array(1, dtype=np.int64)
with pytest.raises(TypeError):
rng + other

def test_dti_sub_intarray(self, tz):
rng = pd.date_range('2000-01-01 09:00', freq='H',
periods=10, tz=tz)
other = np.array(1, dtype=np.int64)
with pytest.raises(TypeError):
rng - other


# GH 10699
@pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex],
Expand Down
19 changes: 18 additions & 1 deletion pandas/tests/series/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,23 @@
from .common import TestData


class TestDatetimeLikeArithmetic(object):
def test_datetime_sub_datetime_overflow(self):
# GH#12534
dt = pd.Timestamp('1700-01-31')
dti = pd.date_range('1999-09-30', freq='M', periods=10)
with pytest.raises(OverflowError):
dti - dt
with pytest.raises(OverflowError):
dt - dti

ser = pd.Series(dti)
with pytest.raises(OverflowError):
ser - dt
with pytest.raises(OverflowError):
dt - ser


class TestSeriesOperators(TestData):

def test_series_comparison_scalars(self):
Expand Down Expand Up @@ -612,7 +629,7 @@ def run_ops(ops, get_ser, test_ser):
# defined
for op_str in ops:
op = getattr(get_ser, op_str, None)
with tm.assert_raises_regex(TypeError, 'operate'):
with tm.assert_raises_regex(TypeError, 'operate|cannot'):
op(test_ser)

# ## timedelta64 ###
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def test_shift(self):
# incompat tz
s2 = Series(date_range('2000-01-01 09:00:00', periods=5,
tz='CET'), name='foo')
pytest.raises(ValueError, lambda: s - s2)
pytest.raises(TypeError, lambda: s - s2)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is an API change

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as in needs to be noted in whatsnew or needs to be avoided?


def test_shift2(self):
ts = Series(np.random.randn(5),
Expand Down