Skip to content

BUG/TST: timedelta-like with Index/Series/DataFrame ops #23320

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Oct 30, 2018
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4978,7 +4978,7 @@ def _combine_match_columns(self, other, func, level=None):
assert left.columns.equals(right.index)
return ops.dispatch_to_series(left, right, func, axis="columns")

def _combine_const(self, other, func, errors='raise'):
def _combine_const(self, other, func):
assert lib.is_scalar(other) or np.ndim(other) == 0
return ops.dispatch_to_series(self, other, func)

Expand Down
7 changes: 7 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4702,6 +4702,13 @@ def dropna(self, how='any'):
def _evaluate_with_timedelta_like(self, other, op):
# Timedelta knows how to operate with np.array, so dispatch to that
# operation and then wrap the results
if self._is_numeric_dtype and op.__name__ in ['add', 'sub',
'radd', 'rsub']:
raise TypeError("Operation {opname} between {cls} and {other} "
"is invalid".format(opname=op.__name__,
cls=type(self).__name__,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I think we should use here the name of the dtype instead of the class, as this will bubble up for Series as well

In [3]: ser + tdnat
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-3-4ad1d16388f8> in <module>()
----> 1 ser + tdnat

~/scipy/pandas/pandas/core/ops.py in wrapper(left, right)
   1419             #  that may incorrectly raise TypeError when we
   1420             #  should get NullFrequencyError
-> 1421             result = op(pd.Index(left), right)
   1422             return construct_result(left, result,
   1423                                     index=left.index, name=res_name,

~/scipy/pandas/pandas/core/indexes/base.py in index_arithmetic_method(self, other)
    137         # handle time-based others
    138         if isinstance(other, (ABCDateOffset, np.timedelta64, timedelta)):
--> 139             return self._evaluate_with_timedelta_like(other, op)
    140         elif isinstance(other, (datetime, np.datetime64)):
    141             return self._evaluate_with_datetime_like(other, op)

~/scipy/pandas/pandas/core/indexes/base.py in _evaluate_with_timedelta_like(self, other, op)
   4708                             "is invalid".format(opname=op.__name__,
   4709                                                 cls=type(self).__name__,
-> 4710                                                 other=type(other).__name__))
   4711 
   4712         other = Timedelta(other)

TypeError: Operation add between Int64Index and timedelta64 is invalid

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will update in follow-up PR that addresses the bug above.

other=type(other).__name__))

other = Timedelta(other)
values = self.values

Expand Down
9 changes: 7 additions & 2 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
from pandas.core.dtypes.common import (
is_int64_dtype, is_integer, is_scalar, is_timedelta64_dtype
)
from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCSeries, ABCTimedeltaIndex
)
from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.core.indexes.numeric import Int64Index
from pandas.util._decorators import Appender, cache_readonly
Expand Down Expand Up @@ -557,6 +559,9 @@ def __getitem__(self, key):
return super_getitem(key)

def __floordiv__(self, other):
if isinstance(other, (ABCSeries, ABCDataFrame)):
return NotImplemented

if is_integer(other) and other != 0:
if (len(self) == 0 or
self._start % other == 0 and
Expand Down Expand Up @@ -588,7 +593,7 @@ def _make_evaluate_binop(op, step=False):
"""

def _evaluate_numeric_binop(self, other):
if isinstance(other, ABCSeries):
if isinstance(other, (ABCSeries, ABCDataFrame)):
return NotImplemented
elif isinstance(other, ABCTimedeltaIndex):
# Defer to TimedeltaIndex implementation
Expand Down
23 changes: 16 additions & 7 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
ABCIndex, ABCIndexClass,
ABCSparseSeries, ABCSparseArray)

from pandas.tseries.offsets import Tick


# -----------------------------------------------------------------------------
# Ops Wrapping Utilities
Expand Down Expand Up @@ -125,11 +127,18 @@ def maybe_upcast_for_op(obj):
Be careful to call this *after* determining the `name` attribute to be
attached to the result of the arithmetic operation.
"""
if type(obj) is datetime.timedelta:
if isinstance(obj, (datetime.timedelta, Tick)):
# GH#22390 cast up to Timedelta to rely on Timedelta
# implementation; otherwise operation against numeric-dtype
# raises TypeError
return pd.Timedelta(obj)
elif isinstance(obj, np.timedelta64) and not isna(obj):
# In particular non-nanosecond timedelta64 needs to be cast to
# nanoseconds, or else we get undesired behavior like
# np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
# The isna check is to avoid casting timedelta64("NaT"), which would
# return NaT and incorrectly be treated as a datetime-NaT.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding this np.timedelta('nat') special case here, isn't this pointing to an inherent problem with not having a timedelta NaT ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. In the past when I've brought up the idea of having something like pd.NaTD the idea has gone nowhere. I'd be on board if you want to revive the suggestion.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was testing a few things on master, this seems a bug:

In [9]: pd.Series([pd.Timedelta('1s')]) + np.timedelta64('nat')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-9-f56b47937d6f> in <module>()
----> 1 pd.Series([pd.Timedelta('1s')]) + np.timedelta64('nat')

~/scipy/pandas/pandas/core/ops.py in wrapper(left, right)
   1408 
   1409         elif is_timedelta64_dtype(left):
-> 1410             result = dispatch_to_index_op(op, left, right, pd.TimedeltaIndex)
   1411             return construct_result(left, result,
   1412                                     index=left.index, name=res_name,

~/scipy/pandas/pandas/core/ops.py in dispatch_to_index_op(op, left, right, index_class)
   1040         left_idx = left_idx._shallow_copy(freq=None)
   1041     try:
-> 1042         result = op(left_idx, right)
   1043     except NullFrequencyError:
   1044         # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError

~/scipy/pandas/pandas/core/indexes/datetimelike.py in __add__(self, other)
    578         def __add__(self, other):
    579             # dispatch to ExtensionArray implementation
--> 580             result = super(cls, self).__add__(other)
    581             return wrap_arithmetic_op(self, other, result)
    582 

~/scipy/pandas/pandas/core/arrays/datetimelike.py in __add__(self, other)
    626                 result = self._add_nat()
    627             elif isinstance(other, (Tick, timedelta, np.timedelta64)):
--> 628                 result = self._add_delta(other)
    629             elif isinstance(other, DateOffset):
    630                 # specifically _not_ a Tick

~/scipy/pandas/pandas/core/arrays/timedeltas.py in _add_delta(self, delta)
    204         result : TimedeltaArray
    205         """
--> 206         new_values = dtl.DatetimeLikeArrayMixin._add_delta(self, delta)
    207         return type(self)(new_values, freq='infer')
    208 

~/scipy/pandas/pandas/core/arrays/datetimelike.py in _add_delta(self, other)
    371         """
    372         if isinstance(other, (Tick, timedelta, np.timedelta64)):
--> 373             new_values = self._add_timedeltalike_scalar(other)
    374         elif is_timedelta64_dtype(other):
    375             # ndarray[timedelta64] or TimedeltaArray/index

~/scipy/pandas/pandas/core/arrays/datetimelike.py in _add_timedeltalike_scalar(self, other)
    383         return the i8 result view
    384         """
--> 385         inc = delta_to_nanoseconds(other)
    386         new_values = checked_add_with_arr(self.asi8, inc,
    387                                           arr_mask=self._isnan).view('i8')

~/scipy/pandas/pandas/_libs/tslibs/timedeltas.pyx in pandas._libs.tslibs.timedeltas.delta_to_nanoseconds()

~/scipy/pandas/pandas/_libs/tslibs/timedeltas.pyx in pandas._libs.tslibs.timedeltas.delta_to_nanoseconds()

TypeError: an integer is required

But if I remove here the not isna check, it works correctly (since s + pd.NaT works correctly and preserves the timedelta dtype). But I suppose there are other code paths don't work with the pd.NaT ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. I'll get a PR to fix+test this going sometime today.

return pd.Timedelta(obj)
elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj):
# GH#22390 Unfortunately we need to special-case right-hand
# timedelta64 dtypes because numpy casts integer dtypes to
Expand Down Expand Up @@ -1307,11 +1316,12 @@ def wrapper(left, right):
index=left.index, name=res_name,
dtype=result.dtype)

elif is_timedelta64_dtype(right) and not is_scalar(right):
# i.e. exclude np.timedelta64 object
elif is_timedelta64_dtype(right):
# We should only get here with non-scalar or timedelta64('NaT')
# values for right
# Note: we cannot use dispatch_to_index_op because
# that may incorrectly raise TypeError when we
# should get NullFrequencyError
# that may incorrectly raise TypeError when we
# should get NullFrequencyError
result = op(pd.Index(left), right)
return construct_result(left, result,
index=left.index, name=res_name,
Expand Down Expand Up @@ -1940,8 +1950,7 @@ def f(self, other):

# straight boolean comparisons we want to allow all columns
# (regardless of dtype to pass thru) See #4537 for discussion.
res = self._combine_const(other, func,
errors='ignore')
res = self._combine_const(other, func)
return res.fillna(True).astype(bool)

f.__name__ = op_name
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ def _combine_match_columns(self, other, func, level=None):
new_data, index=self.index, columns=union,
default_fill_value=self.default_fill_value).__finalize__(self)

def _combine_const(self, other, func, errors='raise'):
def _combine_const(self, other, func):
return self._apply_columns(lambda x: func(x, other))

def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/arithmetic/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ def scalar_td(request):
pd.Timedelta(days=3).to_pytimedelta(),
pd.Timedelta('72:00:00'),
np.timedelta64(3, 'D'),
np.timedelta64(72, 'h')])
np.timedelta64(72, 'h')],
ids=lambda x: type(x).__name__)
def three_days(request):
"""
Several timedelta-like and DateOffset objects that each represent
Expand All @@ -84,7 +85,8 @@ def three_days(request):
pd.Timedelta(hours=2).to_pytimedelta(),
pd.Timedelta(seconds=2 * 3600),
np.timedelta64(2, 'h'),
np.timedelta64(120, 'm')])
np.timedelta64(120, 'm')],
ids=lambda x: type(x).__name__)
def two_hours(request):
"""
Several timedelta-like and DateOffset objects that each represent
Expand Down
28 changes: 20 additions & 8 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,6 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box):
def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box):
index = numeric_idx[1:3]

broken = (isinstance(three_days, np.timedelta64) and
three_days.dtype != 'm8[ns]')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we actually had this in a test ...hahha

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yah, this is to xfail the broken stuff this PR fixes

broken = broken or isinstance(three_days, pd.offsets.Tick)
if box is not pd.Index and broken:
# np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
raise pytest.xfail("timedelta64 not converted to nanos; "
"Tick division not implemented")

expected = TimedeltaIndex(['3 Days', '36 Hours'])

index = tm.box_expected(index, box)
Expand All @@ -169,6 +161,26 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box):
with pytest.raises(TypeError):
index / three_days

@pytest.mark.parametrize('other', [
pd.Timedelta(hours=31),
pd.Timedelta(hours=31).to_pytimedelta(),
pd.Timedelta(hours=31).to_timedelta64(),
pd.Timedelta(hours=31).to_timedelta64().astype('m8[h]'),
np.timedelta64('NaT'),
np.timedelta64('NaT', 'D'),
pd.offsets.Minute(3),
pd.offsets.Second(0)])
def test_add_sub_timedeltalike_invalid(self, numeric_idx, other, box):
left = tm.box_expected(numeric_idx, box)
with pytest.raises(TypeError):
left + other
with pytest.raises(TypeError):
other + left
with pytest.raises(TypeError):
left - other
with pytest.raises(TypeError):
other - left


# ------------------------------------------------------------------
# Arithmetic
Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1033,10 +1033,8 @@ def test_tdi_mul_float_series(self, box_df_fail):
pd.Float64Index(range(1, 11)),
pd.RangeIndex(1, 11)
], ids=lambda x: type(x).__name__)
def test_tdi_rmul_arraylike(self, other, box_df_fail):
# RangeIndex fails to return NotImplemented, for others
# DataFrame tries to broadcast incorrectly
box = box_df_fail
def test_tdi_rmul_arraylike(self, other, box_df_broadcast_failure):
box = box_df_broadcast_failure

tdi = TimedeltaIndex(['1 Day'] * 10)
expected = timedelta_range('1 days', '10 days')
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,25 @@ def test_constructor_name(self):
assert copy.name == 'copy'
assert new.name == 'new'

# TODO: mod, divmod?
@pytest.mark.parametrize('op', [operator.add, operator.sub,
operator.mul, operator.floordiv,
operator.truediv, operator.pow])
def test_arithmetic_with_frame_or_series(self, op):
# check that we return NotImplemented when operating with Series
# or DataFrame
index = pd.RangeIndex(5)
other = pd.Series(np.random.randn(5))

expected = op(pd.Series(index), other)
result = op(index, other)
tm.assert_series_equal(result, expected)

other = pd.DataFrame(np.random.randn(2, 5))
expected = op(pd.DataFrame([index, index]), other)
result = op(index, other)
tm.assert_frame_equal(result, expected)

def test_numeric_compat2(self):
# validate that we are handling the RangeIndex overrides to numeric ops
# and returning RangeIndex where possible
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1243,7 +1243,6 @@ def test_binop_other(self, op, value, dtype):
(operator.mul, '<M8[ns]'),
(operator.add, '<M8[ns]'),
(operator.pow, '<m8[ns]'),
(operator.mod, '<m8[ns]'),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is testing basically-wrong behavior.

(operator.mul, '<m8[ns]')}

if (op, dtype) in invalid:
Expand Down