Skip to content

Fix arithmetic errors with timedelta64 dtypes #22390

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Aug 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -581,12 +581,14 @@ Datetimelike
- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`)
- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`)
- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`)
- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
-

Timedelta
^^^^^^^^^

- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
- Bug in adding a :class:`Index` with object dtype to a :class:`Series` with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`22390`)
- Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`)
- Bug in :class:`Series` with numeric dtype when adding or subtracting an an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`)
- Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`)
-
-
-
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,14 @@ def index_arithmetic_method(self, other):
elif isinstance(other, ABCTimedeltaIndex):
# Defer to subclass implementation
return NotImplemented
elif isinstance(other, np.ndarray) and is_timedelta64_dtype(other):
# GH#22390; wrap in Series for op, this will in turn wrap in
# TimedeltaIndex, but will correctly raise TypeError instead of
# NullFrequencyError for add/sub ops
from pandas import Series
other = Series(other)
out = op(self, other)
return Index(out, name=self.name)

other = self._validate_for_numeric_binop(other, op)

Expand Down Expand Up @@ -2689,6 +2697,8 @@ def argsort(self, *args, **kwargs):
return result.argsort(*args, **kwargs)

def __add__(self, other):
if isinstance(other, (ABCSeries, ABCDataFrame)):
return NotImplemented
return Index(np.array(self) + other)

def __radd__(self, other):
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pandas.core.dtypes.common import (
is_integer,
is_scalar,
is_timedelta64_dtype,
is_int64_dtype)
from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex

Expand Down Expand Up @@ -596,6 +597,9 @@ def _evaluate_numeric_binop(self, other):
# GH#19333 is_integer evaluated True on timedelta64,
# so we need to catch these explicitly
return op(self._int64index, other)
elif is_timedelta64_dtype(other):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doesn't the fall thru raise TypeError?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t understand the question

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

your comment is at odds with the checking i think. pls revise comments and/or checks.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment below this line # must be an np.ndarray; GH#22390 along with the check above elif is_timedelta64_dtype(other) is just saying this is an ndarray["timedelta64['ns']"]. I don't think these are at odds.

# Must be an np.ndarray; GH#22390
return op(self._int64index, other)

other = self._validate_for_numeric_binop(other, op)
attrs = self._get_attributes_dict()
Expand Down
42 changes: 42 additions & 0 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,37 @@ def _maybe_match_name(a, b):
return None


def maybe_upcast_for_op(obj):
"""
Cast non-pandas objects to pandas types to unify behavior of arithmetic
and comparison operations.

Parameters
----------
obj: object

Returns
-------
out : object

Notes
-----
Be careful to call this *after* determining the `name` attribute to be
attached to the result of the arithmetic operation.
"""
if type(obj) is datetime.timedelta:
# GH#22390 cast up to Timedelta to rely on Timedelta
# implementation; otherwise operation against numeric-dtype
# raises TypeError
return pd.Timedelta(obj)
elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj):
# GH#22390 Unfortunately we need to special-case right-hand
# timedelta64 dtypes because numpy casts integer dtypes to
# timedelta64 when operating with timedelta64
return pd.TimedeltaIndex(obj)
return obj


# -----------------------------------------------------------------------------
# Reversed Operations not available in the stdlib operator module.
# Defining these instead of using lambdas allows us to reference them by name.
Expand Down Expand Up @@ -1222,6 +1253,7 @@ def wrapper(left, right):

left, right = _align_method_SERIES(left, right)
res_name = get_op_result_name(left, right)
right = maybe_upcast_for_op(right)

if is_categorical_dtype(left):
raise TypeError("{typ} cannot perform the operation "
Expand All @@ -1244,6 +1276,16 @@ def wrapper(left, right):
index=left.index, name=res_name,
dtype=result.dtype)

elif is_timedelta64_dtype(right) and not is_scalar(right):
# i.e. exclude np.timedelta64 object
# Note: we cannot use dispatch_to_index_op because
# that may incorrectly raise TypeError when we
# should get NullFrequencyError
result = op(pd.Index(left), right)
return construct_result(left, result,
index=left.index, name=res_name,
dtype=result.dtype)

lvalues = left.values
rvalues = right
if isinstance(rvalues, ABCSeries):
Expand Down
68 changes: 55 additions & 13 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# Arithmetc tests for DataFrame/Series/Index/Array classes that should
# behave identically.
# Specifically for numeric dtypes
from datetime import timedelta
from decimal import Decimal
import operator
from collections import Iterable
Expand Down Expand Up @@ -47,7 +46,61 @@ def test_operator_series_comparison_zerorank(self):
# ------------------------------------------------------------------
# Numeric dtypes Arithmetic with Timedelta Scalar

class TestNumericArraylikeArithmeticWithTimedeltaScalar(object):
class TestNumericArraylikeArithmeticWithTimedeltaLike(object):

# TODO: also check name retentention
@pytest.mark.parametrize('box_cls', [np.array, pd.Index, pd.Series])
@pytest.mark.parametrize('left', [
pd.RangeIndex(10, 40, 10)] + [cls([10, 20, 30], dtype=dtype)
for dtype in ['i1', 'i2', 'i4', 'i8',
'u1', 'u2', 'u4', 'u8',
'f2', 'f4', 'f8']
for cls in [pd.Series, pd.Index]],
ids=lambda x: type(x).__name__ + str(x.dtype))
def test_mul_td64arr(self, left, box_cls):
# GH#22390
right = np.array([1, 2, 3], dtype='m8[s]')
right = box_cls(right)

expected = pd.TimedeltaIndex(['10s', '40s', '90s'])
if isinstance(left, pd.Series) or box_cls is pd.Series:
expected = pd.Series(expected)

result = left * right
tm.assert_equal(result, expected)

result = right * left
tm.assert_equal(result, expected)

# TODO: also check name retentention
@pytest.mark.parametrize('box_cls', [np.array, pd.Index, pd.Series])
@pytest.mark.parametrize('left', [
pd.RangeIndex(10, 40, 10)] + [cls([10, 20, 30], dtype=dtype)
for dtype in ['i1', 'i2', 'i4', 'i8',
'u1', 'u2', 'u4', 'u8',
'f2', 'f4', 'f8']
for cls in [pd.Series, pd.Index]],
ids=lambda x: type(x).__name__ + str(x.dtype))
def test_div_td64arr(self, left, box_cls):
# GH#22390
right = np.array([10, 40, 90], dtype='m8[s]')
right = box_cls(right)

expected = pd.TimedeltaIndex(['1s', '2s', '3s'])
if isinstance(left, pd.Series) or box_cls is pd.Series:
expected = pd.Series(expected)

result = right / left
tm.assert_equal(result, expected)

result = right // left
tm.assert_equal(result, expected)

with pytest.raises(TypeError):
left / right

with pytest.raises(TypeError):
left // right

# TODO: de-duplicate with test_numeric_arr_mul_tdscalar
def test_ops_series(self):
Expand All @@ -71,11 +124,6 @@ def test_ops_series(self):
ids=lambda x: type(x).__name__)
def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
# GH#19333

if (box in [Series, pd.DataFrame] and
type(scalar_td) is timedelta and index.dtype == 'f8'):
raise pytest.xfail(reason="Cannot multiply timedelta by float")

expected = pd.timedelta_range('1 days', '10 days')

index = tm.box_expected(index, box)
Expand All @@ -99,12 +147,6 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
Timedelta(days=1).to_pytimedelta()],
ids=lambda x: type(x).__name__)
def test_numeric_arr_rdiv_tdscalar(self, scalar_td, index, box):

if box is Series and type(scalar_td) is timedelta:
raise pytest.xfail(reason="TODO: Figure out why this case fails")
if box is pd.DataFrame and isinstance(scalar_td, timedelta):
raise pytest.xfail(reason="TODO: Figure out why this case fails")

expected = TimedeltaIndex(['1 Day', '12 Hours'])

index = tm.box_expected(index, box)
Expand Down
68 changes: 12 additions & 56 deletions pandas/tests/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,19 +573,8 @@ def test_td64arr_add_int_series_invalid(self, box, tdser):
with pytest.raises(err):
tdser + Series([2, 3, 4])

@pytest.mark.parametrize('box', [
pd.Index,
pytest.param(Series,
marks=pytest.mark.xfail(reason="GH#19123 integer "
"interpreted as "
"nanoseconds",
strict=True)),
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Attempts to broadcast "
"incorrectly",
strict=True, raises=ValueError))
], ids=lambda x: x.__name__)
def test_td64arr_radd_int_series_invalid(self, box, tdser):
def test_td64arr_radd_int_series_invalid(self, box_df_fail, tdser):
box = box_df_fail # Tries to broadcast incorrectly
tdser = tm.box_expected(tdser, box)
err = TypeError if box is not pd.Index else NullFrequencyError
with pytest.raises(err):
Expand All @@ -605,11 +594,11 @@ def test_td64arr_sub_int_series_invalid(self, box, tdser):
with pytest.raises(err):
tdser - Series([2, 3, 4])

@pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds',
strict=True)
def test_td64arr_rsub_int_series_invalid(self, box, tdser):
def test_td64arr_rsub_int_series_invalid(self, box_df_fail, tdser):
box = box_df_fail # Tries to broadcast incorrectly
tdser = tm.box_expected(tdser, box)
with pytest.raises(TypeError):
err = TypeError if box is not pd.Index else NullFrequencyError
with pytest.raises(err):
Series([2, 3, 4]) - tdser

@pytest.mark.parametrize('box', [
Expand Down Expand Up @@ -671,14 +660,6 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser):
with pytest.raises(err):
scalar - tdser

@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Tries to broadcast "
"incorrectly",
strict=True, raises=ValueError))
], ids=lambda x: x.__name__)
@pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16',
'uint64', 'uint32', 'uint16', 'uint8',
'float64', 'float32', 'float16'])
Expand All @@ -688,10 +669,9 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser):
Series([1, 2, 3])
# TODO: Add DataFrame in here?
], ids=lambda x: type(x).__name__)
def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype, tdser):
if type(vec) is Series and not dtype.startswith('float'):
pytest.xfail(reason='GH#19123 integer interpreted as nanos')

def test_td64arr_add_sub_numeric_arr_invalid(self, box_df_fail, vec,
dtype, tdser):
box = box_df_fail # tries to broadcast incorrectly
tdser = tm.box_expected(tdser, box)
err = TypeError
if box is pd.Index and not dtype.startswith('float'):
Expand Down Expand Up @@ -865,9 +845,6 @@ def test_td64arr_sub_NaT(self, box):

def test_td64arr_add_timedeltalike(self, delta, box):
# only test adding/sub offsets as + is now numeric
if box is pd.DataFrame and isinstance(delta, pd.DateOffset):
pytest.xfail(reason="Returns object dtype instead of m8[ns]")

rng = timedelta_range('1 days', '10 days')
expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00',
freq='D')
Expand All @@ -879,9 +856,6 @@ def test_td64arr_add_timedeltalike(self, delta, box):

def test_td64arr_sub_timedeltalike(self, delta, box):
# only test adding/sub offsets as - is now numeric
if box is pd.DataFrame and isinstance(delta, pd.DateOffset):
pytest.xfail(reason="Returns object dtype instead of m8[ns]")

rng = timedelta_range('1 days', '10 days')
expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00')

Expand Down Expand Up @@ -929,11 +903,7 @@ def test_timedelta64_operations_with_DateOffset(self):

@pytest.mark.parametrize('box', [
pd.Index,
pytest.param(Series,
marks=pytest.mark.xfail(reason="Index fails to return "
"NotImplemented on "
"reverse op",
strict=True)),
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Tries to broadcast "
"incorrectly",
Expand Down Expand Up @@ -1021,23 +991,12 @@ def test_td64arr_sub_offset_array(self, box_df_fail):
res = tdi - other
tm.assert_equal(res, expected)

@pytest.mark.parametrize('box', [
pd.Index,
pytest.param(Series,
marks=pytest.mark.xfail(reason="object dtype Series "
"fails to return "
"NotImplemented",
strict=True, raises=TypeError)),
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="tries to broadcast "
"incorrectly",
strict=True, raises=ValueError))
], ids=lambda x: x.__name__)
@pytest.mark.parametrize('names', [(None, None, None),
('foo', 'bar', None),
('foo', 'foo', 'foo')])
def test_td64arr_with_offset_series(self, names, box):
def test_td64arr_with_offset_series(self, names, box_df_fail):
# GH#18849
box = box_df_fail # tries to broadcast incorrectly
box2 = Series if box is pd.Index else box

tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'],
Expand Down Expand Up @@ -1132,9 +1091,6 @@ def test_td64arr_mul_int(self, box):
tm.assert_equal(result, idx)

def test_td64arr_mul_tdlike_scalar_raises(self, delta, box):
if box is pd.DataFrame and not isinstance(delta, pd.DateOffset):
pytest.xfail(reason="returns m8[ns] instead of raising")

rng = timedelta_range('1 days', '10 days', name='foo')
rng = tm.box_expected(rng, box)
with pytest.raises(TypeError):
Expand Down