diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 317b8b8878308..e87e2d356d393 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -581,12 +581,14 @@ Datetimelike - Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`) - Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`) - Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`) -- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`) -- Timedelta ^^^^^^^^^ - +- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`) +- Bug in adding a :class:`Index` with object dtype to a :class:`Series` with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`22390`) +- Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`) +- Bug in :class:`Series` with numeric dtype when adding or subtracting an an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`) +- Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`) - - - diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 34cfa0b23f082..2a0f164887543 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -122,6 +122,14 @@ def index_arithmetic_method(self, other): elif isinstance(other, ABCTimedeltaIndex): # Defer to subclass implementation return NotImplemented + elif isinstance(other, np.ndarray) and is_timedelta64_dtype(other): + # GH#22390; wrap in Series for op, this will in turn wrap in + # TimedeltaIndex, but will correctly raise TypeError instead of + # NullFrequencyError for add/sub ops + from pandas import Series + other = Series(other) + out = op(self, other) + return Index(out, name=self.name) other = self._validate_for_numeric_binop(other, op) @@ -2689,6 +2697,8 @@ def argsort(self, *args, **kwargs): return result.argsort(*args, **kwargs) def __add__(self, other): + if isinstance(other, (ABCSeries, ABCDataFrame)): + return NotImplemented return Index(np.array(self) + other) def __radd__(self, other): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 939ec0b79ac6b..981bfddeadac1 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -8,6 +8,7 @@ from pandas.core.dtypes.common import ( is_integer, is_scalar, + is_timedelta64_dtype, is_int64_dtype) from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex @@ -596,6 +597,9 @@ def _evaluate_numeric_binop(self, other): # GH#19333 is_integer evaluated True on timedelta64, # so we need to catch these explicitly return op(self._int64index, other) + elif is_timedelta64_dtype(other): + # Must be an np.ndarray; GH#22390 + return op(self._int64index, other) other = self._validate_for_numeric_binop(other, op) attrs = self._get_attributes_dict() diff --git a/pandas/core/ops.py b/pandas/core/ops.py index ddd82de2da5fc..b25809bf074f7 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -107,6 +107,37 @@ def _maybe_match_name(a, b): return None +def maybe_upcast_for_op(obj): + """ + Cast non-pandas objects to pandas types to unify behavior of arithmetic + and comparison operations. + + Parameters + ---------- + obj: object + + Returns + ------- + out : object + + Notes + ----- + Be careful to call this *after* determining the `name` attribute to be + attached to the result of the arithmetic operation. + """ + if type(obj) is datetime.timedelta: + # GH#22390 cast up to Timedelta to rely on Timedelta + # implementation; otherwise operation against numeric-dtype + # raises TypeError + return pd.Timedelta(obj) + elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj): + # GH#22390 Unfortunately we need to special-case right-hand + # timedelta64 dtypes because numpy casts integer dtypes to + # timedelta64 when operating with timedelta64 + return pd.TimedeltaIndex(obj) + return obj + + # ----------------------------------------------------------------------------- # Reversed Operations not available in the stdlib operator module. # Defining these instead of using lambdas allows us to reference them by name. @@ -1222,6 +1253,7 @@ def wrapper(left, right): left, right = _align_method_SERIES(left, right) res_name = get_op_result_name(left, right) + right = maybe_upcast_for_op(right) if is_categorical_dtype(left): raise TypeError("{typ} cannot perform the operation " @@ -1244,6 +1276,16 @@ def wrapper(left, right): index=left.index, name=res_name, dtype=result.dtype) + elif is_timedelta64_dtype(right) and not is_scalar(right): + # i.e. exclude np.timedelta64 object + # Note: we cannot use dispatch_to_index_op because + # that may incorrectly raise TypeError when we + # should get NullFrequencyError + result = op(pd.Index(left), right) + return construct_result(left, result, + index=left.index, name=res_name, + dtype=result.dtype) + lvalues = left.values rvalues = right if isinstance(rvalues, ABCSeries): diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 85a0a8dffc55f..9ede1a62aaf2e 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -2,7 +2,6 @@ # Arithmetc tests for DataFrame/Series/Index/Array classes that should # behave identically. # Specifically for numeric dtypes -from datetime import timedelta from decimal import Decimal import operator from collections import Iterable @@ -47,7 +46,61 @@ def test_operator_series_comparison_zerorank(self): # ------------------------------------------------------------------ # Numeric dtypes Arithmetic with Timedelta Scalar -class TestNumericArraylikeArithmeticWithTimedeltaScalar(object): +class TestNumericArraylikeArithmeticWithTimedeltaLike(object): + + # TODO: also check name retentention + @pytest.mark.parametrize('box_cls', [np.array, pd.Index, pd.Series]) + @pytest.mark.parametrize('left', [ + pd.RangeIndex(10, 40, 10)] + [cls([10, 20, 30], dtype=dtype) + for dtype in ['i1', 'i2', 'i4', 'i8', + 'u1', 'u2', 'u4', 'u8', + 'f2', 'f4', 'f8'] + for cls in [pd.Series, pd.Index]], + ids=lambda x: type(x).__name__ + str(x.dtype)) + def test_mul_td64arr(self, left, box_cls): + # GH#22390 + right = np.array([1, 2, 3], dtype='m8[s]') + right = box_cls(right) + + expected = pd.TimedeltaIndex(['10s', '40s', '90s']) + if isinstance(left, pd.Series) or box_cls is pd.Series: + expected = pd.Series(expected) + + result = left * right + tm.assert_equal(result, expected) + + result = right * left + tm.assert_equal(result, expected) + + # TODO: also check name retentention + @pytest.mark.parametrize('box_cls', [np.array, pd.Index, pd.Series]) + @pytest.mark.parametrize('left', [ + pd.RangeIndex(10, 40, 10)] + [cls([10, 20, 30], dtype=dtype) + for dtype in ['i1', 'i2', 'i4', 'i8', + 'u1', 'u2', 'u4', 'u8', + 'f2', 'f4', 'f8'] + for cls in [pd.Series, pd.Index]], + ids=lambda x: type(x).__name__ + str(x.dtype)) + def test_div_td64arr(self, left, box_cls): + # GH#22390 + right = np.array([10, 40, 90], dtype='m8[s]') + right = box_cls(right) + + expected = pd.TimedeltaIndex(['1s', '2s', '3s']) + if isinstance(left, pd.Series) or box_cls is pd.Series: + expected = pd.Series(expected) + + result = right / left + tm.assert_equal(result, expected) + + result = right // left + tm.assert_equal(result, expected) + + with pytest.raises(TypeError): + left / right + + with pytest.raises(TypeError): + left // right # TODO: de-duplicate with test_numeric_arr_mul_tdscalar def test_ops_series(self): @@ -71,11 +124,6 @@ def test_ops_series(self): ids=lambda x: type(x).__name__) def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box): # GH#19333 - - if (box in [Series, pd.DataFrame] and - type(scalar_td) is timedelta and index.dtype == 'f8'): - raise pytest.xfail(reason="Cannot multiply timedelta by float") - expected = pd.timedelta_range('1 days', '10 days') index = tm.box_expected(index, box) @@ -99,12 +147,6 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box): Timedelta(days=1).to_pytimedelta()], ids=lambda x: type(x).__name__) def test_numeric_arr_rdiv_tdscalar(self, scalar_td, index, box): - - if box is Series and type(scalar_td) is timedelta: - raise pytest.xfail(reason="TODO: Figure out why this case fails") - if box is pd.DataFrame and isinstance(scalar_td, timedelta): - raise pytest.xfail(reason="TODO: Figure out why this case fails") - expected = TimedeltaIndex(['1 Day', '12 Hours']) index = tm.box_expected(index, box) diff --git a/pandas/tests/test_arithmetic.py b/pandas/tests/test_arithmetic.py index ae067faba7929..def7a8be95fc8 100644 --- a/pandas/tests/test_arithmetic.py +++ b/pandas/tests/test_arithmetic.py @@ -573,19 +573,8 @@ def test_td64arr_add_int_series_invalid(self, box, tdser): with pytest.raises(err): tdser + Series([2, 3, 4]) - @pytest.mark.parametrize('box', [ - pd.Index, - pytest.param(Series, - marks=pytest.mark.xfail(reason="GH#19123 integer " - "interpreted as " - "nanoseconds", - strict=True)), - pytest.param(pd.DataFrame, - marks=pytest.mark.xfail(reason="Attempts to broadcast " - "incorrectly", - strict=True, raises=ValueError)) - ], ids=lambda x: x.__name__) - def test_td64arr_radd_int_series_invalid(self, box, tdser): + def test_td64arr_radd_int_series_invalid(self, box_df_fail, tdser): + box = box_df_fail # Tries to broadcast incorrectly tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): @@ -605,11 +594,11 @@ def test_td64arr_sub_int_series_invalid(self, box, tdser): with pytest.raises(err): tdser - Series([2, 3, 4]) - @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds', - strict=True) - def test_td64arr_rsub_int_series_invalid(self, box, tdser): + def test_td64arr_rsub_int_series_invalid(self, box_df_fail, tdser): + box = box_df_fail # Tries to broadcast incorrectly tdser = tm.box_expected(tdser, box) - with pytest.raises(TypeError): + err = TypeError if box is not pd.Index else NullFrequencyError + with pytest.raises(err): Series([2, 3, 4]) - tdser @pytest.mark.parametrize('box', [ @@ -671,14 +660,6 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser): with pytest.raises(err): scalar - tdser - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pytest.param(pd.DataFrame, - marks=pytest.mark.xfail(reason="Tries to broadcast " - "incorrectly", - strict=True, raises=ValueError)) - ], ids=lambda x: x.__name__) @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', 'uint64', 'uint32', 'uint16', 'uint8', 'float64', 'float32', 'float16']) @@ -688,10 +669,9 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser): Series([1, 2, 3]) # TODO: Add DataFrame in here? ], ids=lambda x: type(x).__name__) - def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype, tdser): - if type(vec) is Series and not dtype.startswith('float'): - pytest.xfail(reason='GH#19123 integer interpreted as nanos') - + def test_td64arr_add_sub_numeric_arr_invalid(self, box_df_fail, vec, + dtype, tdser): + box = box_df_fail # tries to broadcast incorrectly tdser = tm.box_expected(tdser, box) err = TypeError if box is pd.Index and not dtype.startswith('float'): @@ -865,9 +845,6 @@ def test_td64arr_sub_NaT(self, box): def test_td64arr_add_timedeltalike(self, delta, box): # only test adding/sub offsets as + is now numeric - if box is pd.DataFrame and isinstance(delta, pd.DateOffset): - pytest.xfail(reason="Returns object dtype instead of m8[ns]") - rng = timedelta_range('1 days', '10 days') expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', freq='D') @@ -879,9 +856,6 @@ def test_td64arr_add_timedeltalike(self, delta, box): def test_td64arr_sub_timedeltalike(self, delta, box): # only test adding/sub offsets as - is now numeric - if box is pd.DataFrame and isinstance(delta, pd.DateOffset): - pytest.xfail(reason="Returns object dtype instead of m8[ns]") - rng = timedelta_range('1 days', '10 days') expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') @@ -929,11 +903,7 @@ def test_timedelta64_operations_with_DateOffset(self): @pytest.mark.parametrize('box', [ pd.Index, - pytest.param(Series, - marks=pytest.mark.xfail(reason="Index fails to return " - "NotImplemented on " - "reverse op", - strict=True)), + Series, pytest.param(pd.DataFrame, marks=pytest.mark.xfail(reason="Tries to broadcast " "incorrectly", @@ -1021,23 +991,12 @@ def test_td64arr_sub_offset_array(self, box_df_fail): res = tdi - other tm.assert_equal(res, expected) - @pytest.mark.parametrize('box', [ - pd.Index, - pytest.param(Series, - marks=pytest.mark.xfail(reason="object dtype Series " - "fails to return " - "NotImplemented", - strict=True, raises=TypeError)), - pytest.param(pd.DataFrame, - marks=pytest.mark.xfail(reason="tries to broadcast " - "incorrectly", - strict=True, raises=ValueError)) - ], ids=lambda x: x.__name__) @pytest.mark.parametrize('names', [(None, None, None), ('foo', 'bar', None), ('foo', 'foo', 'foo')]) - def test_td64arr_with_offset_series(self, names, box): + def test_td64arr_with_offset_series(self, names, box_df_fail): # GH#18849 + box = box_df_fail # tries to broadcast incorrectly box2 = Series if box is pd.Index else box tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], @@ -1132,9 +1091,6 @@ def test_td64arr_mul_int(self, box): tm.assert_equal(result, idx) def test_td64arr_mul_tdlike_scalar_raises(self, delta, box): - if box is pd.DataFrame and not isinstance(delta, pd.DateOffset): - pytest.xfail(reason="returns m8[ns] instead of raising") - rng = timedelta_range('1 days', '10 days', name='foo') rng = tm.box_expected(rng, box) with pytest.raises(TypeError):