diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index c74bcb505b6be..d4b2fefff322f 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1256,7 +1256,7 @@ Timedelta - Bug in :class:`TimedeltaIndex` where adding a timezone-aware datetime scalar incorrectly returned a timezone-naive :class:`DatetimeIndex` (:issue:`23215`) - Bug in :class:`TimedeltaIndex` where adding ``np.timedelta64('NaT')`` incorrectly returned an all-`NaT` :class:`DatetimeIndex` instead of an all-`NaT` :class:`TimedeltaIndex` (:issue:`23215`) - Bug in :class:`Timedelta` and :func:`to_timedelta()` have inconsistencies in supported unit string (:issue:`21762`) - +- Bug in :class:`TimedeltaIndex` division where dividing by another :class:`TimedeltaIndex` raised ``TypeError`` instead of returning a :class:`Float64Index` (:issue:`23829`, :issue:`22631`) Timezones ^^^^^^^^^ diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 83cea51cec9f6..3f14b61c1ea6c 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._libs import algos, tslibs +from pandas._libs import algos, lib, tslibs from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import ( @@ -177,7 +177,7 @@ def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): passed=freq.freqstr)) elif freq is None: freq = inferred_freq - freq_infer = False + freq_infer = False result = cls._simple_new(values, freq=freq) # check that we are matching freqs @@ -355,12 +355,108 @@ def _evaluate_with_timedelta_like(self, other, op): __mul__ = _wrap_tdi_op(operator.mul) __rmul__ = __mul__ - __truediv__ = _wrap_tdi_op(operator.truediv) __floordiv__ = _wrap_tdi_op(operator.floordiv) __rfloordiv__ = _wrap_tdi_op(ops.rfloordiv) + def __truediv__(self, other): + # timedelta / X is 
well-defined for timedelta-like or numeric X + other = lib.item_from_zerodim(other) + + if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): + return NotImplemented + + if isinstance(other, (timedelta, np.timedelta64, Tick)): + other = Timedelta(other) + if other is NaT: + # specifically timedelta64-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # otherwise, dispatch to Timedelta implementation + return self._data / other + + elif lib.is_scalar(other): + # assume it is numeric + result = self._data / other + freq = None + if self.freq is not None: + # Tick division is not implemented, so operate on Timedelta + freq = self.freq.delta / other + return type(self)(result, freq=freq) + + if not hasattr(other, "dtype"): + # e.g. list, tuple + other = np.array(other) + + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other): + # let numpy handle it + return self._data / other + + elif is_object_dtype(other): + # Note: we do not do type inference on the result, so either + # an object array or numeric-dtyped (if numpy does inference) + # will be returned. 
GH#23829 + result = [self[n] / other[n] for n in range(len(self))] + result = np.array(result) + return result + + else: + result = self._data / other + return type(self)(result) + + def __rtruediv__(self, other): + # X / timedelta is defined only for timedelta-like X + other = lib.item_from_zerodim(other) + + if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): + return NotImplemented + + if isinstance(other, (timedelta, np.timedelta64, Tick)): + other = Timedelta(other) + if other is NaT: + # specifically timedelta64-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # otherwise, dispatch to Timedelta implementation + return other / self._data + + elif lib.is_scalar(other): + raise TypeError("Cannot divide {typ} by {cls}" + .format(typ=type(other).__name__, + cls=type(self).__name__)) + + if not hasattr(other, "dtype"): + # e.g. list, tuple + other = np.array(other) + + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other): + # let numpy handle it + return other / self._data + + elif is_object_dtype(other): + # Note: unlike in __truediv__, we do not _need_ to do type + # inference on the result. It does not raise; a numeric array + # is returned. 
GH#23829 + result = [other[n] / self[n] for n in range(len(self))] + return np.array(result) + + else: + raise TypeError("Cannot divide {dtype} data by {cls}" + .format(dtype=other.dtype, + cls=type(self).__name__)) + if compat.PY2: __div__ = __truediv__ + __rdiv__ = __rtruediv__ # Note: TimedeltaIndex overrides this in call to cls._add_numeric_methods def __neg__(self): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8a2dd4879f20..de59c035b81b5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5021,11 +5021,14 @@ def _add_numeric_methods_binary(cls): cls.__mod__ = _make_arithmetic_op(operator.mod, cls) cls.__floordiv__ = _make_arithmetic_op(operator.floordiv, cls) cls.__rfloordiv__ = _make_arithmetic_op(ops.rfloordiv, cls) - cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls) - cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls) - if not compat.PY3: - cls.__div__ = _make_arithmetic_op(operator.div, cls) - cls.__rdiv__ = _make_arithmetic_op(ops.rdiv, cls) + + if not issubclass(cls, ABCTimedeltaIndex): + # GH#23829 TimedeltaIndex defines these directly + cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls) + cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls) + if not compat.PY3: + cls.__div__ = _make_arithmetic_op(operator.div, cls) + cls.__rdiv__ = _make_arithmetic_op(ops.rdiv, cls) cls.__divmod__ = _make_arithmetic_op(divmod, cls) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 63b1ac6a99503..4be896049176c 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -248,11 +248,8 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): __mul__ = Index.__mul__ __rmul__ = Index.__rmul__ - __truediv__ = Index.__truediv__ __floordiv__ = Index.__floordiv__ __rfloordiv__ = Index.__rfloordiv__ - if compat.PY2: - __div__ = Index.__div__ days = wrap_field_accessor(TimedeltaArray.days) seconds = 
wrap_field_accessor(TimedeltaArray.seconds) @@ -261,6 +258,26 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): total_seconds = wrap_array_method(TimedeltaArray.total_seconds, True) + def __truediv__(self, other): + oth = other + if isinstance(other, Index): + # TimedeltaArray defers, so we need to unwrap + oth = other._values + result = TimedeltaArray.__truediv__(self, oth) + return wrap_arithmetic_op(self, other, result) + + def __rtruediv__(self, other): + oth = other + if isinstance(other, Index): + # TimedeltaArray defers, so we need to unwrap + oth = other._values + result = TimedeltaArray.__rtruediv__(self, oth) + return wrap_arithmetic_op(self, other, result) + + if compat.PY2: + __div__ = __truediv__ + __rdiv__ = __rtruediv__ + # Compat for frequency inference, see GH#23789 _is_monotonic_increasing = Index.is_monotonic_increasing _is_monotonic_decreasing = Index.is_monotonic_decreasing diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 2b300cb101201..81e7062c23fbe 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -455,10 +455,10 @@ def test_td64arr_add_sub_timestamp(self, box_with_array): ts = Timestamp('2012-01-01') # TODO: parametrize over types of datetime scalar? 
- tdarr = timedelta_range('1 day', periods=3) + tdi = timedelta_range('1 day', periods=3) expected = pd.date_range('2012-01-02', periods=3) - tdarr = tm.box_expected(tdarr, box_with_array) + tdarr = tm.box_expected(tdi, box_with_array) expected = tm.box_expected(expected, box_with_array) tm.assert_equal(ts + tdarr, expected) @@ -1112,14 +1112,33 @@ def test_tdi_rmul_arraylike(self, other, box_with_array): tm.assert_equal(commute, expected) # ------------------------------------------------------------------ - # __div__ + # __div__, __rdiv__ def test_td64arr_div_nat_invalid(self, box_with_array): # don't allow division by NaT (maybe could in the future) rng = timedelta_range('1 days', '10 days', name='foo') rng = tm.box_expected(rng, box_with_array) - with pytest.raises(TypeError): + + with pytest.raises(TypeError, match='true_divide cannot use operands'): rng / pd.NaT + with pytest.raises(TypeError, match='Cannot divide NaTType by'): + pd.NaT / rng + + def test_td64arr_div_td64nat(self, box_with_array): + # GH#23829 + rng = timedelta_range('1 days', '10 days',) + rng = tm.box_expected(rng, box_with_array) + + other = np.timedelta64('NaT') + + expected = np.array([np.nan] * 10) + expected = tm.box_expected(expected, box_with_array) + + result = rng / other + tm.assert_equal(result, expected) + + result = other / rng + tm.assert_equal(result, expected) def test_td64arr_div_int(self, box_with_array): idx = TimedeltaIndex(np.arange(5, dtype='int64')) @@ -1128,7 +1147,11 @@ def test_td64arr_div_int(self, box_with_array): result = idx / 1 tm.assert_equal(result, idx) - def test_tdi_div_tdlike_scalar(self, two_hours, box_with_array): + with pytest.raises(TypeError, match='Cannot divide'): + # GH#23829 + 1 / idx + + def test_td64arr_div_tdlike_scalar(self, two_hours, box_with_array): # GH#20088, GH#22163 ensure DataFrame returns correct dtype rng = timedelta_range('1 days', '10 days', name='foo') expected = pd.Float64Index((np.arange(10) + 1) * 12, name='foo') @@ -1139,7 
+1162,12 @@ def test_tdi_div_tdlike_scalar(self, two_hours, box_with_array): result = rng / two_hours tm.assert_equal(result, expected) - def test_tdi_div_tdlike_scalar_with_nat(self, two_hours, box_with_array): + result = two_hours / rng + expected = 1 / expected + tm.assert_equal(result, expected) + + def test_td64arr_div_tdlike_scalar_with_nat(self, two_hours, + box_with_array): rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') expected = pd.Float64Index([12, np.nan, 24], name='foo') @@ -1149,6 +1177,58 @@ def test_tdi_div_tdlike_scalar_with_nat(self, two_hours, box_with_array): result = rng / two_hours tm.assert_equal(result, expected) + result = two_hours / rng + expected = 1 / expected + tm.assert_equal(result, expected) + + def test_td64arr_div_td64_ndarray(self, box_with_array): + # GH#22631 + rng = TimedeltaIndex(['1 days', pd.NaT, '2 days']) + expected = pd.Float64Index([12, np.nan, 24]) + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + other = np.array([2, 4, 2], dtype='m8[h]') + result = rng / other + tm.assert_equal(result, expected) + + result = rng / tm.box_expected(other, box_with_array) + tm.assert_equal(result, expected) + + result = rng / other.astype(object) + tm.assert_equal(result, expected) + + result = rng / list(other) + tm.assert_equal(result, expected) + + # reversed op + expected = 1 / expected + result = other / rng + tm.assert_equal(result, expected) + + result = tm.box_expected(other, box_with_array) / rng + tm.assert_equal(result, expected) + + result = other.astype(object) / rng + tm.assert_equal(result, expected) + + result = list(other) / rng + tm.assert_equal(result, expected) + + def test_tdarr_div_length_mismatch(self, box_with_array): + rng = TimedeltaIndex(['1 days', pd.NaT, '2 days']) + mismatched = [1, 2, 3, 4] + + rng = tm.box_expected(rng, box_with_array) + for obj in [mismatched, mismatched[:2]]: + # one shorter, one longer + for other in [obj, 
np.array(obj), pd.Index(obj)]: + with pytest.raises(ValueError): + rng / other + with pytest.raises(ValueError): + other / rng + # ------------------------------------------------------------------ # __floordiv__, __rfloordiv__ @@ -1200,6 +1280,10 @@ def test_td64arr_floordiv_int(self, box_with_array): result = idx // 1 tm.assert_equal(result, idx) + pattern = 'floor_divide cannot use operands' + with pytest.raises(TypeError, match=pattern): + 1 // idx + def test_td64arr_floordiv_tdlike_scalar(self, two_hours, box_with_array): tdi = timedelta_range('1 days', '10 days', name='foo') expected = pd.Int64Index((np.arange(10) + 1) * 12, name='foo') @@ -1306,6 +1390,9 @@ def test_td64arr_div_numeric_scalar(self, box_with_array, two): result = tdser / two tm.assert_equal(result, expected) + with pytest.raises(TypeError, match='Cannot divide'): + two / tdser + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', 'uint64', 'uint32', 'uint16', 'uint8', 'float64', 'float32', 'float16']) @@ -1355,9 +1442,28 @@ def test_td64arr_div_numeric_array(self, box_with_array, vector, dtype): result = tdser / vector tm.assert_equal(result, expected) - with pytest.raises(TypeError): + pattern = ('true_divide cannot use operands|' + 'cannot perform __div__|' + 'cannot perform __truediv__|' + 'unsupported operand|' + 'Cannot divide') + with pytest.raises(TypeError, match=pattern): vector / tdser + if not isinstance(vector, pd.Index): + # Index.__rdiv__ won't try to operate elementwise, just raises + result = tdser / vector.astype(object) + if box_with_array is pd.DataFrame: + expected = [tdser.iloc[0, n] / vector[n] + for n in range(len(vector))] + else: + expected = [tdser[n] / vector[n] for n in range(len(tdser))] + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + with pytest.raises(TypeError, match=pattern): + vector.astype(object) / tdser + @pytest.mark.parametrize('names', [(None, None, None), ('Egon', 'Venkman', None), ('NCC1701D', 'NCC1701D', 
'NCC1701D')]) @@ -1388,20 +1494,25 @@ def test_td64arr_mul_int_series(self, box_df_fail, names): @pytest.mark.parametrize('names', [(None, None, None), ('Egon', 'Venkman', None), ('NCC1701D', 'NCC1701D', 'NCC1701D')]) - def test_float_series_rdiv_td64arr(self, box, names): + def test_float_series_rdiv_td64arr(self, box_with_array, names): # GH#19042 test for correct name attachment # TODO: the direct operation TimedeltaIndex / Series still # needs to be fixed. + box = box_with_array tdi = TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], name=names[0]) ser = Series([1.5, 3, 4.5, 6, 7.5], dtype=np.float64, name=names[1]) + xname = names[2] if box is not tm.to_array else names[1] expected = Series([tdi[n] / ser[n] for n in range(len(ser))], dtype='timedelta64[ns]', - name=names[2]) + name=xname) + + xbox = box + if box in [pd.Index, tm.to_array] and type(ser) is Series: + xbox = Series tdi = tm.box_expected(tdi, box) - xbox = Series if (box is pd.Index and type(ser) is Series) else box expected = tm.box_expected(expected, xbox) result = ser.__rdiv__(tdi)