From 7d00a308a4c3e32c44498c690740a5a7d9a5a842 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sat, 22 Nov 2014 15:40:56 -0800 Subject: [PATCH] BUG/ENH: cleanup for Timedelta arithmetic Fixes GH8813 Fixes GH5963 Fixes GH5436 If the other argument has a dtype attribute, I assume that it is ndarray-like and convert the `Timedelta` into a `np.timedelta64` object. Alternatively, we could just return `NotImplemented` and let the other type handle it, but this has the bonus of making `Timedelta` compatible with ndarrays. I also added a `Timedelta.to_timedelta64()` method to the public API. I couldn't find a listing for `Timedelta` in the API docs -- we should probably add that, right? Next up would be a similar treatment for `Timestamp`. --- doc/source/whatsnew/v0.15.2.txt | 7 ++ pandas/tseries/base.py | 5 + pandas/tseries/tdi.py | 2 +- pandas/tseries/tests/test_timedeltas.py | 98 ++++++++++++++++-- pandas/tslib.pyx | 128 ++++++++++++++---------- 5 files changed, 179 insertions(+), 61 deletions(-) diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index 6688f106f922e..d559b343e2013 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -66,6 +66,11 @@ Enhancements - Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on `Timestamp` class (:issue:`5351`). - Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See :ref:`here`. - Added flag ``order_categoricals`` to ``StataReader`` and ``read_stata`` to select whether to order imported categorical data (:issue:`8836`). See :ref:`here ` for more information on importing categorical variables from Stata data files. +- ``Timedelta`` arithmetic returns ``NotImplemented`` in unknown cases, allowing extensions +by custom classes (:issue:`8813`). +- ``Timedelta`` now supports arithmetic with ``numpy.ndarray`` objects of the appropriate +dtype (numpy 1.8 or newer only) (:issue:`8884`). 
+- Added ``Timedelta.to_timedelta64`` method to the public API (:issue:`8884`). .. _whatsnew_0152.performance: @@ -89,6 +94,8 @@ Bug Fixes - Bug in slicing a multi-index with an empty list and at least one boolean indexer (:issue:`8781`) - ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`). - ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`). +- Fixed several outstanding bugs for ``Timedelta`` arithmetic and comparisons +(:issue:`8813`, :issue:`5963`, :issue:`5436`). - ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`) - ``slice`` string method now takes step into account (:issue:`8754`) - Bug in ``BlockManager`` where setting values with different type would break block integrity (:issue:`8850`) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index d47544149c381..b523fb1d56290 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -321,6 +321,7 @@ def __add__(self, other): else: # pragma: no cover return NotImplemented cls.__add__ = __add__ + cls.__radd__ = __add__ def __sub__(self, other): from pandas.core.index import Index @@ -344,6 +345,10 @@ def __sub__(self, other): return NotImplemented cls.__sub__ = __sub__ + def __rsub__(self, other): + return -self + other + cls.__rsub__ = __rsub__ + cls.__iadd__ = __add__ cls.__isub__ = __sub__ diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 7fb897aecc809..5a946acac2baa 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -311,7 +311,7 @@ def _evaluate_with_timedelta_like(self, other, op, opstr): result = self._maybe_mask_results(result,convert='float64') return Index(result,name=self.name,copy=False) - raise TypeError("can only perform ops with timedelta like values") + return NotImplemented def _add_datelike(self, other): diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 
9ad2a090ee0cf..494a9cc95dc49 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -4,6 +4,7 @@ from datetime import datetime, timedelta, time import nose +from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -45,12 +46,12 @@ def test_construction(self): self.assertEqual(Timedelta(days=10,seconds=10).value, expected) self.assertEqual(Timedelta(days=10,milliseconds=10*1000).value, expected) self.assertEqual(Timedelta(days=10,microseconds=10*1000*1000).value, expected) - + # test construction with np dtypes # GH 8757 - timedelta_kwargs = {'days':'D', 'seconds':'s', 'microseconds':'us', + timedelta_kwargs = {'days':'D', 'seconds':'s', 'microseconds':'us', 'milliseconds':'ms', 'minutes':'m', 'hours':'h', 'weeks':'W'} - npdtypes = [np.int64, np.int32, np.int16, + npdtypes = [np.int64, np.int32, np.int16, np.float64, np.float32, np.float16] for npdtype in npdtypes: for pykwarg, npkwarg in timedelta_kwargs.items(): @@ -163,9 +164,17 @@ def test_identity(self): def test_conversion(self): for td in [ Timedelta(10,unit='d'), Timedelta('1 days, 10:11:12.012345') ]: - self.assertTrue(td == Timedelta(td.to_pytimedelta())) - self.assertEqual(td,td.to_pytimedelta()) - self.assertEqual(td,np.timedelta64(td.value,'ns')) + pydt = td.to_pytimedelta() + self.assertTrue(td == Timedelta(pydt)) + self.assertEqual(td, pydt) + self.assertTrue(isinstance(pydt, timedelta) + and not isinstance(pydt, Timedelta)) + + self.assertEqual(td, np.timedelta64(td.value, 'ns')) + td64 = td.to_timedelta64() + self.assertEqual(td64, np.timedelta64(td.value, 'ns')) + self.assertEqual(td, td64) + self.assertTrue(isinstance(td64, np.timedelta64)) # this is NOT equal and cannot be roundtriped (because of the nanos) td = Timedelta('1 days, 10:11:12.012345678') @@ -204,6 +213,15 @@ def test_ops(self): self.assertRaises(TypeError, lambda : td + 2) self.assertRaises(TypeError, lambda : td - 2) + def test_ops_offsets(self): + td = 
Timedelta(10, unit='d') + self.assertEqual(Timedelta(241, unit='h'), td + pd.offsets.Hour(1)) + self.assertEqual(Timedelta(241, unit='h'), pd.offsets.Hour(1) + td) + self.assertEqual(240, td / pd.offsets.Hour(1)) + self.assertEqual(1 / 240.0, pd.offsets.Hour(1) / td) + self.assertEqual(Timedelta(239, unit='h'), td - pd.offsets.Hour(1)) + self.assertEqual(Timedelta(-239, unit='h'), pd.offsets.Hour(1) - td) + def test_freq_conversion(self): td = Timedelta('1 days 2 hours 3 ns') @@ -214,6 +232,74 @@ def test_freq_conversion(self): result = td / np.timedelta64(1,'ns') self.assertEquals(result, td.value) + def test_ops_ndarray(self): + td = Timedelta('1 day') + + # timedelta, timedelta + other = pd.to_timedelta(['1 day']).values + expected = pd.to_timedelta(['2 days']).values + self.assert_numpy_array_equal(td + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other + td, expected) + self.assertRaises(TypeError, lambda: td + np.array([1])) + self.assertRaises(TypeError, lambda: np.array([1]) + td) + + expected = pd.to_timedelta(['0 days']).values + self.assert_numpy_array_equal(td - other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(-other + td, expected) + self.assertRaises(TypeError, lambda: td - np.array([1])) + self.assertRaises(TypeError, lambda: np.array([1]) - td) + + expected = pd.to_timedelta(['2 days']).values + self.assert_numpy_array_equal(td * np.array([2]), expected) + self.assert_numpy_array_equal(np.array([2]) * td, expected) + self.assertRaises(TypeError, lambda: td * other) + self.assertRaises(TypeError, lambda: other * td) + + self.assert_numpy_array_equal(td / other, np.array([1])) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other / td, np.array([1])) + + # timedelta, datetime + other = pd.to_datetime(['2000-01-01']).values + expected = pd.to_datetime(['2000-01-02']).values + self.assert_numpy_array_equal(td + other, expected) + if 
LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other + td, expected) + + expected = pd.to_datetime(['1999-12-31']).values + self.assert_numpy_array_equal(-td + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other - td, expected) + + def test_ops_series(self): + # regression test for GH8813 + td = Timedelta('1 day') + other = pd.Series([1, 2]) + expected = pd.Series(pd.to_timedelta(['1 day', '2 days'])) + tm.assert_series_equal(expected, td * other) + tm.assert_series_equal(expected, other * td) + + def test_compare_timedelta_series(self): + # regression test for GH5963 + s = pd.Series([timedelta(days=1), timedelta(days=2)]) + actual = s > timedelta(days=1) + expected = pd.Series([False, True]) + tm.assert_series_equal(actual, expected) + + def test_ops_notimplemented(self): + class Other: + pass + other = Other() + + td = Timedelta('1 day') + self.assertTrue(td.__add__(other) is NotImplemented) + self.assertTrue(td.__sub__(other) is NotImplemented) + self.assertTrue(td.__truediv__(other) is NotImplemented) + self.assertTrue(td.__mul__(other) is NotImplemented) + self.assertTrue(td.__floordiv__(td) is NotImplemented) + def test_fields(self): rng = to_timedelta('1 days, 10:11:12') self.assertEqual(rng.days,1) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index aed6dea264be6..8efc174d6890b 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -531,6 +531,12 @@ class Timestamp(_Timestamp): self.nanosecond/3600.0/1e+9 )/24.0) + def __radd__(self, other): + # __radd__ on cython extension types like _Timestamp is not used, so + # define it here instead + return self + other + + _nat_strings = set(['NaT','nat','NAT','nan','NaN','NAN']) class NaTType(_NaT): """(N)ot-(A)-(T)ime, the time equivalent of NaN""" @@ -1883,8 +1889,12 @@ class Timedelta(_Timedelta): """ array view compat """ return np.timedelta64(self.value).view(dtype) - def _validate_ops_compat(self, other, op): - # return a boolean if we 
are compat with operating + def to_timedelta64(self): + """ Returns a numpy.timedelta64 object with 'ns' precision """ + return np.timedelta64(self.value, 'ns') + + def _validate_ops_compat(self, other): + # return True if we are compat with operating if _checknull_with_nat(other): return True elif isinstance(other, (Timedelta, timedelta, np.timedelta64)): @@ -1893,55 +1903,58 @@ class Timedelta(_Timedelta): return True elif hasattr(other,'delta'): return True - raise TypeError("cannot operate add a Timedelta with op {op} for {typ}".format(op=op,typ=type(other))) - - def __add__(self, other): - - # a Timedelta with Series/Index like - if hasattr(other,'_typ'): - return other + self - - # an offset - elif hasattr(other,'delta') and not isinstance(other, Timedelta): - return self + other.delta - - # a datetimelike - elif isinstance(other, (Timestamp, datetime, np.datetime64)): - return Timestamp(other) + self - - self._validate_ops_compat(other,'__add__') - - other = Timedelta(other) - if other is NaT: - return NaT - return Timedelta(self.value + other.value, unit='ns') - - def __sub__(self, other): - - # a Timedelta with Series/Index like - if hasattr(other,'_typ'): - neg_other = -other - return neg_other + self - - # an offset - elif hasattr(other,'delta') and not isinstance(other, Timedelta): - return self - other.delta + return False - self._validate_ops_compat(other,'__sub__') + # higher than np.ndarray and np.matrix + __array_priority__ = 100 + + def _binary_op_method_timedeltalike(op, name): + # define a binary operation that only works if the other argument is + # timedelta like or an array of timedeltalike + def f(self, other): + # an offset + if hasattr(other, 'delta') and not isinstance(other, Timedelta): + return op(self, other.delta) + + # a datetimelike + if (isinstance(other, (datetime, np.datetime64)) + and not isinstance(other, (Timestamp, NaTType))): + return op(self, Timestamp(other)) + + # nd-array like + if hasattr(other, 'dtype'): + if 
other.dtype.kind not in ['m', 'M']: + # raise rather than letting numpy return wrong answer + return NotImplemented + return op(self.to_timedelta64(), other) + + if not self._validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return NaT + return Timedelta(op(self.value, other.value), unit='ns') + f.__name__ = name + return f - other = Timedelta(other) - if other is NaT: - return NaT - return Timedelta(self.value - other.value, unit='ns') + __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') + __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') + __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') + __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') def __mul__(self, other): + # nd-array like + if hasattr(other, 'dtype'): + return other * self.to_timedelta64() + if other is NaT: return NaT # only integers allowed if not is_integer_object(other): - raise TypeError("cannot multiply a Timedelta with {typ}".format(typ=type(other))) + return NotImplemented return Timedelta(other*self.value, unit='ns') @@ -1949,35 +1962,42 @@ class Timedelta(_Timedelta): def __truediv__(self, other): - # a timedelta64 IS an integer object as well - if is_timedelta64_object(other): - return self.value/float(_delta_to_nanoseconds(other)) + if hasattr(other, 'dtype'): + return self.to_timedelta64() / other # pure integers - elif is_integer_object(other): + if is_integer_object(other): return Timedelta(self.value/other, unit='ns') - self._validate_ops_compat(other,'__div__') + if not self._validate_ops_compat(other): + return NotImplemented other = Timedelta(other) if other is NaT: return NaT - return self.value/float(other.value) - def _make_invalid(opstr): + def __rtruediv__(self, other): + if hasattr(other, 'dtype'): + return other / self.to_timedelta64() - def _invalid(other): - raise TypeError("cannot perform {opstr} with 
{typ}".format(opstr=opstr,typ=type(other))) + if not self._validate_ops_compat(other): + return NotImplemented - __rtruediv__ = _make_invalid('__rtruediv__') + other = Timedelta(other) + if other is NaT: + return NaT + return float(other.value) / self.value if not PY3: __div__ = __truediv__ - __rdiv__ = _make_invalid('__rtruediv__') + __rdiv__ = __rtruediv__ + + def _not_implemented(self, *args, **kwargs): + return NotImplemented - __floordiv__ = _make_invalid('__floordiv__') - __rfloordiv__ = _make_invalid('__rfloordiv__') + __floordiv__ = _not_implemented + __rfloordiv__ = _not_implemented def _op_unary_method(func, name):