From 513c5c8d7fee7ef26eafca1d52b9eba7b2a1ff2d Mon Sep 17 00:00:00 2001 From: Ka Wo Chen Date: Sat, 5 Dec 2015 10:09:33 -0500 Subject: [PATCH] BUG: GH11349 where Series.apply and Series.map did not box timedelta64 --- doc/source/whatsnew/v0.18.0.txt | 59 ++++++++ pandas/core/ops.py | 94 +++++++----- pandas/core/series.py | 10 +- pandas/tests/test_series.py | 184 ++++++++++++++++++++++-- pandas/tseries/tests/test_timedeltas.py | 16 ++- pandas/tseries/tests/test_timeseries.py | 19 ++- pandas/tseries/tests/test_tslib.py | 39 ++--- pandas/tslib.pyx | 64 ++++++++- 8 files changed, 412 insertions(+), 73 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 4dd8a1d19c383..58d003b5c9dc7 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -167,6 +167,64 @@ Backwards incompatible API changes - The parameter ``out`` has been removed from the ``Series.round()`` method. (:issue:`11763`) - ``DataFrame.round()`` leaves non-numeric columns unchanged in its return, rather than raises. (:issue:`11885`) +NaT and Timedelta operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``NaT`` and ``Timedelta`` have expanded arithmetic operations, which are extended to ``Series`` +arithmetic where applicable. Operations defined for ``datetime64[ns]`` or ``timedelta64[ns]`` +are now also defined for ``NaT`` (:issue:`11564`). + +``NaT`` now supports arithmetic operations with integers and floats. + +.. ipython:: python + + pd.NaT * 1 + pd.NaT * 1.5 + pd.NaT / 2 + pd.NaT * np.nan + +``NaT`` defines more arithmetic operations with ``datetime64[ns]`` and ``timedelta64[ns]``. + +.. ipython:: python + + pd.NaT / pd.NaT + pd.Timedelta('1s') / pd.NaT + +``NaT`` may represent either a ``datetime64[ns]`` null or a ``timedelta64[ns]`` null. +Given the ambiguity, it is treated as a ``timedelta64[ns]``, which allows more operations +to succeed. + +..
ipython:: python + :okexcept: + + pd.NaT + pd.NaT + # same as + pd.Timedelta('1s') + pd.Timedelta('1s') + # as opposed to + pd.Timestamp('19900315') + pd.Timestamp('19900315') + +However, when wrapped in a ``Series`` whose ``dtype`` is ``datetime64[ns]`` or ``timedelta64[ns]``, +the ``dtype`` information is respected. + +.. ipython:: python + + pd.Series([pd.NaT], dtype=' unit less # integer gets converted to timedelta in np < 1.6 @@ -580,7 +606,7 @@ def wrapper(left, right, name=name, na_op=na_op): lvalues, rvalues = left, right dtype = None wrap_results = lambda x: x - elif time_converted == NotImplemented: + elif time_converted is NotImplemented: return NotImplemented else: left, right = time_converted.left, time_converted.right diff --git a/pandas/core/series.py b/pandas/core/series.py index d6eb18396e14c..29abd8f031206 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2078,8 +2078,9 @@ def map(self, arg, na_action=None): same index as caller """ values = self._values - if com.is_datetime64_dtype(values.dtype): - values = lib.map_infer(values, lib.Timestamp) + if needs_i8_conversion(values.dtype): + boxer = i8_boxer(values) + values = lib.map_infer(values, boxer) if na_action == 'ignore': mask = isnull(values) @@ -2210,8 +2211,9 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): return f(self) values = _values_from_object(self) - if com.is_datetime64_dtype(values.dtype): - values = lib.map_infer(values, lib.Timestamp) + if needs_i8_conversion(values.dtype): + boxer = i8_boxer(values) + values = lib.map_infer(values, boxer) mapped = lib.map_infer(values, f, convert=convert_dtype) if len(mapped) and isinstance(mapped[0], Series): diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 099e86a44d188..ea9ee8fc5b235 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -19,7 +19,7 @@ import numpy.ma as ma import pandas as pd -from pandas import (Index, Series, DataFrame, isnull, notnull,
bdate_range, +from pandas import (Index, Series, DataFrame, isnull, notnull, bdate_range, NaT, date_range, period_range, timedelta_range, _np_version_under1p8) from pandas.core.index import MultiIndex from pandas.core.indexing import IndexingError @@ -3466,6 +3466,13 @@ def test_timedeltas_with_DateOffset(self): assert_series_equal(result, expected) assert_series_equal(result2, expected) + result = s - pd.offsets.Second(5) + result2 = -pd.offsets.Second(5) + s + expected = Series( + [Timestamp('20130101 9:00:55'), Timestamp('20130101 9:01:55')]) + assert_series_equal(result, expected) + assert_series_equal(result2, expected) + result = s + pd.offsets.Milli(5) result2 = pd.offsets.Milli(5) + s expected = Series( @@ -3500,6 +3507,19 @@ def test_timedeltas_with_DateOffset(self): s + op(5) op(5) + s + def test_timedelta_series_ops(self): + #GH11925 + + s = Series(timedelta_range('1 day', periods=3)) + ts = Timestamp('2012-01-01') + expected = Series(date_range('2012-01-02', periods=3)) + assert_series_equal(ts + s, expected) + assert_series_equal(s + ts, expected) + + expected2 = Series(date_range('2011-12-31', periods=3, freq='-1D')) + assert_series_equal(ts - s, expected2) + assert_series_equal(ts + (-s), expected2) + def test_timedelta64_operations_with_DateOffset(self): # GH 10699 @@ -3619,11 +3639,14 @@ def test_timedelta64_operations_with_integers(self): assert_series_equal(result,expected) # invalid ops - for op in ['__true_div__','__div__','__mul__']: - sop = getattr(s1,op,None) - if sop is not None: - self.assertRaises(TypeError, sop, s2.astype(float)) - self.assertRaises(TypeError, sop, 2.) 
+ assert_series_equal(s1 / s2.astype(float), + Series([Timedelta('2 days 22:48:00'), + Timedelta('1 days 23:12:00'), + Timedelta('NaT')])) + assert_series_equal(s1 / 2.0, + Series([Timedelta('29 days 12:00:00'), + Timedelta('29 days 12:00:00'), + Timedelta('NaT')])) for op in ['__add__','__sub__']: sop = getattr(s1,op,None) @@ -3653,7 +3676,7 @@ def test_timedelta64_conversions(self): assert_series_equal(result, expected) # reverse op - expected = s1.apply(lambda x: np.timedelta64(m,unit) / x) + expected = s1.apply(lambda x: Timedelta(np.timedelta64(m,unit)) / x) result = np.timedelta64(m,unit) / s1 # astype @@ -3759,7 +3782,7 @@ def run_ops(ops, get_ser, test_ser): ### timetimedelta with datetime64 ### ops = ['__sub__', '__mul__', '__floordiv__', '__truediv__', '__div__', - '__pow__', '__rsub__', '__rmul__', '__rfloordiv__', + '__pow__', '__rmul__', '__rfloordiv__', '__rtruediv__', '__rdiv__', '__rpow__'] run_ops(ops, td1, dt1) td1 + dt1 @@ -3825,6 +3848,151 @@ def run_ops(ops, get_ser, test_ser): self.assertRaises(TypeError, lambda: td1 - dt1) self.assertRaises(TypeError, lambda: td2 - dt2) + def test_ops_nat(self): + # GH 11349 + timedelta_series = Series([NaT, Timedelta('1s')]) + datetime_series = Series([NaT, Timestamp('19900315')]) + nat_series_dtype_timedelta = Series([NaT, NaT], dtype='timedelta64[ns]') + nat_series_dtype_timestamp = Series([NaT, NaT], dtype='datetime64[ns]') + single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]') + single_nat_dtype_timedelta = Series([NaT], dtype='timedelta64[ns]') + + # subtraction + assert_series_equal(timedelta_series - NaT, nat_series_dtype_timedelta) + assert_series_equal(-NaT + timedelta_series, nat_series_dtype_timedelta) + + assert_series_equal(timedelta_series - single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + assert_series_equal(-single_nat_dtype_timedelta + timedelta_series, + nat_series_dtype_timedelta) + + assert_series_equal(datetime_series - NaT, nat_series_dtype_timestamp) + 
assert_series_equal(-NaT + datetime_series, nat_series_dtype_timestamp) + + assert_series_equal(datetime_series - single_nat_dtype_datetime, + nat_series_dtype_timedelta) + with tm.assertRaises(TypeError): + -single_nat_dtype_datetime + datetime_series + + assert_series_equal(datetime_series - single_nat_dtype_timedelta, + nat_series_dtype_timestamp) + assert_series_equal(-single_nat_dtype_timedelta + datetime_series , + nat_series_dtype_timestamp) + + # without a Series wrapping the NaT, it is ambiguous + # whether it is a datetime64 or timedelta64 + # defaults to interpreting it as timedelta64 + assert_series_equal(nat_series_dtype_timestamp - NaT, + nat_series_dtype_timestamp) + assert_series_equal(-NaT + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) + + assert_series_equal(nat_series_dtype_timestamp - single_nat_dtype_datetime, + nat_series_dtype_timedelta) + with tm.assertRaises(TypeError): + -single_nat_dtype_datetime + nat_series_dtype_timestamp + + assert_series_equal(nat_series_dtype_timestamp - single_nat_dtype_timedelta, + nat_series_dtype_timestamp) + assert_series_equal(-single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) + + with tm.assertRaises(TypeError): + timedelta_series - single_nat_dtype_datetime + + # addition + assert_series_equal(nat_series_dtype_timestamp + NaT, + nat_series_dtype_timestamp) + assert_series_equal(NaT + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) + + assert_series_equal(nat_series_dtype_timestamp + single_nat_dtype_timedelta, + nat_series_dtype_timestamp) + assert_series_equal(single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) + + assert_series_equal(nat_series_dtype_timedelta + NaT, + nat_series_dtype_timedelta) + assert_series_equal(NaT + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + assert_series_equal(nat_series_dtype_timedelta + single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + 
assert_series_equal(single_nat_dtype_timedelta + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + assert_series_equal(timedelta_series + NaT, nat_series_dtype_timedelta) + assert_series_equal(NaT + timedelta_series, nat_series_dtype_timedelta) + + assert_series_equal(timedelta_series + single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + assert_series_equal(single_nat_dtype_timedelta + timedelta_series, + nat_series_dtype_timedelta) + + assert_series_equal(nat_series_dtype_timestamp + NaT, + nat_series_dtype_timestamp) + assert_series_equal(NaT + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) + + assert_series_equal(nat_series_dtype_timestamp + single_nat_dtype_timedelta, + nat_series_dtype_timestamp) + assert_series_equal(single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) + + assert_series_equal(nat_series_dtype_timedelta + NaT, + nat_series_dtype_timedelta) + assert_series_equal(NaT + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + assert_series_equal(nat_series_dtype_timedelta + single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + assert_series_equal(single_nat_dtype_timedelta + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + assert_series_equal(nat_series_dtype_timedelta + single_nat_dtype_datetime, + nat_series_dtype_timestamp) + assert_series_equal(single_nat_dtype_datetime + nat_series_dtype_timedelta, + nat_series_dtype_timestamp) + + # multiplication + assert_series_equal(nat_series_dtype_timedelta * 1.0, + nat_series_dtype_timedelta) + assert_series_equal(1.0 * nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + assert_series_equal(timedelta_series * 1, timedelta_series) + assert_series_equal(1 * timedelta_series, timedelta_series) + + assert_series_equal(timedelta_series * 1.5, + Series([NaT, Timedelta('1.5s')])) + assert_series_equal(1.5 * timedelta_series, + Series([NaT, Timedelta('1.5s')])) + + assert_series_equal(timedelta_series * nan, 
nat_series_dtype_timedelta) + assert_series_equal(nan * timedelta_series, nat_series_dtype_timedelta) + + with tm.assertRaises(TypeError): + datetime_series * 1 + with tm.assertRaises(TypeError): + nat_series_dtype_timestamp * 1 + with tm.assertRaises(TypeError): + datetime_series * 1.0 + with tm.assertRaises(TypeError): + nat_series_dtype_timestamp * 1.0 + + # division + assert_series_equal(timedelta_series / 2, + Series([NaT, Timedelta('0.5s')])) + assert_series_equal(timedelta_series / 2.0, + Series([NaT, Timedelta('0.5s')])) + assert_series_equal(timedelta_series / nan, + nat_series_dtype_timedelta) + with tm.assertRaises(TypeError): + nat_series_dtype_timestamp / 1.0 + with tm.assertRaises(TypeError): + nat_series_dtype_timestamp / 1 + + def test_ops_datetimelike_align(self): # GH 7500 # datetimelike ops need to align diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 67f1b12ec8ead..cb050f2589673 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -270,7 +270,7 @@ def test_ops(self): self.assertEqual(abs(td), td) self.assertEqual(abs(-td), td) self.assertEqual(td / td, 1) - self.assertTrue((td / pd.NaT) is pd.NaT) + self.assertTrue((td / pd.NaT) is np.nan) # invert self.assertEqual(-td,Timedelta('-10d')) @@ -881,20 +881,32 @@ def test_timedelta_ops_with_missing_values(self): actual = s1 + scalar1 assert_series_equal(actual, s2) + actual = scalar1 + s1 + assert_series_equal(actual, s2) actual = s2 - scalar1 assert_series_equal(actual, s1) + actual = -scalar1 + s2 + assert_series_equal(actual, s1) actual = s1 + timedelta_NaT assert_series_equal(actual, sn) + actual = timedelta_NaT + s1 + assert_series_equal(actual, sn) actual = s1 - timedelta_NaT assert_series_equal(actual, sn) + actual = -timedelta_NaT + s1 + assert_series_equal(actual, sn) actual = s1 + NA assert_series_equal(actual, sn) + actual = NA + s1 + assert_series_equal(actual, sn) actual = s1 - NA 
assert_series_equal(actual, sn) + actual = -NA + s1 + assert_series_equal(actual, sn) - actual = s1 + pd.NaT # NaT is datetime, not timedelta + actual = s1 + pd.NaT assert_series_equal(actual, sn) actual = s2 - pd.NaT assert_series_equal(actual, sn) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 74b9f52a7eb0a..cf970807999e0 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -11,7 +11,8 @@ from pandas import (Index, Series, DataFrame, isnull, date_range, Timestamp, Period, DatetimeIndex, - Int64Index, to_datetime, bdate_range, Float64Index, TimedeltaIndex, NaT) + Int64Index, to_datetime, bdate_range, Float64Index, + TimedeltaIndex, NaT, timedelta_range, Timedelta) import pandas.core.datetools as datetools import pandas.tseries.offsets as offsets @@ -369,6 +370,12 @@ def test_series_box_timestamp(self): tm.assertIsInstance(s.iat[5], Timestamp) + def test_series_box_timedelta(self): + rng = timedelta_range('1 day 1 s',periods=5,freq='h') + s = Series(rng) + tm.assertIsInstance(s[1], Timedelta) + tm.assertIsInstance(s.iat[2], Timedelta) + def test_date_range_ambiguous_arguments(self): # #2538 start = datetime(2011, 1, 1, 5, 3, 40) @@ -2086,6 +2093,16 @@ def f(x): s.apply(f) DataFrame(s).applymap(f) + def test_series_map_box_timedelta(self): + # GH 11349 + s = Series(timedelta_range('1 day 1 s',periods=5,freq='h')) + + def f(x): + return x.total_seconds() + s.map(f) + s.apply(f) + DataFrame(s).applymap(f) + def test_concat_datetime_datetime64_frame(self): # #2624 rows = [] diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 7e772aeb14f6e..d27bddf8879db 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -860,31 +860,34 @@ def test_nat_arithmetic(self): t = Timestamp('2014-01-01') dt = datetime.datetime(2014, 1, 1) delta = datetime.timedelta(3600) - - # Timestamp / datetime - for (left, 
right) in [(nat, nat), (nat, t), (dt, nat)]: - # NaT + Timestamp-like should raise TypeError - with tm.assertRaises(TypeError): - left + right + td = Timedelta('5s') + i = 2 + f = 1.5 + + for (left, right) in [(nat, i), (nat, f), (nat, np.nan)]: + self.assertTrue((left / right) is nat) + self.assertTrue((left * right) is nat) + self.assertTrue((right * left) is nat) with tm.assertRaises(TypeError): - right + left + right / left - # NaT - Timestamp-like (or inverse) returns NaT - self.assertTrue((left - right) is tslib.NaT) - self.assertTrue((right - left) is tslib.NaT) + # Timestamp / datetime + for (left, right) in [(nat, nat), (nat, t), (nat, dt)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + self.assertTrue((right + left) is nat) + self.assertTrue((left + right) is nat) + self.assertTrue((left - right) is nat) + self.assertTrue((right - left) is nat) # timedelta-like # offsets are tested in test_offsets.py - for (left, right) in [(nat, delta)]: + for (left, right) in [(nat, delta), (nat, td)]: # NaT + timedelta-like returns NaT - self.assertTrue((left + right) is tslib.NaT) - # timedelta-like + NaT should raise TypeError - with tm.assertRaises(TypeError): - right + left + self.assertTrue((right + left) is nat) + self.assertTrue((left + right) is nat) + self.assertTrue((right - left) is nat) + self.assertTrue((left - right) is nat) - self.assertTrue((left - right) is tslib.NaT) - with tm.assertRaises(TypeError): - right - left class TestTslib(tm.TestCase): diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index a6908a0c36ad4..43f3c3add160a 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -665,6 +665,21 @@ class NaTType(_NaT): # GH 10939 return np.nan + def __rdiv__(self, other): + return _nat_rdivide_op(self, other) + + def __rtruediv__(self, other): + return _nat_rdivide_op(self, other) + + def __rfloordiv__(self, other): + return _nat_rdivide_op(self, other) + + def __rmul__(self, other): + if is_integer_object(other) or 
is_float_object(other): + return NaT + return NotImplemented + + fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond', 'nanosecond', @@ -1001,7 +1016,7 @@ cdef class _Timestamp(datetime): # index/series like elif hasattr(other, '_typ'): - return other + self + return NotImplemented result = datetime.__add__(self, other) if isinstance(result, datetime): @@ -1081,6 +1096,18 @@ _nat_scalar_rules[Py_GT] = False _nat_scalar_rules[Py_GE] = False +cdef _nat_divide_op(self, other): + if isinstance(other, (Timedelta, np.timedelta64)) or other is NaT: + return np.nan + if is_integer_object(other) or is_float_object(other): + return NaT + return NotImplemented + +cdef _nat_rdivide_op(self, other): + if isinstance(other, Timedelta): + return np.nan + return NotImplemented + cdef class _NaT(_Timestamp): def __hash__(_NaT self): @@ -1103,6 +1130,8 @@ cdef class _NaT(_Timestamp): def __add__(self, other): try: + if isinstance(other, datetime): + return NaT result = _Timestamp.__add__(self, other) if result is NotImplemented: return result @@ -1112,6 +1141,9 @@ cdef class _NaT(_Timestamp): def __sub__(self, other): + if other is NaT: + return NaT + if type(self) is datetime: other, self = self, other try: @@ -1122,6 +1154,26 @@ cdef class _NaT(_Timestamp): pass return NaT + def __pos__(self): + return NaT + + def __neg__(self): + return NaT + + def __div__(self, other): + return _nat_divide_op(self, other) + + def __truediv__(self, other): + return _nat_divide_op(self, other) + + def __floordiv__(self, other): + return _nat_divide_op(self, other) + + def __mul__(self, other): + if is_integer_object(other) or is_float_object(other): + return NaT + return NotImplemented + def _delta_to_nanoseconds(delta): if isinstance(delta, np.ndarray): @@ -2541,8 +2593,8 @@ class Timedelta(_Timedelta): if other is NaT: return NaT - # only integers allowed - if not is_integer_object(other): + # only integers and floats allowed + if not 
(is_integer_object(other) or is_float_object(other)): return NotImplemented return Timedelta(other*self.value, unit='ns') @@ -2554,8 +2606,8 @@ class Timedelta(_Timedelta): if hasattr(other, 'dtype'): return self.to_timedelta64() / other - # pure integers - if is_integer_object(other): + # integers or floats + if is_integer_object(other) or is_float_object(other): return Timedelta(self.value/other, unit='ns') if not self._validate_ops_compat(other): @@ -2563,7 +2615,7 @@ class Timedelta(_Timedelta): other = Timedelta(other) if other is NaT: - return NaT + return np.nan return self.value/float(other.value) def __rtruediv__(self, other):