From b1dce26c6792a2e3cec4a6cebfd11436fda0983b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 5 Aug 2019 18:29:41 -0700 Subject: [PATCH 1/6] BUG: simplify NaT add/sub, fix str bug --- pandas/_libs/tslibs/nattype.pyx | 55 +++++++++++++++++--------------- pandas/core/indexes/datetimes.py | 6 ++-- pandas/tests/scalar/test_nat.py | 21 ++++++++++-- 3 files changed, 53 insertions(+), 29 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 7f35a11e57b71..4e0fcec255c2e 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -115,46 +115,51 @@ cdef class _NaT(datetime): return PyObject_RichCompare(other, self, op) def __add__(self, other): + if self is not c_NaT: + # cython __radd__ semantics + self, other = other, self + if PyDateTime_Check(other): return c_NaT - + elif PyDelta_Check(other): + return c_NaT + elif is_datetime64_object(other) or is_timedelta64_object(other): + return c_NaT elif hasattr(other, 'delta'): # Timedelta, offsets.Tick, offsets.Week return c_NaT - elif getattr(other, '_typ', None) in ['dateoffset', 'series', - 'period', 'datetimeindex', - 'timedeltaindex']: - # Duplicate logic in _Timestamp.__add__ to avoid needing - # to subclass; allows us to @final(_Timestamp.__add__) - return NotImplemented - return c_NaT + + elif is_integer_object(other) or util.is_period_object(other): + # For Period compat + # TODO: the integer behavior is deprecated, remove it + return c_NaT + + return NotImplemented # TODO: need to handle ndarrays def __sub__(self, other): # Duplicate some logic from _Timestamp.__sub__ to avoid needing # to subclass; allows us to @final(_Timestamp.__sub__) - if PyDateTime_Check(other): - return NaT - elif PyDelta_Check(other): - return NaT - - elif getattr(other, '_typ', None) == 'datetimeindex': - # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex - return -other.__sub__(self) - elif getattr(other, '_typ', None) == 'timedeltaindex': - # a Timestamp-TimedeltaIndex -> yields a negative TimedeltaIndex - return (-other).__add__(self) + if self is not c_NaT: + # cython __rsub__ semantics + self, other = other, self + if PyDateTime_Check(other): + return c_NaT + elif PyDelta_Check(other): + return c_NaT + elif is_datetime64_object(other) or is_timedelta64_object(other): + return c_NaT elif hasattr(other, 'delta'): # offsets.Tick, offsets.Week - neg_other = -other - return self + neg_other + return c_NaT - elif getattr(other, '_typ', None) in ['period', 'series', - 'periodindex', 'dateoffset']: - return NotImplemented + elif is_integer_object(other) or util.is_period_object(other): + # For Period compat + # TODO: the integer behavior is deprecated, remove it + return c_NaT - return NaT + return NotImplemented # TODO: need to handle ndarrays def __pos__(self): return NaT diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d6f0008a2646f..18027defdc332 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -4,7 +4,7 @@ import numpy as np -from pandas._libs import Timestamp, index as libindex, lib, tslib as libts +from pandas._libs import NaT, Timestamp, index as libindex, lib, tslib as libts import pandas._libs.join as libjoin from pandas._libs.tslibs import ccalendar, fields, parsing, timezones from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -1281,7 +1281,9 @@ def insert(self, loc, item): raise ValueError("Passed item and index have different timezone") # check freq can be preserved on edge cases if self.size and self.freq is not None: - if (loc == 0 or loc == -len(self)) and item + self.freq == self[0]: + if item is NaT: + pass + elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]: freq = self.freq elif (loc == len(self)) and item - self.freq == self[-1]: freq = self.freq diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index f935a7fa880c7..0a7d62dc9f571 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -331,8 +331,9 @@ def test_nat_doc_strings(compare): "value,val_type", [ (2, "scalar"), - (1.5, "scalar"), - (np.nan, "scalar"), + (1.5, "floating"), + (np.nan, "floating"), + ("foo", "str"), (timedelta(3600), "timedelta"), (Timedelta("5s"), "timedelta"), (datetime(2014, 1, 1), "timestamp"), @@ -346,6 +347,14 @@ def test_nat_arithmetic_scalar(op_name, value, val_type): # see gh-6873 invalid_ops = { "scalar": {"right_div_left"}, + "floating": { + "right_div_left", + "left_minus_right", + "right_minus_left", + "left_plus_right", + "right_plus_left", + }, + "str": set(_ops.keys()), "timedelta": {"left_times_right", "right_times_left"}, "timestamp": { "left_times_right", @@ -364,6 +373,14 @@ def test_nat_arithmetic_scalar(op_name, value, val_type): and isinstance(value, Timedelta) ): msg = "Cannot multiply" + elif val_type == "str": + # un-specific check here because the message comes from str + # and varies by method + msg = ( + "can only concatenate str|" + "unsupported operand type|" + "can't multiply sequence" + ) else: msg = "unsupported operand type" From 00f0ea6b16c7bcfb409ca93ec2422a127e9ef22d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 7 Aug 2019 11:04:32 -0700 Subject: [PATCH 2/6] BUG: fix+test NaT ops with ndarray --- pandas/_libs/tslibs/nattype.pyx | 40 +++++++++++++++++++++++++++++++-- pandas/tests/scalar/test_nat.py | 24 ++++++++++++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 4e0fcec255c2e..3854a02589bcb 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -92,6 +92,9 @@ cdef class _NaT(datetime): # int64_t value # object freq + # higher than np.ndarray and np.matrix + __array_priority__ = 100 + def __hash__(_NaT self): # py3k needs this defined here return hash(self.value) @@ -134,15 +137,26 @@ cdef class _NaT(datetime): # TODO: the integer behavior is deprecated, remove it return c_NaT - return NotImplemented # TODO: need to handle ndarrays + elif util.is_array(other): + if other.dtype.kind in 'mM': + # If we are adding to datetime64, we treat NaT as timedelta + # Either way, result dtype is datetime64 + result = np.empty(other.shape, dtype="datetime64[ns]") + result.fill("NaT") + return result + + return NotImplemented def __sub__(self, other): # Duplicate some logic from _Timestamp.__sub__ to avoid needing # to subclass; allows us to @final(_Timestamp.__sub__) + cdef: + bint is_rsub = False if self is not c_NaT: # cython __rsub__ semantics self, other = other, self + is_rsub = True if PyDateTime_Check(other): return c_NaT @@ -159,7 +173,29 @@ cdef class _NaT(datetime): # TODO: the integer behavior is deprecated, remove it return c_NaT - return NotImplemented # TODO: need to handle ndarrays + elif util.is_array(other): + if other.dtype.kind == 'm': + if not is_rsub: + # NaT - timedelta64 we treat NaT as datetime64, so result + # is datetime64 + result = np.empty(other.shape, dtype="datetime64[ns]") + result.fill("NaT") + return result + + # timedelta64 - NaT we have to treat NaT as timedelta64 + # for this to be meaningful, and the result is timedelta64 + result = np.empty(other.shape, dtype="timedelta64[ns]") + result.fill("NaT") + return result + + elif other.dtype.kind == 'M': + # We treat NaT as a datetime, so regardless of whether this is + # NaT - other or other - NaT, the result is timedelta64 + result = np.empty(other.shape, dtype="timedelta64[ns]") + result.fill("NaT") + return result + + return NotImplemented def __pos__(self): return NaT diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 0a7d62dc9f571..20a211c1fb023 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta +import operator import numpy as np import pytest @@ -19,6 +20,7 @@ isna, ) from pandas.core.arrays import PeriodArray +from pandas.core.ops import roperator from pandas.util import testing as tm @@ -443,6 +445,28 @@ def test_nat_arithmetic_td64_vector(op_name, box): tm.assert_equal(_ops[op_name](vec, NaT), box_nat) +@pytest.mark.parametrize( + "dtype,op,out_dtype", + [ + ("datetime64[ns]", operator.add, "datetime64[ns]"), + ("datetime64[ns]", roperator.radd, "datetime64[ns]"), + ("datetime64[ns]", operator.sub, "timedelta64[ns]"), + ("datetime64[ns]", roperator.rsub, "timedelta64[ns]"), + ("timedelta64[ns]", operator.add, "datetime64[ns]"), + ("timedelta64[ns]", roperator.radd, "datetime64[ns]"), + ("timedelta64[ns]", operator.sub, "datetime64[ns]"), + ("timedelta64[ns]", roperator.rsub, "timedelta64[ns]"), + ], +) +def test_nat_arithmetic_ndarray(dtype, op, out_dtype): + other = np.arange(10).astype(dtype) + result = op(NaT, other) + + expected = np.empty(other.shape, dtype=out_dtype) + expected.fill("NaT") + tm.assert_numpy_array_equal(result, expected) + + def test_nat_pinned_docstrings(): # see gh-17327 assert NaT.ctime.__doc__ == datetime.ctime.__doc__ From c1935d233a0578a878e9b9e9d3d47c645c61e47c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 7 Aug 2019 13:12:52 -0700 Subject: [PATCH 3/6] fix tests --- pandas/_libs/tslibs/nattype.pyx | 6 +++ pandas/tests/arithmetic/test_timedelta64.py | 2 +- pandas/tests/scalar/period/test_period.py | 46 ++++----------------- 3 files changed, 14 insertions(+), 40 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 3854a02589bcb..ac28974e17926 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -106,12 +106,18 @@ cdef class _NaT(datetime): if ndim == -1: return _nat_scalar_rules[op] + if util.is_array(other): + result = np.empty(other.shape, dtype=np.bool_) + result.fill(_nat_scalar_rules[op]) + return result + if ndim == 0: if is_datetime64_object(other): return _nat_scalar_rules[op] else: raise TypeError('Cannot compare type %r with type %r' % (type(self).__name__, type(other).__name__)) + # Note: instead of passing "other, self, _reverse_ops[op]", we observe # that `_nat_scalar_rules` is invariant under `_reverse_ops`, # rendering it unnecessary. diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 4f5e00bc5a37d..6cf90de86ad14 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1631,7 +1631,7 @@ def test_td64arr_div_nat_invalid(self, box_with_array): rng = timedelta_range("1 days", "10 days", name="foo") rng = tm.box_expected(rng, box_with_array) - with pytest.raises(TypeError, match="'?true_divide'? cannot use operands"): + with pytest.raises(TypeError, match="unsupported operand type"): rng / pd.NaT with pytest.raises(TypeError, match="Cannot divide NaTType by"): pd.NaT / rng diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index b57b817461788..6da4d556ea07e 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -1298,23 +1298,13 @@ def test_add_offset_nat(self): timedelta(365), ]: assert p + o is NaT - - if isinstance(o, np.timedelta64): - with pytest.raises(TypeError): - o + p - else: - assert o + p is NaT + assert o + p is NaT for freq in ["M", "2M", "3M"]: p = Period("NaT", freq=freq) for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: assert p + o is NaT - - if isinstance(o, np.timedelta64): - with pytest.raises(TypeError): - o + p - else: - assert o + p is NaT + assert o + p is NaT for o in [ offsets.YearBegin(2), @@ -1324,12 +1314,7 @@ def test_add_offset_nat(self): timedelta(365), ]: assert p + o is NaT - - if isinstance(o, np.timedelta64): - with pytest.raises(TypeError): - o + p - else: - assert o + p is NaT + assert o + p is NaT # freq is Tick for freq in ["D", "2D", "3D"]: @@ -1343,12 +1328,7 @@ def test_add_offset_nat(self): timedelta(hours=48), ]: assert p + o is NaT - - if isinstance(o, np.timedelta64): - with pytest.raises(TypeError): - o + p - else: - assert o + p is NaT + assert o + p is NaT for o in [ offsets.YearBegin(2), @@ -1358,12 +1338,7 @@ def test_add_offset_nat(self): timedelta(hours=23), ]: assert p + o is NaT - - if isinstance(o, np.timedelta64): - with pytest.raises(TypeError): - o + p - else: - assert o + p is NaT + assert o + p is NaT for freq in ["H", "2H", "3H"]: p = Period("NaT", freq=freq) @@ -1376,9 +1351,7 @@ def test_add_offset_nat(self): timedelta(days=4, minutes=180), ]: assert p + o is NaT - - if not isinstance(o, np.timedelta64): - assert o + p is NaT + assert o + p is NaT for o in [ offsets.YearBegin(2), @@ -1388,12 +1361,7 @@ def test_add_offset_nat(self): timedelta(hours=23, minutes=30), ]: assert p + o is NaT - - if isinstance(o, np.timedelta64): - with pytest.raises(TypeError): - o + p - else: - assert o + p is NaT + assert o + p is NaT def test_sub_offset(self): # freq is DateOffset From 3d20bfa8fe20e350740b2f78b1ef915ef4d2abf1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 7 Aug 2019 14:37:38 -0700 Subject: [PATCH 4/6] msg compat for platform/numpy --- pandas/tests/scalar/test_nat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index a83b1d1a8a3e6..c7192926ca091 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -383,7 +383,8 @@ def test_nat_arithmetic_scalar(op_name, value, val_type): msg = ( "can only concatenate str|" "unsupported operand type|" - "can't multiply sequence" + "can't multiply sequence|" + "Can't convert 'NaTType'" ) else: msg = "unsupported operand type" From 5e9db8e7f0fdbb8d21ede2c666a7dc96397e0b5b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 7 Aug 2019 15:13:50 -0700 Subject: [PATCH 5/6] windows compat --- pandas/tests/scalar/test_nat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index c7192926ca091..5b1c4f92bf341 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -384,7 +384,8 @@ def test_nat_arithmetic_scalar(op_name, value, val_type): "can only concatenate str|" "unsupported operand type|" "can't multiply sequence|" - "Can't convert 'NaTType'" + "Can't convert 'NaTType'|" + "must be str, not NaTType" ) else: msg = "unsupported operand type" From 10c481795314b826bb7115ea6d45886ced7a0490 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 8 Aug 2019 06:38:34 -0700 Subject: [PATCH 6/6] if -->elif --- pandas/_libs/tslibs/nattype.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index ac28974e17926..020d1acf0b4ce 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -106,12 +106,12 @@ cdef class _NaT(datetime): if ndim == -1: return _nat_scalar_rules[op] - if util.is_array(other): + elif util.is_array(other): result = np.empty(other.shape, dtype=np.bool_) result.fill(_nat_scalar_rules[op]) return result - if ndim == 0: + elif ndim == 0: if is_datetime64_object(other): return _nat_scalar_rules[op] else: