Skip to content

BUG: fix+test op(NaT, ndarray), also simplify #27807

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 15, 2019
104 changes: 74 additions & 30 deletions pandas/_libs/tslibs/nattype.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ cdef class _NaT(datetime):
# int64_t value
# object freq

# higher than np.ndarray and np.matrix
__array_priority__ = 100

def __hash__(_NaT self):
# py3k needs this defined here
return hash(self.value)
Expand All @@ -103,61 +106,102 @@ cdef class _NaT(datetime):
if ndim == -1:
return _nat_scalar_rules[op]

if ndim == 0:
elif util.is_array(other):
result = np.empty(other.shape, dtype=np.bool_)
result.fill(_nat_scalar_rules[op])
return result

elif ndim == 0:
if is_datetime64_object(other):
return _nat_scalar_rules[op]
else:
raise TypeError('Cannot compare type %r with type %r' %
(type(self).__name__, type(other).__name__))

# Note: instead of passing "other, self, _reverse_ops[op]", we observe
# that `_nat_scalar_rules` is invariant under `_reverse_ops`,
# rendering it unnecessary.
return PyObject_RichCompare(other, self, op)

def __add__(self, other):
if self is not c_NaT:
# cython __radd__ semantics
self, other = other, self

if PyDateTime_Check(other):
return c_NaT

elif PyDelta_Check(other):
return c_NaT
elif is_datetime64_object(other) or is_timedelta64_object(other):
return c_NaT
elif hasattr(other, 'delta'):
# Timedelta, offsets.Tick, offsets.Week
return c_NaT
elif getattr(other, '_typ', None) in ['dateoffset', 'series',
'period', 'datetimeindex',
'datetimearray',
'timedeltaindex',
'timedeltaarray']:
# Duplicate logic in _Timestamp.__add__ to avoid needing
# to subclass; allows us to @final(_Timestamp.__add__)
return NotImplemented
return c_NaT

elif is_integer_object(other) or util.is_period_object(other):
# For Period compat
# TODO: the integer behavior is deprecated, remove it
return c_NaT

elif util.is_array(other):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if there is a tz?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is specifically for ndarray, so there cant be a tz

if other.dtype.kind in 'mM':
# If we are adding to datetime64, we treat NaT as timedelta
# Either way, result dtype is datetime64
result = np.empty(other.shape, dtype="datetime64[ns]")
result.fill("NaT")
return result

return NotImplemented

def __sub__(self, other):
# Duplicate some logic from _Timestamp.__sub__ to avoid needing
# to subclass; allows us to @final(_Timestamp.__sub__)
cdef:
bint is_rsub = False

if self is not c_NaT:
# cython __rsub__ semantics
self, other = other, self
is_rsub = True

if PyDateTime_Check(other):
return NaT
return c_NaT
elif PyDelta_Check(other):
return NaT
return c_NaT
elif is_datetime64_object(other) or is_timedelta64_object(other):
return c_NaT
elif hasattr(other, 'delta'):
# offsets.Tick, offsets.Week
return c_NaT

elif getattr(other, '_typ', None) == 'datetimeindex':
# a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex
return -other.__sub__(self)
elif is_integer_object(other) or util.is_period_object(other):
# For Period compat
# TODO: the integer behavior is deprecated, remove it
return c_NaT

elif getattr(other, '_typ', None) == 'timedeltaindex':
# a Timestamp-TimedeltaIndex -> yields a negative TimedeltaIndex
return (-other).__add__(self)
elif util.is_array(other):
if other.dtype.kind == 'm':
if not is_rsub:
# NaT - timedelta64 we treat NaT as datetime64, so result
# is datetime64
result = np.empty(other.shape, dtype="datetime64[ns]")
result.fill("NaT")
return result

# timedelta64 - NaT we have to treat NaT as timedelta64
# for this to be meaningful, and the result is timedelta64
result = np.empty(other.shape, dtype="timedelta64[ns]")
result.fill("NaT")
return result

elif other.dtype.kind == 'M':
# We treat NaT as a datetime, so regardless of whether this is
# NaT - other or other - NaT, the result is timedelta64
result = np.empty(other.shape, dtype="timedelta64[ns]")
result.fill("NaT")
return result

elif hasattr(other, 'delta'):
# offsets.Tick, offsets.Week
neg_other = -other
return self + neg_other

elif getattr(other, '_typ', None) in ['period', 'series',
'periodindex', 'dateoffset',
'datetimearray',
'timedeltaarray']:
return NotImplemented
return NaT
return NotImplemented

def __pos__(self):
return NaT
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np

from pandas._libs import Timestamp, index as libindex, lib, tslib as libts
from pandas._libs import NaT, Timestamp, index as libindex, lib, tslib as libts
import pandas._libs.join as libjoin
from pandas._libs.tslibs import ccalendar, fields, parsing, timezones
from pandas.util._decorators import Appender, Substitution, cache_readonly
Expand Down Expand Up @@ -1281,7 +1281,9 @@ def insert(self, loc, item):
raise ValueError("Passed item and index have different timezone")
# check freq can be preserved on edge cases
if self.size and self.freq is not None:
if (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
if item is NaT:
pass
elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
freq = self.freq
elif (loc == len(self)) and item - self.freq == self[-1]:
freq = self.freq
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1631,7 +1631,7 @@ def test_td64arr_div_nat_invalid(self, box_with_array):
rng = timedelta_range("1 days", "10 days", name="foo")
rng = tm.box_expected(rng, box_with_array)

with pytest.raises(TypeError, match="'?true_divide'? cannot use operands"):
with pytest.raises(TypeError, match="unsupported operand type"):
rng / pd.NaT
with pytest.raises(TypeError, match="Cannot divide NaTType by"):
pd.NaT / rng
Expand Down
46 changes: 7 additions & 39 deletions pandas/tests/scalar/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1298,23 +1298,13 @@ def test_add_offset_nat(self):
timedelta(365),
]:
assert p + o is NaT

if isinstance(o, np.timedelta64):
with pytest.raises(TypeError):
o + p
else:
assert o + p is NaT
assert o + p is NaT

for freq in ["M", "2M", "3M"]:
p = Period("NaT", freq=freq)
for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]:
assert p + o is NaT

if isinstance(o, np.timedelta64):
with pytest.raises(TypeError):
o + p
else:
assert o + p is NaT
assert o + p is NaT

for o in [
offsets.YearBegin(2),
Expand All @@ -1324,12 +1314,7 @@ def test_add_offset_nat(self):
timedelta(365),
]:
assert p + o is NaT

if isinstance(o, np.timedelta64):
with pytest.raises(TypeError):
o + p
else:
assert o + p is NaT
assert o + p is NaT

# freq is Tick
for freq in ["D", "2D", "3D"]:
Expand All @@ -1343,12 +1328,7 @@ def test_add_offset_nat(self):
timedelta(hours=48),
]:
assert p + o is NaT

if isinstance(o, np.timedelta64):
with pytest.raises(TypeError):
o + p
else:
assert o + p is NaT
assert o + p is NaT

for o in [
offsets.YearBegin(2),
Expand All @@ -1358,12 +1338,7 @@ def test_add_offset_nat(self):
timedelta(hours=23),
]:
assert p + o is NaT

if isinstance(o, np.timedelta64):
with pytest.raises(TypeError):
o + p
else:
assert o + p is NaT
assert o + p is NaT

for freq in ["H", "2H", "3H"]:
p = Period("NaT", freq=freq)
Expand All @@ -1376,9 +1351,7 @@ def test_add_offset_nat(self):
timedelta(days=4, minutes=180),
]:
assert p + o is NaT

if not isinstance(o, np.timedelta64):
assert o + p is NaT
assert o + p is NaT

for o in [
offsets.YearBegin(2),
Expand All @@ -1388,12 +1361,7 @@ def test_add_offset_nat(self):
timedelta(hours=23, minutes=30),
]:
assert p + o is NaT

if isinstance(o, np.timedelta64):
with pytest.raises(TypeError):
o + p
else:
assert o + p is NaT
assert o + p is NaT

def test_sub_offset(self):
# freq is DateOffset
Expand Down
47 changes: 45 additions & 2 deletions pandas/tests/scalar/test_nat.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime, timedelta
import operator

import numpy as np
import pytest
Expand All @@ -21,6 +22,7 @@
isna,
)
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
from pandas.core.ops import roperator
from pandas.util import testing as tm


Expand Down Expand Up @@ -333,8 +335,9 @@ def test_nat_doc_strings(compare):
"value,val_type",
[
(2, "scalar"),
(1.5, "scalar"),
(np.nan, "scalar"),
(1.5, "floating"),
(np.nan, "floating"),
("foo", "str"),
(timedelta(3600), "timedelta"),
(Timedelta("5s"), "timedelta"),
(datetime(2014, 1, 1), "timestamp"),
Expand All @@ -348,6 +351,14 @@ def test_nat_arithmetic_scalar(op_name, value, val_type):
# see gh-6873
invalid_ops = {
"scalar": {"right_div_left"},
"floating": {
"right_div_left",
"left_minus_right",
"right_minus_left",
"left_plus_right",
"right_plus_left",
},
"str": set(_ops.keys()),
"timedelta": {"left_times_right", "right_times_left"},
"timestamp": {
"left_times_right",
Expand All @@ -366,6 +377,16 @@ def test_nat_arithmetic_scalar(op_name, value, val_type):
and isinstance(value, Timedelta)
):
msg = "Cannot multiply"
elif val_type == "str":
# un-specific check here because the message comes from str
# and varies by method
msg = (
"can only concatenate str|"
"unsupported operand type|"
"can't multiply sequence|"
"Can't convert 'NaTType'|"
"must be str, not NaTType"
)
else:
msg = "unsupported operand type"

Expand Down Expand Up @@ -435,6 +456,28 @@ def test_nat_arithmetic_td64_vector(op_name, box):
tm.assert_equal(_ops[op_name](vec, NaT), box_nat)


@pytest.mark.parametrize(
"dtype,op,out_dtype",
[
("datetime64[ns]", operator.add, "datetime64[ns]"),
("datetime64[ns]", roperator.radd, "datetime64[ns]"),
("datetime64[ns]", operator.sub, "timedelta64[ns]"),
("datetime64[ns]", roperator.rsub, "timedelta64[ns]"),
("timedelta64[ns]", operator.add, "datetime64[ns]"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't these last for yield timedelta64[ns]?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No.

dt64, radd --> dt64 + Nat --> only valid if NaT is treated as td64 --> dt64
dt64, sub --> NaT - dt64 --> only valid if NaT is treated as dt64 --> td64
dt64, rsub --> dt64 - NaT --> valid either way, default to treat as dt64 --> td64
td64, add --> NaT + td64 --> valid either way, defalt to treat as dt64 --> dt64

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, yeah these rules are ok, but if you can put them down somewhere explicity in the code

("timedelta64[ns]", roperator.radd, "datetime64[ns]"),
("timedelta64[ns]", operator.sub, "datetime64[ns]"),
("timedelta64[ns]", roperator.rsub, "timedelta64[ns]"),
],
)
def test_nat_arithmetic_ndarray(dtype, op, out_dtype):
other = np.arange(10).astype(dtype)
result = op(NaT, other)

expected = np.empty(other.shape, dtype=out_dtype)
expected.fill("NaT")
tm.assert_numpy_array_equal(result, expected)


def test_nat_pinned_docstrings():
# see gh-17327
assert NaT.ctime.__doc__ == datetime.ctime.__doc__
Expand Down