Skip to content

BUG: fix+test PA+all-NaT TDA #27739

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,12 @@ def _add_delta_tdi(self, other):
"""
assert isinstance(self.freq, Tick) # checked by calling function

delta = self._check_timedeltalike_freq_compat(other)
if not np.all(isna(other)):
delta = self._check_timedeltalike_freq_compat(other)
else:
# all-NaT TimedeltaIndex is equivalent to a single scalar td64 NaT
return self + np.timedelta64("NaT")

return self._addsub_int_array(delta, operator.add).asi8

def _add_delta(self, other):
Expand Down
53 changes: 21 additions & 32 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""
import datetime
import operator
from typing import Any, Callable
from typing import Any, Callable, Tuple

import numpy as np

Expand Down Expand Up @@ -42,7 +42,6 @@
ABCSeries,
ABCSparseArray,
ABCSparseSeries,
ABCTimedeltaArray,
)
from pandas.core.dtypes.missing import isna, notna

Expand Down Expand Up @@ -134,14 +133,15 @@ def _maybe_match_name(a, b):
return None


def maybe_upcast_for_op(obj):
def maybe_upcast_for_op(obj, shape: Tuple[int, ...]):
"""
Cast non-pandas objects to pandas types to unify behavior of arithmetic
and comparison operations.

Parameters
----------
obj: object
shape : tuple[int]

Returns
-------
Expand All @@ -157,13 +157,22 @@ def maybe_upcast_for_op(obj):
# implementation; otherwise operation against numeric-dtype
# raises TypeError
return Timedelta(obj)
elif isinstance(obj, np.timedelta64) and not isna(obj):
elif isinstance(obj, np.timedelta64):
if isna(obj):
# wrapping timedelta64("NaT") in Timedelta returns NaT,
# which would incorrectly be treated as a datetime-NaT, so
# we broadcast and wrap in a Series
right = np.broadcast_to(obj, shape)

# Note: we use Series instead of TimedeltaIndex to avoid having
# to worry about catching NullFrequencyError.
return pd.Series(right)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rather than this can you make a safe method that deals with null frequency, this is a bit odd

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This basically is the safe method to deal with null frequency. I think there will be room to simplify it after we merge dispatch_to_index_op into dispatch_to_extension_op

(I agree that I'd prefer these methods not need to know about Series)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok fair enough, maybe make an issue about this? (or just add to your list) :->


# In particular non-nanosecond timedelta64 needs to be cast to
# nanoseconds, or else we get undesired behavior like
# np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
# The isna check is to avoid casting timedelta64("NaT"), which would
# return NaT and incorrectly be treated as a datetime-NaT.
return Timedelta(obj)

elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj):
# GH#22390 Unfortunately we need to special-case right-hand
# timedelta64 dtypes because numpy casts integer dtypes to
Expand Down Expand Up @@ -975,7 +984,7 @@ def wrapper(left, right):

left, right = _align_method_SERIES(left, right)
res_name = get_op_result_name(left, right)
right = maybe_upcast_for_op(right)
right = maybe_upcast_for_op(right, left.shape)

if is_categorical_dtype(left):
raise TypeError(
Expand Down Expand Up @@ -1003,31 +1012,11 @@ def wrapper(left, right):
return construct_result(left, result, index=left.index, name=res_name)

elif is_timedelta64_dtype(right):
# We should only get here with non-scalar or timedelta64('NaT')
# values for right
# Note: we cannot use dispatch_to_index_op because
# that may incorrectly raise TypeError when we
# should get NullFrequencyError
orig_right = right
if is_scalar(right):
# broadcast and wrap in a TimedeltaIndex
assert np.isnat(right)
right = np.broadcast_to(right, left.shape)
right = pd.TimedeltaIndex(right)

assert isinstance(right, (pd.TimedeltaIndex, ABCTimedeltaArray, ABCSeries))
try:
result = op(left._values, right)
except NullFrequencyError:
if orig_right is not right:
# i.e. scalar timedelta64('NaT')
# We get a NullFrequencyError because we broadcast to
# TimedeltaIndex, but this should be TypeError.
raise TypeError(
"incompatible type for a datetime/timedelta "
"operation [{name}]".format(name=op.__name__)
)
raise
# We should only get here with non-scalar values for right
# upcast by maybe_upcast_for_op
assert not isinstance(right, (np.timedelta64, np.ndarray))

result = op(left._values, right)

# We do not pass dtype to ensure that the Series constructor
# does inference in the case where `result` has object-dtype.
Expand Down
28 changes: 28 additions & 0 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pandas as pd
from pandas import Period, PeriodIndex, Series, period_range
from pandas.core import ops
from pandas.core.arrays import TimedeltaArray
import pandas.util.testing as tm

from pandas.tseries.frequencies import to_offset
Expand Down Expand Up @@ -1013,6 +1014,33 @@ def test_parr_add_sub_td64_nat(self, box_transpose_fail):
with pytest.raises(TypeError):
other - obj

@pytest.mark.parametrize(
"other",
[
np.array(["NaT"] * 9, dtype="m8[ns]"),
TimedeltaArray._from_sequence(["NaT"] * 9),
],
)
def test_parr_add_sub_tdt64_nat_array(self, box_df_fail, other):
# FIXME: DataFrame fails because when when operating column-wise
# timedelta64 entries become NaT and are treated like datetimes
box = box_df_fail

pi = pd.period_range("1994-04-01", periods=9, freq="19D")
expected = pd.PeriodIndex(["NaT"] * 9, freq="19D")

obj = tm.box_expected(pi, box)
expected = tm.box_expected(expected, box)

result = obj + other
tm.assert_equal(result, expected)
result = other + obj
tm.assert_equal(result, expected)
result = obj - other
tm.assert_equal(result, expected)
with pytest.raises(TypeError):
other - obj


class TestPeriodSeriesArithmetic:
def test_ops_series_timedelta(self):
Expand Down