Skip to content

REF: stop allowing iNaT in TimedeltaBlock methods #27411

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 7 additions & 14 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2597,6 +2597,7 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
is_timedelta = True
_can_hold_na = True
is_numeric = False
fill_value = np.timedelta64("NaT", "ns")

def __init__(self, values, placement, ndim=None):
if values.dtype != _TD_DTYPE:
Expand All @@ -2617,15 +2618,11 @@ def _box_func(self):
def _can_hold_element(self, element):
tipo = maybe_infer_dtype_type(element)
if tipo is not None:
# TODO: remove the np.int64 support once coerce_values and
# _try_coerce_args both coerce to m8[ns] and not i8.
return issubclass(tipo.type, (np.timedelta64, np.int64))
return issubclass(tipo.type, np.timedelta64)
elif element is NaT:
return True
elif isinstance(element, (timedelta, np.timedelta64)):
return True
elif is_integer(element):
return element == tslibs.iNaT
return is_valid_nat_for_dtype(element, self.dtype)

def fillna(self, value, **kwargs):
Expand All @@ -2645,9 +2642,6 @@ def fillna(self, value, **kwargs):
value = Timedelta(value, unit="s")
return super().fillna(value, **kwargs)

def _coerce_values(self, values):
return values.view("i8")

def _try_coerce_args(self, other):
"""
Coerce values and other to int64, with null values converted to
Expand All @@ -2663,13 +2657,12 @@ def _try_coerce_args(self, other):
"""

if is_valid_nat_for_dtype(other, self.dtype):
other = tslibs.iNaT
elif is_integer(other) and other == tslibs.iNaT:
pass
other = np.timedelta64("NaT", "ns")
elif isinstance(other, (timedelta, np.timedelta64)):
other = Timedelta(other).value
other = Timedelta(other).to_timedelta64()
elif hasattr(other, "dtype") and is_timedelta64_dtype(other):
other = other.astype("i8", copy=False).view("i8")
# TODO: can we get here with non-nano dtype?
pass
else:
# coercion issues
# let higher levels handle
Expand All @@ -2683,7 +2676,7 @@ def _try_coerce_result(self, result):
mask = isna(result)
if result.dtype.kind in ["i", "f"]:
result = result.astype("m8[ns]")
result[mask] = tslibs.iNaT
result[mask] = np.timedelta64("NaT", "ns")

elif isinstance(result, (np.integer, np.float)):
result = self._box_func(result)
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1362,6 +1362,14 @@ def _nanpercentile_1d(values, mask, q, na_value, interpolation):
quantiles : scalar or array
"""
# mask is Union[ExtensionArray, ndarray]
if values.dtype.kind == "m":
# need to cast to integer to avoid rounding errors in numpy
result = _nanpercentile_1d(values.view("i8"), mask, q, na_value, interpolation)

# Note: we have to do do `astype` and not view because in general we
# have float result at this point, not i8
return result.astype(values.dtype)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are you not letting this then fall thru, e.g. if there is masking here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we just return result here, at this point it is in general float64. In the calling function we're going to do a result.view(values.dtype), which will be incorrect

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right my point is to do this later in the function after masking and checking for 0-len.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just tried that, breaks because it ends up double-masking

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i wouldnt hang up on this too much, since we're going to have to move this to earlier in the call stack in #27428

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, if you can clean this up later would be great.


values = values[~mask]

if len(values) == 0:
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/series/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -780,9 +780,11 @@ def test_timedelta64_nan(self):
td1[0] = td[0]
assert not isna(td1[0])

# GH#16674 iNaT is treated as an integer when given by the user
td1[1] = iNaT
assert isna(td1[1])
assert td1[1].value == iNaT
assert not isna(td1[1])
assert td1.dtype == np.object_
assert td1[1] == iNaT
td1[1] = td[1]
assert not isna(td1[1])

Expand All @@ -792,6 +794,7 @@ def test_timedelta64_nan(self):
td1[2] = td[2]
assert not isna(td1[2])

# FIXME: don't leave commented-out
# boolean setting
# this doesn't work, not sure numpy even supports it
# result = td[(td>np.timedelta64(timedelta(days=3))) &
Expand Down