From 1043a205d197739500d6c75a9dbc40abf713837e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 15 Jul 2019 18:36:16 -0700 Subject: [PATCH 1/5] stop conflating iNaT with td64-NaT --- pandas/core/internals/blocks.py | 21 +++++++-------------- pandas/core/nanops.py | 8 ++++++++ pandas/tests/series/test_missing.py | 7 +++++-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 897a82f9a1968..c0d3368c652ec 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2597,6 +2597,7 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): is_timedelta = True _can_hold_na = True is_numeric = False + fill_value = np.timedelta64("NaT", "ns") def __init__(self, values, placement, ndim=None): if values.dtype != _TD_DTYPE: @@ -2617,15 +2618,11 @@ def _box_func(self): def _can_hold_element(self, element): tipo = maybe_infer_dtype_type(element) if tipo is not None: - # TODO: remove the np.int64 support once coerce_values and - # _try_coerce_args both coerce to m8[ns] and not i8. - return issubclass(tipo.type, (np.timedelta64, np.int64)) + return issubclass(tipo.type, np.timedelta64) elif element is NaT: return True elif isinstance(element, (timedelta, np.timedelta64)): return True - elif is_integer(element): - return element == tslibs.iNaT return is_valid_nat_for_dtype(element, self.dtype) def fillna(self, value, **kwargs): @@ -2645,9 +2642,6 @@ def fillna(self, value, **kwargs): value = Timedelta(value, unit="s") return super().fillna(value, **kwargs) - def _coerce_values(self, values): - return values.view("i8") - def _try_coerce_args(self, other): """ Coerce values and other to int64, with null values converted to @@ -2663,13 +2657,12 @@ def _try_coerce_args(self, other): """ if is_valid_nat_for_dtype(other, self.dtype): - other = tslibs.iNaT - elif is_integer(other) and other == tslibs.iNaT: - pass + other = np.timedelta64("NaT", "ns") elif isinstance(other, (timedelta, np.timedelta64)): - other = Timedelta(other).value + other = Timedelta(other).to_timedelta64() elif hasattr(other, "dtype") and is_timedelta64_dtype(other): - other = other.astype("i8", copy=False).view("i8") + # TODO: can we get here with non-nano dtype? + pass else: # coercion issues # let higher levels handle @@ -2683,7 +2676,7 @@ def _try_coerce_result(self, result): mask = isna(result) if result.dtype.kind in ["i", "f"]: result = result.astype("m8[ns]") - result[mask] = tslibs.iNaT + result[mask] = np.timedelta64("NaT", "ns") elif isinstance(result, (np.integer, np.float)): result = self._box_func(result) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index ce14cb22a88ce..aa255d03f9db7 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1362,6 +1362,14 @@ def _nanpercentile_1d(values, mask, q, na_value, interpolation): quantiles : scalar or array """ # mask is Union[ExtensionArray, ndarray] + if values.dtype.kind == "m": + # need to cast to integer to avoid rounding errors in numpy + result = _nanpercentile_1d(values.view("i8"), mask, q, na_value, interpolation) + + # Note: we have to do do `astype` and not view because in general we + # have float result at this point, not i8 + return result.astype(values.dtype) + values = values[~mask] if len(values) == 0: diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index f8a44b7f5639e..adb23fc6b94ea 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -780,9 +780,11 @@ def test_timedelta64_nan(self): td1[0] = td[0] assert not isna(td1[0]) + # GH#16674 iNaT is treated as an integer when given by the user td1[1] = iNaT - assert isna(td1[1]) - assert td1[1].value == iNaT + assert not isna(td1[1]) + assert td1.dtype == np.object_ + assert td1[1] == iNaT td1[1] = td[1] assert not isna(td1[1]) @@ -792,6 +794,7 @@ def test_timedelta64_nan(self): td1[2] = td[2] assert not isna(td1[2]) + # FIXME: don't leave commented-out # boolean setting # this doesn't work, not sure numpy even supports it # result = td[(td>np.timedelta64(timedelta(days=3))) & From f35754c12cba5245d939bf633623571535763d49 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 16 Jul 2019 17:01:12 -0700 Subject: [PATCH 2/5] dont allow iNaT in DatetimeBlock --- pandas/core/internals/blocks.py | 31 ++++++++---------------- pandas/core/nanops.py | 18 ++++++++------ pandas/tests/frame/test_indexing.py | 7 ++++-- pandas/tests/internals/test_internals.py | 2 +- 4 files changed, 26 insertions(+), 32 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c0d3368c652ec..722acbcbb5b59 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2180,7 +2180,7 @@ def _holder(self): @property def fill_value(self): - return tslibs.iNaT + return np.datetime64("NaT", "ns") def get_values(self, dtype=None): """ @@ -2257,14 +2257,9 @@ def _can_hold_element(self, element): if self.is_datetimetz: return tz_compare(element.tzinfo, self.dtype.tz) return element.tzinfo is None - elif is_integer(element): - return element == tslibs.iNaT return is_valid_nat_for_dtype(element, self.dtype) - def _coerce_values(self, values): - return values.view("i8") - def _try_coerce_args(self, other): """ Coerce other to dtype 'i8'. NaN and NaT convert to @@ -2281,16 +2276,15 @@ def _try_coerce_args(self, other): base-type other """ if is_valid_nat_for_dtype(other, self.dtype): - other = tslibs.iNaT - elif is_integer(other) and other == tslibs.iNaT: - pass + other = np.datetime64("NaT", "ns") elif isinstance(other, (datetime, np.datetime64, date)): other = self._box_func(other) if getattr(other, "tz") is not None: raise TypeError("cannot coerce a Timestamp with a tz on a naive Block") - other = other.asm8.view("i8") + other = other.asm8 elif hasattr(other, "dtype") and is_datetime64_dtype(other): - other = other.astype("i8", copy=False).view("i8") + # TODO: can we get here with non-nano? + pass else: # coercion issues # let higher levels handle @@ -2449,8 +2443,7 @@ def _slice(self, slicer): return self.values[slicer] def _coerce_values(self, values): - # asi8 is a view, needs copy - return _block_shape(values.view("i8"), ndim=self.ndim) + return _block_shape(values, ndim=self.ndim) def _try_coerce_args(self, other): """ @@ -2475,21 +2468,17 @@ def _try_coerce_args(self, other): other = self._holder(other, dtype=self.dtype) elif is_valid_nat_for_dtype(other, self.dtype): - other = tslibs.iNaT - elif is_integer(other) and other == tslibs.iNaT: - pass + other = np.datetime64("NaT", "ns") elif isinstance(other, self._holder): - if other.tz != self.values.tz: + if not tz_compare(other.tz, self.values.tz): raise ValueError("incompatible or non tz-aware value") - other = _block_shape(other.asi8, ndim=self.ndim) + elif isinstance(other, (np.datetime64, datetime, date)): other = tslibs.Timestamp(other) - tz = getattr(other, "tz", None) # test we can have an equal time zone - if tz is None or str(tz) != str(self.values.tz): + if not tz_compare(other.tz, self.values.tz): raise ValueError("incompatible or non tz-aware value") - other = other.value else: raise TypeError(other) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index aa255d03f9db7..507bfa2345e04 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1362,14 +1362,6 @@ def _nanpercentile_1d(values, mask, q, na_value, interpolation): quantiles : scalar or array """ # mask is Union[ExtensionArray, ndarray] - if values.dtype.kind == "m": - # need to cast to integer to avoid rounding errors in numpy - result = _nanpercentile_1d(values.view("i8"), mask, q, na_value, interpolation) - - # Note: we have to do do `astype` and not view because in general we - # have float result at this point, not i8 - return result.astype(values.dtype) - values = values[~mask] if len(values) == 0: @@ -1401,6 +1393,16 @@ def nanpercentile(values, q, axis, na_value, mask, ndim, interpolation): ------- quantiles : scalar or array """ + if values.dtype.kind in ["m", "M"]: + # need to cast to integer to avoid rounding errors in numpy + result = nanpercentile( + values.view("i8"), q, axis, na_value.view("i8"), mask, ndim, interpolation + ) + + # Note: we have to do do `astype` and not view because in general we + # have float result at this point, not i8 + return result.astype(values.dtype) + if not lib.is_scalar(mask) and mask.any(): if ndim == 1: return _nanpercentile_1d( diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 0cb7db0e47123..756a6159fc7c5 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1150,6 +1150,7 @@ def test_fancy_index_int_labels_exceptions(self, float_frame): with pytest.raises(KeyError, match=msg): float_frame.ix[:, ["E"]] = 1 + # FIXME: don't leave commented-out # partial setting now allows this GH2578 # pytest.raises(KeyError, float_frame.ix.__setitem__, # (slice(None, None), 'E'), 1) @@ -1676,9 +1677,11 @@ def test_setitem_single_column_mixed_datetime(self): ) assert_series_equal(result, expected) - # set an allowable datetime64 type + # GH#16674 iNaT is treated as an integer when given by the user df.loc["b", "timestamp"] = iNaT - assert isna(df.loc["b", "timestamp"]) + assert not isna(df.loc["b", "timestamp"]) + assert df["timestamp"].dtype == np.object_ + assert df.loc["b", "timestamp"] == iNaT # allow this syntax df.loc["c", "timestamp"] = np.nan diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 655e484bc34d1..ae572ae1df8a2 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -338,7 +338,7 @@ def test_try_coerce_arg(self): vals = (np.datetime64("2010-10-10"), datetime(2010, 10, 10), date(2010, 10, 10)) for val in vals: coerced = block._try_coerce_args(val) - assert np.int64 == type(coerced) + assert np.datetime64 == type(coerced) assert pd.Timestamp("2010-10-10") == pd.Timestamp(coerced) From 0a4ed9c24d28b6d28c42f8349f1cea325083b9b3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 16 Jul 2019 17:03:05 -0700 Subject: [PATCH 3/5] fix docstring --- pandas/core/internals/blocks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 722acbcbb5b59..d1b6f0f44bc4b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2633,8 +2633,8 @@ def fillna(self, value, **kwargs): def _try_coerce_args(self, other): """ - Coerce values and other to int64, with null values converted to - iNaT. values is always ndarray-like, other may not be + Coerce values and other to datetime64[ns], with null values + converted to datetime64("NaT", "ns"). Parameters ---------- From 613a07fdc4a14a158900326f82929b36ae3f3af8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 14:50:49 -0700 Subject: [PATCH 4/5] remove duplicate dispatch --- pandas/core/nanops.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 61f056b5fb22f..4db0f75586ead 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1360,14 +1360,6 @@ def _nanpercentile_1d(values, mask, q, na_value, interpolation): quantiles : scalar or array """ # mask is Union[ExtensionArray, ndarray] - if values.dtype.kind == "m": - # need to cast to integer to avoid rounding errors in numpy - result = _nanpercentile_1d(values.view("i8"), mask, q, na_value, interpolation) - - # Note: we have to do do `astype` and not view because in general we - # have float result at this point, not i8 - return result.astype(values.dtype) - values = values[~mask] if len(values) == 0: From c44f7dd11acf19b65a635461d3cb94d9a31f7ba6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 16:11:43 -0700 Subject: [PATCH 5/5] dummy commit to force CI