From e28cd033d0de4acbe8e0aaa06646698cdc435fa4 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Feb 2021 19:59:48 -0800 Subject: [PATCH 1/2] BUG: incorrectly accepting datetime64(nat) for dt64tz --- pandas/core/arrays/datetimelike.py | 6 +++ pandas/core/arrays/datetimes.py | 6 +-- pandas/core/arrays/interval.py | 9 ++++- pandas/core/dtypes/missing.py | 6 ++- pandas/core/internals/blocks.py | 3 +- pandas/tests/indexes/datetimes/test_insert.py | 4 ++ pandas/tests/series/indexing/test_indexing.py | 2 +- pandas/tests/series/indexing/test_setitem.py | 37 +++++++++++++++++++ 8 files changed, 64 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 162a69370bc61..c77b6c9a87fbb 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -562,6 +562,12 @@ def _validate_scalar( # GH#18295 value = NaT + elif isna(value): + # if we are dt64tz and value is dt64("NaT"), dont cast to NaT, + # or else we'll fail to raise in _unbox_scalar + msg = self._validation_error_message(value, allow_listlike) + raise TypeError(msg) + elif isinstance(value, self._recognized_scalars): # error: Too many arguments for "object" [call-arg] value = self._scalar_type(value) # type: ignore[call-arg] diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 144a7186f5826..70c2015c6d41c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -464,10 +464,8 @@ def _generate_range( def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timestamp.") - if not isna(value): - self._check_compatible_with(value, setitem=setitem) - return value.asm8 - return np.datetime64(value.value, "ns") + self._check_compatible_with(value, setitem=setitem) + return value.asm8 def _scalar_from_string(self, value): return Timestamp(value, tz=self.tz) 
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 0f3e028c34c05..d92c10a2a4a1b 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -9,6 +9,7 @@ from pandas._config import get_option +from pandas._libs import NaT from pandas._libs.interval import ( VALID_CLOSED, Interval, @@ -23,7 +24,8 @@ from pandas.core.dtypes.cast import maybe_convert_platform from pandas.core.dtypes.common import ( is_categorical_dtype, - is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, is_dtype_equal, is_float_dtype, is_integer_dtype, @@ -999,9 +1001,12 @@ def _validate_setitem_value(self, value): if is_integer_dtype(self.dtype.subtype): # can't set NaN on a numpy integer array needs_float_conversion = True - elif is_datetime64_any_dtype(self.dtype.subtype): + elif is_datetime64_dtype(self.dtype.subtype): # need proper NaT to set directly on the numpy array value = np.datetime64("NaT") + elif is_datetime64tz_dtype(self.dtype.subtype): + # need proper NaT to set directly on the DatetimeArray array + value = NaT elif is_timedelta64_dtype(self.dtype.subtype): # need proper NaT to set directly on the numpy array value = np.timedelta64("NaT") diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index ef645313de614..7ebbbdc9ce7f9 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -604,7 +604,11 @@ def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool: if not lib.is_scalar(obj) or not isna(obj): return False if dtype.kind == "M": - return not isinstance(obj, np.timedelta64) + if isinstance(dtype, np.dtype): + # i.e. 
not tzaware + return not isinstance(obj, np.timedelta64) + # we have to rule out tznaive dt64("NaT") + return not isinstance(obj, (np.timedelta64, np.datetime64)) if dtype.kind == "m": return not isinstance(obj, np.datetime64) if dtype.kind in ["i", "u", "f", "c"]: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e99caac9eaace..51af97458d751 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -8,6 +8,7 @@ from pandas._libs import ( Interval, + NaT, Period, Timestamp, algos as libalgos, @@ -2102,7 +2103,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): _can_hold_element = DatetimeBlock._can_hold_element to_native_types = DatetimeBlock.to_native_types diff = DatetimeBlock.diff - fill_value = np.datetime64("NaT", "ns") + fill_value = NaT where = DatetimeBlock.where putmask = DatetimeLikeBlockMixin.putmask diff --git a/pandas/tests/indexes/datetimes/test_insert.py b/pandas/tests/indexes/datetimes/test_insert.py index 684c6b813b48f..6dbd1287b7306 100644 --- a/pandas/tests/indexes/datetimes/test_insert.py +++ b/pandas/tests/indexes/datetimes/test_insert.py @@ -13,8 +13,12 @@ class TestInsert: @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"]) def test_insert_nat(self, tz, null): # GH#16537, GH#18295 (test missing) + idx = DatetimeIndex(["2017-01-01"], tz=tz) expected = DatetimeIndex(["NaT", "2017-01-01"], tz=tz) + if tz is not None and isinstance(null, np.datetime64): + expected = Index([null, idx[0]], dtype=object) + res = idx.insert(0, null) tm.assert_index_equal(res, expected) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index d97410562083c..e047317acd24d 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -559,7 +559,7 @@ def test_dt64_series_assign_nat(nat_val, tz, indexer_sli): base = Series(dti) expected = Series([pd.NaT] + list(dti[1:]), dtype=dti.dtype) 
- should_cast = nat_val is pd.NaT or base.dtype.kind == nat_val.dtype.kind + should_cast = nat_val is pd.NaT or base.dtype == nat_val.dtype if not should_cast: expected = expected.astype(object) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 3a9ec0948b29a..36948c3dc05f3 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -671,6 +671,43 @@ def key(self): return 0 +class TestSetitemNADatetime64Dtype(SetitemCastingEquivalents): + # some nat-like values should be cast to datetime64 when inserting + # into a datetime64 series. Others should coerce to object + # and retain their dtypes. + + @pytest.fixture(params=[None, "UTC", "US/Central"]) + def obj(self, request): + tz = request.param + dti = date_range("2016-01-01", periods=3, tz=tz) + return Series(dti) + + @pytest.fixture( + params=[NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")] + ) + def val(self, request): + return request.param + + @pytest.fixture + def is_inplace(self, val, obj): + if obj._values.tz is None: + # cast to object iff val is timedelta64("NaT") + return val is NaT or val.dtype.kind == "M" + + # otherwise we have to exclude tznaive dt64("NaT") + return val is NaT + + @pytest.fixture + def expected(self, obj, val, is_inplace): + dtype = obj.dtype if is_inplace else object + expected = Series([val] + list(obj[1:]), dtype=dtype) + return expected + + @pytest.fixture + def key(self): + return 0 + + class TestSetitemMismatchedTZCastsToObject(SetitemCastingEquivalents): # GH#24024 @pytest.fixture From 2ff2f10b81ea92b1b9d1a67fa08a71e7c9eae618 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Feb 2021 10:06:55 -0800 Subject: [PATCH 2/2] Whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 99ae60859b68c..4fe033b2c4344 100644 --- a/doc/source/whatsnew/v1.3.0.rst 
+++ b/doc/source/whatsnew/v1.3.0.rst @@ -346,7 +346,9 @@ Indexing - Bug in setting ``timedelta64`` or ``datetime64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`, issue:`39619`) - Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`) - Bug in setting ``datetime64`` values into a :class:`Series` with integer-dtype incorrect casting the datetime64 values to integers (:issue:`39266`) +- Bug in setting ``np.datetime64("NaT")`` into a :class:`Series` with :class:`DatetimeTZDtype` incorrectly treating the timezone-naive value as timezone-aware (:issue:`39769`) - Bug in :meth:`Index.get_loc` not raising ``KeyError`` when method is specified for ``NaN`` value when ``NaN`` is not in :class:`Index` (:issue:`39382`) +- Bug in :meth:`DatetimeIndex.insert` when inserting ``np.datetime64("NaT")`` into a timezone-aware index incorrectly treating the timezone-naive value as timezone-aware (:issue:`39769`) - Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`) - Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`) - Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)