From 4a862dd2f42c945c148d3a8443a804a2f75ad997 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 10 Jul 2019 17:05:23 -0700 Subject: [PATCH 1/6] BUG: fix+test assigning invalid NAT-like to DTA/TDA/PA --- pandas/core/arrays/datetimelike.py | 26 ++++++++++++++++- pandas/tests/arrays/test_datetimelike.py | 36 ++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 540442b7eaed4..b9925c1fe3c38 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -492,7 +492,7 @@ def __setitem__( elif isinstance(value, self._scalar_type): self._check_compatible_with(value) value = self._unbox_scalar(value) - elif isna(value) or value == iNaT: + elif is_valid_na(value, self.dtype) or value == iNaT: value = iNaT else: msg = ( @@ -1679,3 +1679,27 @@ def _ensure_datetimelike_to_i8(other, to_utc=False): # period array cannot be coerced to int other = Index(other) return other.asi8 + + +def is_valid_na(obj, dtype): + """ + isna check that excludes incompatible dtypes + + Parameters + ---------- + obj : object + dtype : np.datetime64, np.timedelta64, DatetimeTZDtype, or PeriodDtype + + Returns + ------- + bool + """ + if not isna(obj): + return False + if dtype.kind == "M": + return not isinstance(obj, np.timedelta64) + if dtype.kind == "m": + return not isinstance(obj, np.datetime64) + + # must be PeriodDType + return not isinstance(obj, (np.datetime64, np.timedelta64)) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 34fae1f4b1ab4..d7219ec46a2c7 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -651,3 +651,39 @@ def test_array_interface(self, period_index): result = np.asarray(arr, dtype="S20") expected = np.asarray(arr).astype("S20") tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "array", + [ + pd.TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, + pd.date_range("2000-01-01", periods=3, freq="D")._data, + pd.period_range("2000-01-01", periods=3, freq="D")._data, + ], + ids=lambda x: type(x).__name__, +) +def test_nat_assignment_array(array): + expected = type(array)._from_sequence([pd.NaT, array[1], array[2]]) + + all_nats = [pd.NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")] + casting_nats = { + TimedeltaArray: [pd.NaT, np.timedelta64("NaT", "ns")], + DatetimeArray: [pd.NaT, np.datetime64("NaT", "ns")], + PeriodArray: [pd.NaT], + }[type(array)] + non_casting_nats = { + TimedeltaArray: [np.datetime64("NaT", "ns")], + DatetimeArray: [np.timedelta64("NaT", "ns")], + PeriodArray: [np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")], + }[type(array)] + + for nat in casting_nats: + arr = array.copy() + arr[0] = nat + + tm.assert_equal(arr, expected) + + for nat in non_casting_nats: + arr = array.copy() + with pytest.raises(TypeError): + arr[0] = nat From 74a425499d8d2dce898050ed2d2289e8641b2b32 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 10 Jul 2019 18:14:49 -0700 Subject: [PATCH 2/6] flake8 fixup --- pandas/tests/arrays/test_datetimelike.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index d7219ec46a2c7..7837ccedb32cc 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -665,7 +665,6 @@ def test_array_interface(self, period_index): def test_nat_assignment_array(array): expected = type(array)._from_sequence([pd.NaT, array[1], array[2]]) - all_nats = [pd.NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")] casting_nats = { TimedeltaArray: [pd.NaT, np.timedelta64("NaT", "ns")], DatetimeArray: [pd.NaT, np.datetime64("NaT", "ns")], From 20eca07a4bbeea014ea7c8ebb9ec0e6fc9c212fd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 10 Jul 2019 18:20:16 -0700 Subject: [PATCH 3/6] Avoid numpy deprecation warning --- pandas/core/arrays/datetimelike.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index b9925c1fe3c38..b3888c3659ad0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -492,7 +492,10 @@ def __setitem__( elif isinstance(value, self._scalar_type): self._check_compatible_with(value) value = self._unbox_scalar(value) - elif is_valid_na(value, self.dtype) or value == iNaT: + elif is_valid_na(value, self.dtype): + value = iNaT + elif not isna(value) and lib.is_integer(value) and value == iNaT: + # exclude misc e.g. object() and any NAs not allowed above value = iNaT else: msg = ( From a81b9ad8daf7fc2744e2cd96edc47bcdeacae511 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 10 Jul 2019 20:21:16 -0700 Subject: [PATCH 4/6] rename and move is_valid_nat_for_dtype --- pandas/core/arrays/datetimelike.py | 28 ++-------------------------- pandas/core/dtypes/missing.py | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index b3888c3659ad0..df17388856117 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -36,7 +36,7 @@ ) from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.inference import is_array_like -from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna from pandas._typing import DatetimeLikeScalar from pandas.core import missing, nanops @@ -492,7 +492,7 @@ def __setitem__( elif isinstance(value, self._scalar_type): self._check_compatible_with(value) value = self._unbox_scalar(value) - elif is_valid_na(value, self.dtype): + elif is_valid_nat_for_dtype(value, self.dtype): value = iNaT elif not isna(value) and lib.is_integer(value) and value == iNaT: # exclude misc e.g. object() and any NAs not allowed above @@ -1682,27 +1682,3 @@ def _ensure_datetimelike_to_i8(other, to_utc=False): # period array cannot be coerced to int other = Index(other) return other.asi8 - - -def is_valid_na(obj, dtype): - """ - isna check that excludes incompatible dtypes - - Parameters - ---------- - obj : object - dtype : np.datetime64, np.timedelta64, DatetimeTZDtype, or PeriodDtype - - Returns - ------- - bool - """ - if not isna(obj): - return False - if dtype.kind == "M": - return not isinstance(obj, np.timedelta64) - if dtype.kind == "m": - return not isinstance(obj, np.datetime64) - - # must be PeriodDType - return not isinstance(obj, (np.datetime64, np.timedelta64)) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index f540e9297738a..6a681954fd902 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -559,3 +559,27 @@ def remove_na_arraylike(arr): return arr[notna(arr)] else: return arr[notna(lib.values_from_object(arr))] + + +def is_valid_nat_for_dtype(obj, dtype): + """ + isna check that excludes incompatible dtypes + + Parameters + ---------- + obj : object + dtype : np.datetime64, np.timedelta64, DatetimeTZDtype, or PeriodDtype + + Returns + ------- + bool + """ + if not isna(obj): + return False + if dtype.kind == "M": + return not isinstance(obj, np.timedelta64) + if dtype.kind == "m": + return not isinstance(obj, np.datetime64) + + # must be PeriodDType + return not isinstance(obj, (np.datetime64, np.timedelta64)) From d55c5083e5dd5a74f6dd235b6b030f8355a1062d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 08:15:04 -0700 Subject: [PATCH 5/6] simplify --- pandas/tests/arrays/test_datetimelike.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 7837ccedb32cc..778e950166e7f 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -665,16 +665,10 @@ def test_array_interface(self, period_index): def test_nat_assignment_array(array): expected = type(array)._from_sequence([pd.NaT, array[1], array[2]]) - casting_nats = { - TimedeltaArray: [pd.NaT, np.timedelta64("NaT", "ns")], - DatetimeArray: [pd.NaT, np.datetime64("NaT", "ns")], - PeriodArray: [pd.NaT], - }[type(array)] - non_casting_nats = { - TimedeltaArray: [np.datetime64("NaT", "ns")], - DatetimeArray: [np.timedelta64("NaT", "ns")], - PeriodArray: [np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")], - }[type(array)] + all_np_nats = [np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")] + casting_nats = [x for x in all_np_nats if x.dtype.kind == array.dtype.kind] + casting_nats.append(pd.NaT) + non_casting_nats = [x for x in all_np_nats if x.dtype.kind != array.dtype.kind] for nat in casting_nats: arr = array.copy() From 228174953999b1185afbf08e7015790494bf1a03 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 08:20:00 -0700 Subject: [PATCH 6/6] Separate tests --- pandas/tests/arrays/test_datetimelike.py | 45 +++++++++++++++++------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 778e950166e7f..d9646feaf661e 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -654,29 +654,48 @@ def test_array_interface(self, period_index): @pytest.mark.parametrize( - "array", + "array,casting_nats", [ - pd.TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, - pd.date_range("2000-01-01", periods=3, freq="D")._data, - pd.period_range("2000-01-01", periods=3, freq="D")._data, + ( + pd.TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, + (pd.NaT, np.timedelta64("NaT", "ns")), + ), + ( + pd.date_range("2000-01-01", periods=3, freq="D")._data, + (pd.NaT, np.datetime64("NaT", "ns")), + ), + (pd.period_range("2000-01-01", periods=3, freq="D")._data, (pd.NaT,)), ], ids=lambda x: type(x).__name__, ) -def test_nat_assignment_array(array): +def test_casting_nat_setitem_array(array, casting_nats): expected = type(array)._from_sequence([pd.NaT, array[1], array[2]]) - all_np_nats = [np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")] - casting_nats = [x for x in all_np_nats if x.dtype.kind == array.dtype.kind] - casting_nats.append(pd.NaT) - non_casting_nats = [x for x in all_np_nats if x.dtype.kind != array.dtype.kind] - for nat in casting_nats: arr = array.copy() arr[0] = nat - tm.assert_equal(arr, expected) + +@pytest.mark.parametrize( + "array,non_casting_nats", + [ + ( + pd.TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, + (np.datetime64("NaT", "ns"),), + ), + ( + pd.date_range("2000-01-01", periods=3, freq="D")._data, + (np.timedelta64("NaT", "ns"),), + ), + ( + pd.period_range("2000-01-01", periods=3, freq="D")._data, + (np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")), + ), + ], + ids=lambda x: type(x).__name__, +) +def test_invalid_nat_setitem_array(array, non_casting_nats): for nat in non_casting_nats: - arr = array.copy() with pytest.raises(TypeError): - arr[0] = nat + array[0] = nat