From df2a9a74bab9129c71df4361d7b328196e287a2b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 21 Apr 2020 08:25:55 -0700 Subject: [PATCH 1/4] BUG: DTI/TDI.insert doing invalid casting --- pandas/core/arrays/datetimelike.py | 5 ++- pandas/core/indexes/datetimelike.py | 33 ++++++++----------- pandas/tests/indexes/datetimes/test_insert.py | 21 ++++++++++++ .../tests/indexes/timedeltas/test_insert.py | 10 ++++++ 4 files changed, 48 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 27b2ed822a49f..8bb74bdb6cacb 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -847,10 +847,13 @@ def _validate_setitem_value(self, value): def _validate_insert_value(self, value): if isinstance(value, self._recognized_scalars): value = self._scalar_type(value) + self._check_compatible_with(value, setitem=True) + # TODO: if we dont have compat, should we raise or astype(object)? + # PeriodIndex does astype(object) elif is_valid_nat_for_dtype(value, self.dtype): # GH#18295 value = NaT - elif lib.is_scalar(value) and isna(value): + else: raise TypeError( f"cannot insert {type(self).__name__} with incompatible label" ) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3a721d8c8c320..ce9f8d3b08446 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -905,37 +905,30 @@ def insert(self, loc, item): ------- new_index : Index """ + if isinstance(item, str): + # TODO: Why are strings special? + # TODO: Should we attempt _scalar_from_string? + return self.astype(object).insert(loc, item) + item = self._data._validate_insert_value(item) freq = None - if isinstance(item, self._data._scalar_type) or item is NaT: - self._data._check_compatible_with(item, setitem=True) - - # check freq can be preserved on edge cases - if self.size and self.freq is not None: + # check freq can be preserved on edge cases + if self.freq is not None: + if self.size: if item is NaT: pass elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]: freq = self.freq elif (loc == len(self)) and item - self.freq == self[-1]: freq = self.freq - elif self.freq is not None: + else: # Adding a single item to an empty index may preserve freq if self.freq.is_on_offset(item): freq = self.freq - item = item.asm8 - try: - new_i8s = np.concatenate( - (self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8) - ) - arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq) - return type(self)._simple_new(arr, name=self.name) - except (AttributeError, TypeError) as err: + item = self._data._unbox_scalar(item) - # fall back to object index - if isinstance(item, str): - return self.astype(object).insert(loc, item) - raise TypeError( - f"cannot insert {type(self).__name__} with incompatible label" - ) from err + new_i8s = np.concatenate([self[:loc].asi8, [item], self[loc:].asi8]) + arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq) + return type(self)._simple_new(arr, name=self.name) diff --git a/pandas/tests/indexes/datetimes/test_insert.py b/pandas/tests/indexes/datetimes/test_insert.py index 034e1c6a4e1b0..8c2002a7f679f 100644 --- a/pandas/tests/indexes/datetimes/test_insert.py +++ b/pandas/tests/indexes/datetimes/test_insert.py @@ -165,3 +165,24 @@ def test_insert(self): assert result.name == expected.name assert result.tz == expected.tz assert result.freq is None + + @pytest.mark.parametrize( + "value", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)] + ) + def test_insert_mismatched_types(self, tz_aware_fixture, value): + tz = tz_aware_fixture + dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz) + + msg = "incompatible label" + with pytest.raises(TypeError, match=msg): + dti.insert(1, value) + + def test_insert_object_casting(self, tz_aware_fixture): + tz = tz_aware_fixture + dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz) + + # ATM we treat this as a string, but we could plausibly wrap it in Timestamp + value = "2019-11-05" + result = dti.insert(0, value) + expected = Index(["2019-11-05"] + list(dti), dtype=object, name=9) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_insert.py b/pandas/tests/indexes/timedeltas/test_insert.py index e65c871428bab..f7020c5d1a71b 100644 --- a/pandas/tests/indexes/timedeltas/test_insert.py +++ b/pandas/tests/indexes/timedeltas/test_insert.py @@ -82,6 +82,16 @@ def test_insert_invalid_na(self): with pytest.raises(TypeError, match="incompatible label"): idx.insert(0, np.datetime64("NaT")) + @pytest.mark.parametrize( + "value", [0, np.int64(0), np.float64(0), np.array(0), np.datetime64(456, "us")] + ) + def test_insert_mismatched_types(self, value): + tdi = TimedeltaIndex(["4day", "1day", "2day"], name="idx") + + msg = "incompatible label" + with pytest.raises(TypeError, match=msg): + tdi.insert(1, value) + def test_insert_dont_cast_strings(self): # To match DatetimeIndex and PeriodIndex behavior, dont try to # parse strings to Timedelta From bf55f68835230d68af9ba793c359a2dd00da72fc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 21 Apr 2020 08:30:09 -0700 Subject: [PATCH 2/4] gh refs --- pandas/tests/indexes/datetimes/test_insert.py | 2 ++ pandas/tests/indexes/timedeltas/test_insert.py | 1 + 2 files changed, 3 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_insert.py b/pandas/tests/indexes/datetimes/test_insert.py index 8c2002a7f679f..623dc4b4ec738 100644 --- a/pandas/tests/indexes/datetimes/test_insert.py +++ b/pandas/tests/indexes/datetimes/test_insert.py @@ -170,6 +170,7 @@ def test_insert(self): "value", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)] ) def test_insert_mismatched_types(self, tz_aware_fixture, value): + # GH#33703 dont cast these to dt64 tz = tz_aware_fixture dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz) @@ -178,6 +179,7 @@ def test_insert_mismatched_types(self, tz_aware_fixture, value): dti.insert(1, value) def test_insert_object_casting(self, tz_aware_fixture): + # GH#33703 tz = tz_aware_fixture dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz) diff --git a/pandas/tests/indexes/timedeltas/test_insert.py b/pandas/tests/indexes/timedeltas/test_insert.py index f7020c5d1a71b..b27b766f650e5 100644 --- a/pandas/tests/indexes/timedeltas/test_insert.py +++ b/pandas/tests/indexes/timedeltas/test_insert.py @@ -86,6 +86,7 @@ def test_insert_invalid_na(self): "value", [0, np.int64(0), np.float64(0), np.array(0), np.datetime64(456, "us")] ) def test_insert_mismatched_types(self, value): + # GH#33703 dont cast these to td64 tdi = TimedeltaIndex(["4day", "1day", "2day"], name="idx") msg = "incompatible label" From 14f7937ad3f2a0d5b75b35db829cfcd0201faa07 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 21 Apr 2020 10:48:00 -0700 Subject: [PATCH 3/4] update exception messages --- pandas/tests/indexing/test_coercion.py | 6 +++--- pandas/tests/indexing/test_partial.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index c390347236ad3..3df71eef13d34 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -447,7 +447,7 @@ def test_insert_index_datetimes(self, fill_val, exp_dtype): with pytest.raises(TypeError, match=msg): obj.insert(1, pd.Timestamp("2012-01-01", tz="Asia/Tokyo")) - msg = "cannot insert DatetimeIndex with incompatible label" + msg = "cannot insert DatetimeArray with incompatible label" with pytest.raises(TypeError, match=msg): obj.insert(1, 1) @@ -464,12 +464,12 @@ def test_insert_index_timedelta64(self): ) # ToDo: must coerce to object - msg = "cannot insert TimedeltaIndex with incompatible label" + msg = "cannot insert TimedeltaArray with incompatible label" with pytest.raises(TypeError, match=msg): obj.insert(1, pd.Timestamp("2012-01-01")) # ToDo: must coerce to object - msg = "cannot insert TimedeltaIndex with incompatible label" + msg = "cannot insert TimedeltaArray with incompatible label" with pytest.raises(TypeError, match=msg): obj.insert(1, 1) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 2e691c6fd76d8..813828d876079 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -335,7 +335,7 @@ def test_partial_set_invalid(self): df = orig.copy() # don't allow not string inserts - msg = "cannot insert DatetimeIndex with incompatible label" + msg = "cannot insert DatetimeArray with incompatible label" with pytest.raises(TypeError, match=msg): df.loc[100.0, :] = df.iloc[0] From 1c206abaa2d2f423a71fc3e64385cf01247af2a7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 21 Apr 2020 13:41:27 -0700 Subject: [PATCH 4/4] update per comments --- pandas/tests/indexes/datetimes/test_insert.py | 6 +++--- pandas/tests/indexes/timedeltas/test_insert.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_insert.py b/pandas/tests/indexes/datetimes/test_insert.py index 623dc4b4ec738..b4f6cc3798f4f 100644 --- a/pandas/tests/indexes/datetimes/test_insert.py +++ b/pandas/tests/indexes/datetimes/test_insert.py @@ -167,16 +167,16 @@ def test_insert(self): assert result.freq is None @pytest.mark.parametrize( - "value", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)] + "item", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)] ) - def test_insert_mismatched_types(self, tz_aware_fixture, value): + def test_insert_mismatched_types_raises(self, tz_aware_fixture, item): # GH#33703 dont cast these to dt64 tz = tz_aware_fixture dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz) msg = "incompatible label" with pytest.raises(TypeError, match=msg): - dti.insert(1, value) + dti.insert(1, item) def test_insert_object_casting(self, tz_aware_fixture): # GH#33703 diff --git a/pandas/tests/indexes/timedeltas/test_insert.py b/pandas/tests/indexes/timedeltas/test_insert.py index b27b766f650e5..1ebc0a4b1eca0 100644 --- a/pandas/tests/indexes/timedeltas/test_insert.py +++ b/pandas/tests/indexes/timedeltas/test_insert.py @@ -83,15 +83,15 @@ def test_insert_invalid_na(self): idx.insert(0, np.datetime64("NaT")) @pytest.mark.parametrize( - "value", [0, np.int64(0), np.float64(0), np.array(0), np.datetime64(456, "us")] + "item", [0, np.int64(0), np.float64(0), np.array(0), np.datetime64(456, "us")] ) - def test_insert_mismatched_types(self, value): + def test_insert_mismatched_types_raises(self, item): # GH#33703 dont cast these to td64 tdi = TimedeltaIndex(["4day", "1day", "2day"], name="idx") msg = "incompatible label" with pytest.raises(TypeError, match=msg): - tdi.insert(1, value) + tdi.insert(1, item) def test_insert_dont_cast_strings(self): # To match DatetimeIndex and PeriodIndex behavior, dont try to