diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index cbbba84da6ae6..06a583fe381d0 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -286,6 +286,7 @@ Indexing - Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`) - Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`) - Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`) +- Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index aa1a05aac9ad1..f38b49a15c120 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5717,6 +5717,7 @@ def insert(self, loc: int, item): """ # Note: this method is overridden by all ExtensionIndex subclasses, # so self is never backed by an EA. + item = lib.item_from_zerodim(item) try: item = self._validate_fill_value(item) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 02a3548ec304e..268474446d6f6 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -600,7 +600,11 @@ def delete(self, loc): @doc(NDArrayBackedExtensionIndex.insert) def insert(self, loc: int, item): - result = super().insert(loc, item) + try: + result = super().insert(loc, item) + except (ValueError, TypeError): + # i.e. self._data._validate_scalar raised + return self.astype(object).insert(loc, item) result._data._freq = self._get_insert_freq(loc, item) return result @@ -865,15 +869,3 @@ def join( def _maybe_utc_convert(self: _T, other: Index) -> Tuple[_T, Index]: # Overridden by DatetimeIndex return self, other - - # -------------------------------------------------------------------- - # List-Like Methods - - @Appender(DatetimeIndexOpsMixin.insert.__doc__) - def insert(self, loc, item): - if isinstance(item, str): - # TODO: Why are strings special? - # TODO: Should we attempt _scalar_from_string? - return self.astype(object).insert(loc, item) - - return DatetimeIndexOpsMixin.insert(self, loc, item) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 48488fd867da7..2eda9f9ba41ac 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -422,12 +422,6 @@ def inferred_type(self) -> str: # indexing return "period" - def insert(self, loc: int, item): - if not isinstance(item, Period) or self.freq != item.freq: - return self.astype(object).insert(loc, item) - - return DatetimeIndexOpsMixin.insert(self, loc, item) - # ------------------------------------------------------------------------ # Indexing Methods diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index e43eb3fb47b7e..8644f56e4f253 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -314,18 +314,45 @@ def test_reset_index_multiindex_nan(self): rs = df.set_index(["A", "B"]).reset_index() tm.assert_frame_equal(rs, df) - def test_reset_index_with_datetimeindex_cols(self): + @pytest.mark.parametrize( + "name", + [ + None, + "foo", + 2, + 3.0, + pd.Timedelta(6), + Timestamp("2012-12-30", tz="UTC"), + "2012-12-31", + ], + ) + def test_reset_index_with_datetimeindex_cols(self, name): # GH#5818 + warn = None + if isinstance(name, Timestamp) and name.tz is not None: + # _deprecate_mismatched_indexing + warn = FutureWarning + df = DataFrame( [[1, 2], [3, 4]], columns=date_range("1/1/2013", "1/2/2013"), index=["A", "B"], ) + df.index.name = name + + with tm.assert_produces_warning(warn, check_stacklevel=False): + result = df.reset_index() + + item = name if name is not None else "index" + columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)]) + if isinstance(item, str) and item == "2012-12-31": + columns = columns.astype("datetime64[ns]") + else: + assert columns.dtype == object - result = df.reset_index() expected = DataFrame( [["A", 1, 2], ["B", 3, 4]], - columns=["index", datetime(2013, 1, 1), datetime(2013, 1, 2)], + columns=columns, ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_insert.py b/pandas/tests/indexes/datetimes/test_insert.py index d2c999f61b4bb..684c6b813b48f 100644 --- a/pandas/tests/indexes/datetimes/test_insert.py +++ b/pandas/tests/indexes/datetimes/test_insert.py @@ -21,9 +21,11 @@ def test_insert_nat(self, tz, null): @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"]) def test_insert_invalid_na(self, tz): idx = DatetimeIndex(["2017-01-01"], tz=tz) - msg = "value should be a 'Timestamp' or 'NaT'. Got 'timedelta64' instead." - with pytest.raises(TypeError, match=msg): - idx.insert(0, np.timedelta64("NaT")) + + item = np.timedelta64("NaT") + result = idx.insert(0, item) + expected = Index([item] + list(idx), dtype=object) + tm.assert_index_equal(result, expected) def test_insert_empty_preserves_freq(self, tz_naive_fixture): # GH#33573 @@ -114,17 +116,6 @@ def test_insert(self): assert result.name == expected.name assert result.freq is None - # see gh-7299 - idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx") - with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"): - idx.insert(3, Timestamp("2000-01-04")) - with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"): - idx.insert(3, datetime(2000, 1, 4)) - with pytest.raises(ValueError, match="Timezones don't match"): - idx.insert(3, Timestamp("2000-01-04", tz="US/Eastern")) - with pytest.raises(ValueError, match="Timezones don't match"): - idx.insert(3, datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern"))) - for tz in ["US/Pacific", "Asia/Singapore"]: idx = date_range("1/1/2000 09:00", periods=6, freq="H", tz=tz, name="idx") # preserve freq @@ -167,6 +158,48 @@ def test_insert(self): assert result.tz == expected.tz assert result.freq is None + # TODO: also changes DataFrame.__setitem__ with expansion + def test_insert_mismatched_tzawareness(self): + # see GH#7299 + idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx") + + # mismatched tz-awareness + item = Timestamp("2000-01-04") + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + + # mismatched tz-awareness + item = datetime(2000, 1, 4) + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + + # TODO: also changes DataFrame.__setitem__ with expansion + def test_insert_mismatched_tz(self): + # see GH#7299 + idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx") + + # mismatched tz -> cast to object (could reasonably cast to same tz or UTC) + item = Timestamp("2000-01-04", tz="US/Eastern") + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + + # mismatched tz -> cast to object (could reasonably cast to same tz) + item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern")) + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize( "item", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)] ) @@ -175,17 +208,36 @@ def test_insert_mismatched_types_raises(self, tz_aware_fixture, item): tz = tz_aware_fixture dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz) - msg = "value should be a 'Timestamp' or 'NaT'. Got '.*' instead" - with pytest.raises(TypeError, match=msg): - dti.insert(1, item) + result = dti.insert(1, item) + + if isinstance(item, np.ndarray): + # FIXME: without doing .item() here this segfaults + assert item.item() == 0 + expected = Index([dti[0], 0] + list(dti[1:]), dtype=object, name=9) + else: + expected = Index([dti[0], item] + list(dti[1:]), dtype=object, name=9) + + tm.assert_index_equal(result, expected) - def test_insert_object_casting(self, tz_aware_fixture): + def test_insert_castable_str(self, tz_aware_fixture): # GH#33703 tz = tz_aware_fixture dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz) - # ATM we treat this as a string, but we could plausibly wrap it in Timestamp value = "2019-11-05" result = dti.insert(0, value) - expected = Index(["2019-11-05"] + list(dti), dtype=object, name=9) + + ts = Timestamp(value).tz_localize(tz) + expected = DatetimeIndex([ts] + list(dti), dtype=dti.dtype, name=9) + tm.assert_index_equal(result, expected) + + def test_insert_non_castable_str(self, tz_aware_fixture): + # GH#33703 + tz = tz_aware_fixture + dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz) + + value = "foo" + result = dti.insert(0, value) + + expected = Index(["foo"] + list(dti), dtype=object, name=9) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_insert.py b/pandas/tests/indexes/timedeltas/test_insert.py index 66fec2310e50c..d501f81fd9dce 100644 --- a/pandas/tests/indexes/timedeltas/test_insert.py +++ b/pandas/tests/indexes/timedeltas/test_insert.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._libs import lib + import pandas as pd from pandas import Index, Timedelta, TimedeltaIndex, timedelta_range import pandas._testing as tm @@ -79,9 +81,14 @@ def test_insert_nat(self, null): def test_insert_invalid_na(self): idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") - msg = r"value should be a 'Timedelta' or 'NaT'\. Got 'datetime64' instead\." - with pytest.raises(TypeError, match=msg): - idx.insert(0, np.datetime64("NaT")) + + # FIXME: assert_index_equal fails if we pass a different + # instance of np.datetime64("NaT") + item = np.datetime64("NaT") + result = idx.insert(0, item) + + expected = Index([item] + list(idx), dtype=object, name="idx") + tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "item", [0, np.int64(0), np.float64(0), np.array(0), np.datetime64(456, "us")] @@ -90,18 +97,30 @@ def test_insert_mismatched_types_raises(self, item): # GH#33703 dont cast these to td64 tdi = TimedeltaIndex(["4day", "1day", "2day"], name="idx") - msg = r"value should be a 'Timedelta' or 'NaT'\. Got '.*' instead\." - with pytest.raises(TypeError, match=msg): - tdi.insert(1, item) + result = tdi.insert(1, item) + + expected = Index( + [tdi[0], lib.item_from_zerodim(item)] + list(tdi[1:]), + dtype=object, + name="idx", + ) + tm.assert_index_equal(result, expected) - def test_insert_dont_cast_strings(self): - # To match DatetimeIndex and PeriodIndex behavior, dont try to - # parse strings to Timedelta + def test_insert_castable_str(self): idx = timedelta_range("1day", "3day") result = idx.insert(0, "1 Day") - assert result.dtype == object - assert result[0] == "1 Day" + + expected = TimedeltaIndex([idx[0]] + list(idx)) + tm.assert_index_equal(result, expected) + + def test_insert_non_castable_str(self): + idx = timedelta_range("1day", "3day") + + result = idx.insert(0, "foo") + + expected = Index(["foo"] + list(idx), dtype=object) + tm.assert_index_equal(result, expected) def test_insert_empty(self): # Corner case inserting with length zero doesnt raise IndexError diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index ac2e300f9f8d6..b342942cd9f5f 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -441,14 +441,6 @@ def test_insert_index_float64(self, insert, coerced_val, coerced_dtype): [pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1], ) def test_insert_index_datetimes(self, request, fill_val, exp_dtype, insert_value): - if not hasattr(insert_value, "tz"): - request.node.add_marker( - pytest.mark.xfail(reason="ToDo: must coerce to object") - ) - elif fill_val.tz != insert_value.tz: - request.node.add_marker( - pytest.mark.xfail(reason="GH 37605 - require tz equality?") - ) obj = pd.DatetimeIndex( ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=fill_val.tz @@ -461,7 +453,36 @@ def test_insert_index_datetimes(self, request, fill_val, exp_dtype, insert_value ) self._assert_insert_conversion(obj, fill_val, exp, exp_dtype) - obj.insert(1, insert_value) + if fill_val.tz: + + # mismatched tzawareness + ts = pd.Timestamp("2012-01-01") + result = obj.insert(1, ts) + expected = obj.astype(object).insert(1, ts) + assert expected.dtype == object + tm.assert_index_equal(result, expected) + + # mismatched tz --> cast to object (could reasonably cast to commom tz) + ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") + result = obj.insert(1, ts) + expected = obj.astype(object).insert(1, ts) + assert expected.dtype == object + tm.assert_index_equal(result, expected) + + else: + # mismatched tzawareness + ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") + result = obj.insert(1, ts) + expected = obj.astype(object).insert(1, ts) + assert expected.dtype == object + tm.assert_index_equal(result, expected) + + item = 1 + result = obj.insert(1, item) + expected = obj.astype(object).insert(1, item) + assert expected[1] == item + assert expected.dtype == object + tm.assert_index_equal(result, expected) def test_insert_index_timedelta64(self): obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"]) @@ -473,15 +494,11 @@ def test_insert_index_timedelta64(self): obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]" ) - # ToDo: must coerce to object - msg = "value should be a 'Timedelta' or 'NaT'. Got 'Timestamp' instead." - with pytest.raises(TypeError, match=msg): - obj.insert(1, pd.Timestamp("2012-01-01")) - - # ToDo: must coerce to object - msg = "value should be a 'Timedelta' or 'NaT'. Got 'int' instead." - with pytest.raises(TypeError, match=msg): - obj.insert(1, 1) + for item in [pd.Timestamp("2012-01-01"), 1]: + result = obj.insert(1, item) + expected = obj.astype(object).insert(1, item) + assert expected.dtype == object + tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "insert, coerced_val, coerced_dtype", @@ -506,7 +523,23 @@ def test_insert_index_period(self, insert, coerced_val, coerced_dtype): if isinstance(insert, pd.Period): exp = pd.PeriodIndex(data, freq="M") self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + # string that can be parsed to appropriate PeriodDtype + self._assert_insert_conversion(obj, str(insert), exp, coerced_dtype) + else: + result = obj.insert(0, insert) + expected = obj.astype(object).insert(0, insert) + tm.assert_index_equal(result, expected) + + # TODO: ATM inserting '2012-01-01 00:00:00' when we have obj.freq=="M" + # casts that string to Period[M], not clear that is desirable + if not isinstance(insert, pd.Timestamp): + # non-castable string + result = obj.insert(0, str(insert)) + expected = obj.astype(object).insert(0, str(insert)) + tm.assert_index_equal(result, expected) + msg = r"Unexpected keyword arguments {'freq'}" with pytest.raises(TypeError, match=msg): with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 0251fb4a0ebd6..ad2d7250d9d6c 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -326,22 +326,24 @@ def test_series_partial_set_with_name(self): result = ser.iloc[[1, 1, 0, 0]] tm.assert_series_equal(result, expected, check_index_type=True) + @pytest.mark.parametrize("key", [100, 100.0]) + def test_setitem_with_expansion_numeric_into_datetimeindex(self, key): + # GH#4940 inserting non-strings + orig = tm.makeTimeDataFrame() + df = orig.copy() + + df.loc[key, :] = df.iloc[0] + ex_index = Index(list(orig.index) + [key], dtype=object, name=orig.index.name) + ex_data = np.concatenate([orig.values, df.iloc[[0]].values], axis=0) + expected = DataFrame(ex_data, index=ex_index, columns=orig.columns) + tm.assert_frame_equal(df, expected) + def test_partial_set_invalid(self): # GH 4940 # allow only setting of 'valid' values orig = tm.makeTimeDataFrame() - df = orig.copy() - - # don't allow not string inserts - msg = r"value should be a 'Timestamp' or 'NaT'\. Got '.*' instead\." - - with pytest.raises(TypeError, match=msg): - df.loc[100.0, :] = df.iloc[0] - - with pytest.raises(TypeError, match=msg): - df.loc[100, :] = df.iloc[0] # allow object conversion here df = orig.copy()