From 1d068d434e5b0b51595d34591f14a2ef8bf58fd7 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 28 Mar 2024 19:06:28 -0700 Subject: [PATCH] DEPR: Index.insert dtype inference --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/base.py | 24 +++---------------- pandas/core/indexing.py | 19 ++------------- pandas/core/internals/managers.py | 9 +------ pandas/tests/indexes/test_old_base.py | 10 ++------ pandas/tests/indexing/test_loc.py | 4 ++-- .../tests/series/indexing/test_set_value.py | 6 ++--- pandas/tests/series/indexing/test_setitem.py | 4 ++-- 8 files changed, 16 insertions(+), 61 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c91cc6ab7acdb..ccac930d533fd 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -209,6 +209,7 @@ Removal of prior version deprecations/changes - Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`) - Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`) - Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`) +- Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`) - Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`) - All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`) - All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 76dd19a9424f5..76f36f1fbac5f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -20,10 +20,7 @@ import numpy as np -from pandas._config import ( - get_option, - using_pyarrow_string_dtype, -) 
+from pandas._config import get_option from pandas._libs import ( NaT, @@ -6614,23 +6611,8 @@ def insert(self, loc: int, item) -> Index: loc = loc if loc >= 0 else loc - 1 new_values[loc] = item - out = Index._with_infer(new_values, name=self.name) - if ( - using_pyarrow_string_dtype() - and is_string_dtype(out.dtype) - and new_values.dtype == object - ): - out = out.astype(new_values.dtype) - if self.dtype == object and out.dtype != object: - # GH#51363 - warnings.warn( - "The behavior of Index.insert with object-dtype is deprecated, " - "in a future version this will return an object-dtype Index " - "instead of inferring a non-object dtype. To retain the old " - "behavior, do `idx.insert(loc, item).infer_objects(copy=False)`", - FutureWarning, - stacklevel=find_stack_level(), - ) + # GH#51363 stopped doing dtype inference here + out = Index(new_values, dtype=new_values.dtype, name=self.name) return out def drop( diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6b4070ed6349c..982e305b7e471 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1896,15 +1896,7 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc") -> None: # just replacing the block manager here # so the object is the same index = self.obj._get_axis(i) - with warnings.catch_warnings(): - # TODO: re-issue this with setitem-specific message? - warnings.filterwarnings( - "ignore", - "The behavior of Index.insert with object-dtype " - "is deprecated", - category=FutureWarning, - ) - labels = index.insert(len(index), key) + labels = index.insert(len(index), key) # We are expanding the Series/DataFrame values to match # the length of thenew index `labels`. GH#40096 ensure @@ -2222,14 +2214,7 @@ def _setitem_with_indexer_missing(self, indexer, value): # and set inplace if self.ndim == 1: index = self.obj.index - with warnings.catch_warnings(): - # TODO: re-issue this with setitem-specific message? 
- warnings.filterwarnings( - "ignore", - "The behavior of Index.insert with object-dtype is deprecated", - category=FutureWarning, - ) - new_index = index.insert(len(index), indexer) + new_index = index.insert(len(index), indexer) # we have a coerced indexer, e.g. a float # that matches in an int64 Index, so diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index af851e1fc8224..8fda9cd23b508 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1480,14 +1480,7 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None: value : np.ndarray or ExtensionArray refs : The reference tracking object of the value to set. """ - with warnings.catch_warnings(): - # TODO: re-issue this with setitem-specific message? - warnings.filterwarnings( - "ignore", - "The behavior of Index.insert with object-dtype is deprecated", - category=FutureWarning, - ) - new_axis = self.items.insert(loc, item) + new_axis = self.items.insert(loc, item) if value.ndim == 2: value = value.T diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 85eec7b7c018d..2c0e257efa9c3 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -409,19 +409,13 @@ def test_where(self, listlike_box, simple_index): tm.assert_index_equal(result, expected) def test_insert_base(self, index): + # GH#51363 trimmed = index[1:4] if not len(index): pytest.skip("Not applicable for empty index") - # test 0th element - warn = None - if index.dtype == object and index.inferred_type == "boolean": - # GH#51363 - warn = FutureWarning - msg = "The behavior of Index.insert with object-dtype is deprecated" - with tm.assert_produces_warning(warn, match=msg): - result = trimmed.insert(0, index[0]) + result = trimmed.insert(0, index[0]) assert index[0:4].equals(result) @pytest.mark.skipif( diff --git a/pandas/tests/indexing/test_loc.py 
b/pandas/tests/indexing/test_loc.py index c01a8647dd07d..01dab14c7e528 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2025,7 +2025,7 @@ def test_loc_setitem_incremental_with_dst(self): ids=["self", "to_datetime64", "to_pydatetime", "np.datetime64"], ) def test_loc_setitem_datetime_keys_cast(self, conv): - # GH#9516 + # GH#9516, GH#51363 changed in 3.0 to not cast on Index.insert dt1 = Timestamp("20130101 09:00:00") dt2 = Timestamp("20130101 10:00:00") df = DataFrame() @@ -2034,7 +2034,7 @@ def test_loc_setitem_datetime_keys_cast(self, conv): expected = DataFrame( {"one": [100.0, 200.0]}, - index=[dt1, dt2], + index=Index([conv(dt1), conv(dt2)], dtype=object), columns=Index(["one"], dtype=object), ) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/series/indexing/test_set_value.py b/pandas/tests/series/indexing/test_set_value.py index cbe1a8bf296c8..99e71fa4b804b 100644 --- a/pandas/tests/series/indexing/test_set_value.py +++ b/pandas/tests/series/indexing/test_set_value.py @@ -3,17 +3,17 @@ import numpy as np from pandas import ( - DatetimeIndex, + Index, Series, ) import pandas._testing as tm def test_series_set_value(): - # GH#1561 + # GH#1561, GH#51363 as of 3.0 we do not do inference in Index.insert dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] - index = DatetimeIndex(dates) + index = Index(dates, dtype=object) s = Series(dtype=object) s._set_value(dates[0], 1.0) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 6be325073bb67..99535f273075c 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -495,11 +495,11 @@ def test_setitem_callable_other(self): class TestSetitemWithExpansion: def test_setitem_empty_series(self): - # GH#10193 + # GH#10193, GH#51363 changed in 3.0 to not do inference in Index.insert key = Timestamp("2012-01-01") series = Series(dtype=object) series[key] = 47 - 
expected = Series(47, [key]) + expected = Series(47, Index([key], dtype=object)) tm.assert_series_equal(series, expected) def test_setitem_empty_series_datetimeindex_preserves_freq(self):