pandas-dev · mroeschke · Mar 29, 2024 · Mar 29, 2024
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -209,6 +209,7 @@ Removal of prior version deprecations/changes
 - Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`)
 - Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`)
 - Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`)
+- Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)
 - Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`)
 - All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`)
 - All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -20,10 +20,7 @@
 
 import numpy as np
 
-from pandas._config import (
-    get_option,
-    using_pyarrow_string_dtype,
-)
+from pandas._config import get_option
 
 from pandas._libs import (
     NaT,
@@ -6614,23 +6611,8 @@ def insert(self, loc: int, item) -> Index:
             loc = loc if loc >= 0 else loc - 1
             new_values[loc] = item
 
-        out = Index._with_infer(new_values, name=self.name)
-        if (
-            using_pyarrow_string_dtype()
-            and is_string_dtype(out.dtype)
-            and new_values.dtype == object
-        ):
-            out = out.astype(new_values.dtype)
-        if self.dtype == object and out.dtype != object:
-            # GH#51363
-            warnings.warn(
-                "The behavior of Index.insert with object-dtype is deprecated, "
-                "in a future version this will return an object-dtype Index "
-                "instead of inferring a non-object dtype. To retain the old "
-                "behavior, do `idx.insert(loc, item).infer_objects(copy=False)`",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
+        # GH#51363 stopped doing dtype inference here
+        out = Index(new_values, dtype=new_values.dtype, name=self.name)
         return out
 
     def drop(

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1896,15 +1896,7 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc") -> None:
                     # just replacing the block manager here
                     # so the object is the same
                     index = self.obj._get_axis(i)
-                    with warnings.catch_warnings():
-                        # TODO: re-issue this with setitem-specific message?
-                        warnings.filterwarnings(
-                            "ignore",
-                            "The behavior of Index.insert with object-dtype "
-                            "is deprecated",
-                            category=FutureWarning,
-                        )
-                        labels = index.insert(len(index), key)
+                    labels = index.insert(len(index), key)
 
                     # We are expanding the Series/DataFrame values to match
                     #  the length of thenew index `labels`.  GH#40096 ensure
@@ -2222,14 +2214,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
         # and set inplace
         if self.ndim == 1:
             index = self.obj.index
-            with warnings.catch_warnings():
-                # TODO: re-issue this with setitem-specific message?
-                warnings.filterwarnings(
-                    "ignore",
-                    "The behavior of Index.insert with object-dtype is deprecated",
-                    category=FutureWarning,
-                )
-                new_index = index.insert(len(index), indexer)
+            new_index = index.insert(len(index), indexer)
 
             # we have a coerced indexer, e.g. a float
             # that matches in an int64 Index, so

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -1480,14 +1480,7 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None:
         value : np.ndarray or ExtensionArray
         refs : The reference tracking object of the value to set.
         """
-        with warnings.catch_warnings():
-            # TODO: re-issue this with setitem-specific message?
-            warnings.filterwarnings(
-                "ignore",
-                "The behavior of Index.insert with object-dtype is deprecated",
-                category=FutureWarning,
-            )
-            new_axis = self.items.insert(loc, item)
+        new_axis = self.items.insert(loc, item)
 
         if value.ndim == 2:
             value = value.T

diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py
@@ -409,19 +409,13 @@ def test_where(self, listlike_box, simple_index):
         tm.assert_index_equal(result, expected)
 
     def test_insert_base(self, index):
+        # GH#51363
         trimmed = index[1:4]
 
         if not len(index):
             pytest.skip("Not applicable for empty index")
 
-        # test 0th element
-        warn = None
-        if index.dtype == object and index.inferred_type == "boolean":
-            # GH#51363
-            warn = FutureWarning
-        msg = "The behavior of Index.insert with object-dtype is deprecated"
-        with tm.assert_produces_warning(warn, match=msg):
-            result = trimmed.insert(0, index[0])
+        result = trimmed.insert(0, index[0])
         assert index[0:4].equals(result)
 
     @pytest.mark.skipif(

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -2025,7 +2025,7 @@ def test_loc_setitem_incremental_with_dst(self):
         ids=["self", "to_datetime64", "to_pydatetime", "np.datetime64"],
     )
     def test_loc_setitem_datetime_keys_cast(self, conv):
-        # GH#9516
+        # GH#9516, GH#51363 changed in 3.0 to not cast on Index.insert
         dt1 = Timestamp("20130101 09:00:00")
         dt2 = Timestamp("20130101 10:00:00")
         df = DataFrame()
@@ -2034,7 +2034,7 @@ def test_loc_setitem_datetime_keys_cast(self, conv):
 
         expected = DataFrame(
             {"one": [100.0, 200.0]},
-            index=[dt1, dt2],
+            index=Index([conv(dt1), conv(dt2)], dtype=object),
             columns=Index(["one"], dtype=object),
         )
         tm.assert_frame_equal(df, expected)

diff --git a/pandas/tests/series/indexing/test_set_value.py b/pandas/tests/series/indexing/test_set_value.py
@@ -3,17 +3,17 @@
 import numpy as np
 
 from pandas import (
-    DatetimeIndex,
+    Index,
     Series,
 )
 import pandas._testing as tm
 
 
 def test_series_set_value():
-    # GH#1561
+    # GH#1561, GH#51363 as of 3.0 we do not do inference in Index.insert
 
     dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
-    index = DatetimeIndex(dates)
+    index = Index(dates, dtype=object)
 
     s = Series(dtype=object)
     s._set_value(dates[0], 1.0)

diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
@@ -495,11 +495,11 @@ def test_setitem_callable_other(self):
 
 class TestSetitemWithExpansion:
     def test_setitem_empty_series(self):
-        # GH#10193
+        # GH#10193, GH#51363 changed in 3.0 to not do inference in Index.insert
         key = Timestamp("2012-01-01")
         series = Series(dtype=object)
         series[key] = 47
-        expected = Series(47, [key])
+        expected = Series(47, Index([key], dtype=object))
         tm.assert_series_equal(series, expected)
 
     def test_setitem_empty_series_datetimeindex_preserves_freq(self):