From 386c421a5b8ebaef199bbd87df8bc161e4d0e84e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 26 Jan 2025 15:10:15 +0100
Subject: [PATCH 01/10] API: ignore empty range/object dtype in Index setop
 operations (string dtype compat)

---
 pandas/core/indexes/base.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index e2f9c5e9868a9..d7159cd7eb16d 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -19,7 +19,10 @@
 
 import numpy as np
 
-from pandas._config import get_option
+from pandas._config import (
+    get_option,
+    using_string_dtype,
+)
 
 from pandas._libs import (
     NaT,
@@ -6233,8 +6236,16 @@ def _find_common_type_compat(self, target) -> DtypeObj:
         Implementation of find_common_type that adjusts for Index-specific
         special cases.
         """
+        # breakpoint()
         target_dtype, _ = infer_dtype_from(target)
 
+        if using_string_dtype():
+            from pandas.core.indexes.range import RangeIndex
+
+            if len(self) == 0 or self.isna().all():
+                if isinstance(self, RangeIndex) or self.dtype == np.object_:
+                    return target_dtype
+
         # special case: if one dtype is uint64 and the other a signed int, return object
         # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
         # Now it's:

From e809ac2802012f433d9633ef9e99391b6b6de367 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 26 Jan 2025 19:28:12 +0100
Subject: [PATCH 02/10] only for empty + fix tests

---
 pandas/core/indexes/base.py                   | 15 ++++++++---
 pandas/tests/frame/indexing/test_coercion.py  |  7 +----
 pandas/tests/frame/indexing/test_indexing.py  |  2 +-
 pandas/tests/frame/indexing/test_insert.py    |  3 +--
 pandas/tests/frame/indexing/test_setitem.py   | 12 +++------
 pandas/tests/frame/methods/test_dropna.py     |  3 ---
 .../tests/frame/methods/test_reset_index.py   |  3 ---
 pandas/tests/groupby/test_groupby.py          |  2 +-
 .../tests/indexes/base_class/test_setops.py   |  4 +--
 pandas/tests/indexes/test_setops.py           |  4 +++
 pandas/tests/indexing/test_at.py              |  7 +----
 pandas/tests/indexing/test_loc.py             | 27 +++++++++----------
 pandas/tests/indexing/test_partial.py         | 18 +++++--------
 pandas/tests/series/indexing/test_indexing.py | 14 +++-------
 .../tests/series/indexing/test_set_value.py   |  8 ++++--
 pandas/tests/series/indexing/test_setitem.py  | 13 +++++----
 16 files changed, 60 insertions(+), 82 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index d7159cd7eb16d..167c03cc49596 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -6236,15 +6236,22 @@ def _find_common_type_compat(self, target) -> DtypeObj:
         Implementation of find_common_type that adjusts for Index-specific
         special cases.
         """
-        # breakpoint()
         target_dtype, _ = infer_dtype_from(target)
 
         if using_string_dtype():
+            # special case: if left or right is a zero-length RangeIndex or
+            # Index[object], those can be created by the default empty constructors
+            # -> for that case ignore this dtype and always return the other
             from pandas.core.indexes.range import RangeIndex
 
-            if len(self) == 0 or self.isna().all():
-                if isinstance(self, RangeIndex) or self.dtype == np.object_:
-                    return target_dtype
+            if len(self) == 0 and (
+                isinstance(self, RangeIndex) or self.dtype == np.object_
+            ):
+                return target_dtype
+            if len(target) == 0 and (
+                isinstance(target, RangeIndex) or target_dtype == np.object_
+            ):
+                return self.dtype
 
         # special case: if one dtype is uint64 and the other a signed int, return object
         # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py
index 1a454351b7085..472bfb7772a80 100644
--- a/pandas/tests/frame/indexing/test_coercion.py
+++ b/pandas/tests/frame/indexing/test_coercion.py
@@ -88,12 +88,7 @@ def test_26395(indexer_al):
     df["D"] = 0
 
     indexer_al(df)["C", "D"] = 2
-    expected = DataFrame(
-        {"D": [0, 0, 2]},
-        index=["A", "B", "C"],
-        columns=pd.Index(["D"], dtype=object),
-        dtype=np.int64,
-    )
+    expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
     tm.assert_frame_equal(df, expected)
 
     with pytest.raises(TypeError, match="Invalid value"):
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index a9bc485283985..0c99b08cb30c4 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1138,7 +1138,7 @@ def test_loc_setitem_datetimelike_with_inference(self):
         result = df.dtypes
         expected = Series(
             [np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2,
-            index=Index(list("ABCDEFGH"), dtype=object),
+            index=list("ABCDEFGH"),
         )
         tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py
index a1d60eb9626d6..b530cb98ef46c 100644
--- a/pandas/tests/frame/indexing/test_insert.py
+++ b/pandas/tests/frame/indexing/test_insert.py
@@ -68,8 +68,7 @@ def test_insert_with_columns_dups(self):
         df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
         df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
         exp = DataFrame(
-            [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]],
-            columns=Index(["A", "A", "A"], dtype=object),
+            [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
         )
         tm.assert_frame_equal(df, exp)
 
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index cfd7e91c4ceab..d0f2eeae62320 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -150,9 +150,7 @@ def test_setitem_empty_columns(self):
         df["X"] = df.index
         df["X"] = ["x", "y", "z"]
         exp = DataFrame(
-            data={"X": ["x", "y", "z"]},
-            index=["A", "B", "C"],
-            columns=Index(["X"], dtype=object),
+            data={"X": ["x", "y", "z"]}, index=["A", "B", "C"], columns=["X"]
         )
         tm.assert_frame_equal(df, exp)
 
@@ -169,9 +167,7 @@ def test_setitem_timestamp_empty_columns(self):
         df["now"] = Timestamp("20130101", tz="UTC")
 
         expected = DataFrame(
-            [[Timestamp("20130101", tz="UTC")]] * 3,
-            index=range(3),
-            columns=Index(["now"], dtype=object),
+            [[Timestamp("20130101", tz="UTC")]] * 3, index=range(3), columns=["now"]
         )
         tm.assert_frame_equal(df, expected)
 
@@ -210,7 +206,7 @@ def test_setitem_period_preserves_dtype(self):
         result = DataFrame([])
         result["a"] = data
 
-        expected = DataFrame({"a": data}, columns=Index(["a"], dtype=object))
+        expected = DataFrame({"a": data}, columns=["a"])
 
         tm.assert_frame_equal(result, expected)
 
@@ -930,7 +926,7 @@ def test_setitem_scalars_no_index(self):
         # GH#16823 / GH#17894
         df = DataFrame()
         df["foo"] = 1
-        expected = DataFrame(columns=Index(["foo"], dtype=object)).astype(np.int64)
+        expected = DataFrame(columns=["foo"]).astype(np.int64)
         tm.assert_frame_equal(df, expected)
 
     def test_setitem_newcol_tuple_key(self, float_frame):
diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py
index 4a60dc09cfe07..11893d7fac1a4 100644
--- a/pandas/tests/frame/methods/test_dropna.py
+++ b/pandas/tests/frame/methods/test_dropna.py
@@ -4,8 +4,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -184,7 +182,6 @@ def test_dropna_multiple_axes(self):
         with pytest.raises(TypeError, match="supplying multiple axes"):
             inp.dropna(how="all", axis=(0, 1), inplace=True)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_dropna_tz_aware_datetime(self):
         # GH13407
         df = DataFrame()
diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py
index 0b320075ed2d2..80da849cc59d4 100644
--- a/pandas/tests/frame/methods/test_reset_index.py
+++ b/pandas/tests/frame/methods/test_reset_index.py
@@ -4,8 +4,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.core.dtypes.common import (
     is_float_dtype,
     is_integer_dtype,
@@ -644,7 +642,6 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes):
         tm.assert_frame_equal(res, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) - GH#60338")
 @pytest.mark.parametrize(
     "array, dtype",
     [
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 5bae9b1fd9882..4ff3db6fe089e 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1278,7 +1278,7 @@ def test_groupby_2d_malformed():
     d["label"] = ["l1", "l2"]
     tmp = d.groupby(["group"]).mean(numeric_only=True)
     res_values = np.array([[0.0, 1.0], [0.0, 1.0]])
-    tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"], dtype=object))
+    tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"]))
     tm.assert_numpy_array_equal(tmp.values, res_values)
 
 
diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py
index 0e9fb77d6e8dd..4038e3b136ceb 100644
--- a/pandas/tests/indexes/base_class/test_setops.py
+++ b/pandas/tests/indexes/base_class/test_setops.py
@@ -240,7 +240,7 @@ def test_tuple_union_bug(self, method, expected, sort):
     def test_union_name_preservation(
         self, first_list, second_list, first_name, second_name, expected_name, sort
     ):
-        expected_dtype = object if not first_list or not second_list else "str"
+        # expected_dtype = object if not first_list or not second_list else "str"
         first = Index(first_list, name=first_name)
         second = Index(second_list, name=second_name)
         union = first.union(second, sort=sort)
@@ -251,7 +251,7 @@ def test_union_name_preservation(
             expected = Index(sorted(vals), name=expected_name)
             tm.assert_index_equal(union, expected)
         else:
-            expected = Index(vals, name=expected_name, dtype=expected_dtype)
+            expected = Index(vals, name=expected_name)
             tm.assert_index_equal(union.sort_values(), expected.sort_values())
 
     @pytest.mark.parametrize(
diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
index 58b69d79c65ce..436c862db310e 100644
--- a/pandas/tests/indexes/test_setops.py
+++ b/pandas/tests/indexes/test_setops.py
@@ -539,10 +539,14 @@ def test_setop_with_categorical(index_flat, sort, method):
 
     result = getattr(index, method)(other, sort=sort)
     expected = getattr(index, method)(index, sort=sort)
+    if index.empty and method in ("union", "symmetric_difference"):
+        expected = expected.astype("category")
     tm.assert_index_equal(result, expected, exact=exact)
 
     result = getattr(index, method)(other[:5], sort=sort)
     expected = getattr(index, method)(index[:5], sort=sort)
+    if index.empty and method in ("union", "symmetric_difference"):
+        expected = expected.astype("category")
     tm.assert_index_equal(result, expected, exact=exact)
 
 
diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py
index 10a8fa88b4b5e..e80acc230a320 100644
--- a/pandas/tests/indexing/test_at.py
+++ b/pandas/tests/indexing/test_at.py
@@ -13,7 +13,6 @@
     CategoricalIndex,
     DataFrame,
     DatetimeIndex,
-    Index,
     MultiIndex,
     Series,
     Timestamp,
@@ -67,11 +66,7 @@ def test_at_setitem_item_cache_cleared(self):
         df.at[0, "x"] = 4
         df.at[0, "cost"] = 789
 
-        expected = DataFrame(
-            {"x": [4], "cost": 789},
-            index=[0],
-            columns=Index(["x", "cost"], dtype=object),
-        )
+        expected = DataFrame({"x": [4], "cost": 789}, index=[0])
         tm.assert_frame_equal(df, expected)
 
         # And in particular, check that the _item_cache has updated correctly.
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 7aeded5a6cb7f..7c8e6026ad27c 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -766,9 +766,9 @@ def test_loc_setitem_empty_frame(self):
         #  is inplace, so that dtype is retained
         sera = Series(val1, index=keys1, dtype=np.float64)
         serb = Series(val2, index=keys2)
-        expected = DataFrame(
-            {"A": sera, "B": serb}, columns=Index(["A", "B"], dtype=object)
-        ).reindex(index=index)
+        expected = DataFrame({"A": sera, "B": serb}, columns=Index(["A", "B"])).reindex(
+            index=index
+        )
         tm.assert_frame_equal(df, expected)
 
     def test_loc_setitem_frame(self):
@@ -966,7 +966,7 @@ def test_setitem_new_key_tz(self, indexer_sl):
             to_datetime(42).tz_localize("UTC"),
             to_datetime(666).tz_localize("UTC"),
         ]
-        expected = Series(vals, index=Index(["foo", "bar"], dtype=object))
+        expected = Series(vals, index=Index(["foo", "bar"]))
 
         ser = Series(dtype=object)
         indexer_sl(ser)["foo"] = vals[0]
@@ -1966,15 +1966,11 @@ def test_loc_setitem_empty_series_str_idx(self):
         # partially set with an empty object series
         ser = Series(dtype=object)
         ser.loc["foo"] = 1
-        tm.assert_series_equal(ser, Series([1], index=Index(["foo"], dtype=object)))
+        tm.assert_series_equal(ser, Series([1], index=Index(["foo"])))
         ser.loc["bar"] = 3
-        tm.assert_series_equal(
-            ser, Series([1, 3], index=Index(["foo", "bar"], dtype=object))
-        )
+        tm.assert_series_equal(ser, Series([1, 3], index=Index(["foo", "bar"])))
         ser.loc[3] = 4
-        tm.assert_series_equal(
-            ser, Series([1, 3, 4], index=Index(["foo", "bar", 3], dtype=object))
-        )
+        tm.assert_series_equal(ser, Series([1, 3, 4], index=Index(["foo", "bar", 3])))
 
     def test_loc_setitem_incremental_with_dst(self):
         # GH#20724
@@ -1996,7 +1992,7 @@ def test_loc_setitem_incremental_with_dst(self):
         ],
         ids=["self", "to_datetime64", "to_pydatetime", "np.datetime64"],
     )
-    def test_loc_setitem_datetime_keys_cast(self, conv):
+    def test_loc_setitem_datetime_keys_cast(self, conv, using_infer_string):
         # GH#9516, GH#51363 changed in 3.0 to not cast on Index.insert
         dt1 = Timestamp("20130101 09:00:00")
         dt2 = Timestamp("20130101 10:00:00")
@@ -2004,10 +2000,13 @@ def test_loc_setitem_datetime_keys_cast(self, conv):
         df.loc[conv(dt1), "one"] = 100
         df.loc[conv(dt2), "one"] = 200
 
+        # breakpoint()
         expected = DataFrame(
             {"one": [100.0, 200.0]},
-            index=Index([conv(dt1), conv(dt2)], dtype=object),
-            columns=Index(["one"], dtype=object),
+            index=Index(
+                [conv(dt1), conv(dt2)], dtype=None if using_infer_string else object
+            ),
+            columns=Index(["one"]),
         )
         tm.assert_frame_equal(df, expected)
 
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index 3dbdedbb94618..6f20d0e4e7cbf 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -30,7 +30,7 @@ def test_empty_frame_setitem_index_name_retained(self):
         expected = DataFrame(
             {"series": [1.23] * 4},
             index=pd.RangeIndex(4, name="df_index"),
-            columns=Index(["series"], dtype=object),
+            columns=Index(["series"]),
         )
 
         tm.assert_frame_equal(df, expected)
@@ -43,7 +43,7 @@ def test_empty_frame_setitem_index_name_inherited(self):
         expected = DataFrame(
             {"series": [1.23] * 4},
             index=pd.RangeIndex(4, name="series_index"),
-            columns=Index(["series"], dtype=object),
+            columns=Index(["series"]),
         )
         tm.assert_frame_equal(df, expected)
 
@@ -96,9 +96,7 @@ def test_partial_set_empty_frame2(self):
         # these work as they don't really change
         # anything but the index
         # GH#5632
-        expected = DataFrame(
-            columns=Index(["foo"], dtype=object), index=Index([], dtype="object")
-        )
+        expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="object"))
 
         df = DataFrame(index=Index([], dtype="object"))
         df["foo"] = Series([], dtype="object")
@@ -116,9 +114,7 @@ def test_partial_set_empty_frame2(self):
         tm.assert_frame_equal(df, expected)
 
     def test_partial_set_empty_frame3(self):
-        expected = DataFrame(
-            columns=Index(["foo"], dtype=object), index=Index([], dtype="int64")
-        )
+        expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="int64"))
         expected["foo"] = expected["foo"].astype("float64")
 
         df = DataFrame(index=Index([], dtype="int64"))
@@ -135,9 +131,7 @@ def test_partial_set_empty_frame4(self):
         df = DataFrame(index=Index([], dtype="int64"))
         df["foo"] = range(len(df))
 
-        expected = DataFrame(
-            columns=Index(["foo"], dtype=object), index=Index([], dtype="int64")
-        )
+        expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="int64"))
         # range is int-dtype-like, so we get int64 dtype
         expected["foo"] = expected["foo"].astype("int64")
         tm.assert_frame_equal(df, expected)
@@ -210,7 +204,7 @@ def test_partial_set_empty_frame_empty_copy_assignment(self):
         df = DataFrame(index=[0])
         df = df.copy()
         df["a"] = 0
-        expected = DataFrame(0, index=[0], columns=Index(["a"], dtype=object))
+        expected = DataFrame(0, index=[0], columns=Index(["a"]))
         tm.assert_frame_equal(df, expected)
 
     def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string):
diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py
index d3556b644c4bf..02efd850f9640 100644
--- a/pandas/tests/series/indexing/test_indexing.py
+++ b/pandas/tests/series/indexing/test_indexing.py
@@ -253,25 +253,17 @@ def test_timedelta_assignment():
     # GH 8209
     s = Series([], dtype=object)
     s.loc["B"] = timedelta(1)
-    expected = Series(
-        Timedelta("1 days"), dtype="timedelta64[ns]", index=Index(["B"], dtype=object)
-    )
+    expected = Series(Timedelta("1 days"), dtype="timedelta64[ns]", index=["B"])
     tm.assert_series_equal(s, expected)
 
     s = s.reindex(s.index.insert(0, "A"))
     expected = Series(
-        [np.nan, Timedelta("1 days")],
-        dtype="timedelta64[ns]",
-        index=Index(["A", "B"], dtype=object),
+        [np.nan, Timedelta("1 days")], dtype="timedelta64[ns]", index=["A", "B"]
     )
     tm.assert_series_equal(s, expected)
 
     s.loc["A"] = timedelta(1)
-    expected = Series(
-        Timedelta("1 days"),
-        dtype="timedelta64[ns]",
-        index=Index(["A", "B"], dtype=object),
-    )
+    expected = Series(Timedelta("1 days"), dtype="timedelta64[ns]", index=["A", "B"])
     tm.assert_series_equal(s, expected)
 
 
diff --git a/pandas/tests/series/indexing/test_set_value.py b/pandas/tests/series/indexing/test_set_value.py
index 99e71fa4b804b..3bf89da53d923 100644
--- a/pandas/tests/series/indexing/test_set_value.py
+++ b/pandas/tests/series/indexing/test_set_value.py
@@ -3,17 +3,21 @@
 import numpy as np
 
 from pandas import (
+    DatetimeIndex,
     Index,
     Series,
 )
 import pandas._testing as tm
 
 
-def test_series_set_value():
+def test_series_set_value(using_infer_string):
     # GH#1561, GH#51363 as of 3.0 we do not do inference in Index.insert
 
     dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
-    index = Index(dates, dtype=object)
+    if using_infer_string:
+        index = DatetimeIndex(dates)
+    else:
+        index = Index(dates, dtype=object)
 
     s = Series(dtype=object)
     s._set_value(dates[0], 1.0)
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
index 49c933c308235..964f0b90b3c41 100644
--- a/pandas/tests/series/indexing/test_setitem.py
+++ b/pandas/tests/series/indexing/test_setitem.py
@@ -474,12 +474,14 @@ def test_setitem_callable_other(self):
 
 
 class TestSetitemWithExpansion:
-    def test_setitem_empty_series(self):
-        # GH#10193, GH#51363 changed in 3.0 to not do inference in Index.insert
+    def test_setitem_empty_series(self, using_infer_string):
+        # GH#10193
         key = Timestamp("2012-01-01")
         series = Series(dtype=object)
         series[key] = 47
-        expected = Series(47, Index([key], dtype=object))
+        expected = Series(
+            47, index=[key] if using_infer_string else Index([key], dtype=object)
+        )
         tm.assert_series_equal(series, expected)
 
     def test_setitem_empty_series_datetimeindex_preserves_freq(self):
@@ -536,10 +538,7 @@ def test_setitem_with_expansion_type_promotion(self):
         ser["a"] = Timestamp("2016-01-01")
         ser["b"] = 3.0
         ser["c"] = "foo"
-        expected = Series(
-            [Timestamp("2016-01-01"), 3.0, "foo"],
-            index=Index(["a", "b", "c"], dtype=object),
-        )
+        expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"])
         tm.assert_series_equal(ser, expected)
 
     def test_setitem_not_contained(self, string_series):

From 249a42f82cbcc745aef677ba5b5482259f74558d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 26 Jan 2025 20:39:38 +0100
Subject: [PATCH 03/10] fixup

---
 pandas/tests/indexes/test_setops.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
index 436c862db310e..7cc74f4b3405c 100644
--- a/pandas/tests/indexes/test_setops.py
+++ b/pandas/tests/indexes/test_setops.py
@@ -530,7 +530,7 @@ def test_intersection_difference_match_empty(self, index, sort):
 @pytest.mark.parametrize(
     "method", ["intersection", "union", "difference", "symmetric_difference"]
 )
-def test_setop_with_categorical(index_flat, sort, method):
+def test_setop_with_categorical(index_flat, sort, method, using_infer_string):
     # MultiIndex tested separately in tests.indexes.multi.test_setops
     index = index_flat
 
@@ -539,13 +539,21 @@ def test_setop_with_categorical(index_flat, sort, method):
 
     result = getattr(index, method)(other, sort=sort)
     expected = getattr(index, method)(index, sort=sort)
-    if index.empty and method in ("union", "symmetric_difference"):
+    if (
+        using_infer_string
+        and index.empty
+        and method in ("union", "symmetric_difference")
+    ):
         expected = expected.astype("category")
     tm.assert_index_equal(result, expected, exact=exact)
 
     result = getattr(index, method)(other[:5], sort=sort)
     expected = getattr(index, method)(index[:5], sort=sort)
-    if index.empty and method in ("union", "symmetric_difference"):
+    if (
+        using_infer_string
+        and index.empty
+        and method in ("union", "symmetric_difference")
+    ):
         expected = expected.astype("category")
     tm.assert_index_equal(result, expected, exact=exact)
 

From 513f343e2b5f9d7d176b7c8d1b8c76a1d4335a6d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 26 Jan 2025 20:44:41 +0100
Subject: [PATCH 04/10] fix insert

---
 pandas/core/indexes/base.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 167c03cc49596..f4411a20baaa7 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -6248,8 +6248,10 @@ def _find_common_type_compat(self, target) -> DtypeObj:
                 isinstance(self, RangeIndex) or self.dtype == np.object_
             ):
                 return target_dtype
-            if len(target) == 0 and (
-                isinstance(target, RangeIndex) or target_dtype == np.object_
+            if (
+                isinstance(target, Index)
+                and len(target) == 0
+                and (isinstance(target, RangeIndex) or target_dtype == np.object_)
             ):
                 return self.dtype
 

From 33ef0d561a1efd078618625b5d75f3535521528b Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 26 Jan 2025 20:47:37 +0100
Subject: [PATCH 05/10] more test fixes

---
 pandas/tests/frame/test_query_eval.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index ca572b1026526..375b9b00a4988 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -791,7 +791,6 @@ def test_check_tz_aware_index_query(self, tz_aware_fixture):
         tm.assert_frame_equal(result, expected)
 
         expected = DataFrame(df_index)
-        expected.columns = expected.columns.astype(object)
         result = df.reset_index().query('"2018-01-03 00:00:00+00" < time')
         tm.assert_frame_equal(result, expected)
 

From 62cd9b7498a985addba6c8dabd4655707b6551ad Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 26 Jan 2025 21:22:07 +0100
Subject: [PATCH 06/10] more fixes for infer_string mode

---
 pandas/tests/dtypes/test_concat.py                |  6 ++++--
 pandas/tests/frame/constructors/test_from_dict.py |  3 ---
 pandas/tests/frame/test_constructors.py           |  3 ---
 pandas/tests/indexes/base_class/test_setops.py    |  1 -
 pandas/tests/indexes/datetimes/test_join.py       | 10 +++++++---
 pandas/tests/indexing/test_loc.py                 |  1 -
 pandas/tests/series/methods/test_combine_first.py |  5 +++--
 7 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/pandas/tests/dtypes/test_concat.py b/pandas/tests/dtypes/test_concat.py
index d4fe6c5264007..571e12d0c3303 100644
--- a/pandas/tests/dtypes/test_concat.py
+++ b/pandas/tests/dtypes/test_concat.py
@@ -47,7 +47,7 @@ def test_concat_periodarray_2d():
         _concat.concat_compat([arr[:2], arr[2:]], axis=1)
 
 
-def test_concat_series_between_empty_and_tzaware_series():
+def test_concat_series_between_empty_and_tzaware_series(using_infer_string):
     tzaware_time = pd.Timestamp("2020-01-01T00:00:00+00:00")
     ser1 = Series(index=[tzaware_time], data=0, dtype=float)
     ser2 = Series(dtype=float)
@@ -57,7 +57,9 @@ def test_concat_series_between_empty_and_tzaware_series():
         data=[
             (0.0, None),
         ],
-        index=pd.Index([tzaware_time], dtype=object),
+        index=[tzaware_time]
+        if using_infer_string
+        else pd.Index([tzaware_time], dtype=object),
         columns=[0, 1],
         dtype=float,
     )
diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py
index 1509c47ba65c7..845174bbf600e 100644
--- a/pandas/tests/frame/constructors/test_from_dict.py
+++ b/pandas/tests/frame/constructors/test_from_dict.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas import (
     DataFrame,
     Index,
@@ -44,7 +42,6 @@ def test_constructor_single_row(self):
         )
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken")
     def test_constructor_list_of_series(self):
         data = [
             OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 9b6080603f0c9..037a2ae294bb2 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -21,8 +21,6 @@
 from numpy.ma import mrecords
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas._libs import lib
 from pandas.compat.numpy import np_version_gt2
 from pandas.errors import IntCastingNaNError
@@ -1974,7 +1972,6 @@ def test_constructor_with_datetimes4(self):
         df = DataFrame({"value": dr})
         assert str(df.iat[0, 0].tz) == "US/Eastern"
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_constructor_with_datetimes5(self):
         # GH 7822
         # preserver an index with a tz on dict construction
diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py
index 4038e3b136ceb..d57df82b2358c 100644
--- a/pandas/tests/indexes/base_class/test_setops.py
+++ b/pandas/tests/indexes/base_class/test_setops.py
@@ -240,7 +240,6 @@ def test_tuple_union_bug(self, method, expected, sort):
     def test_union_name_preservation(
         self, first_list, second_list, first_name, second_name, expected_name, sort
     ):
-        # expected_dtype = object if not first_list or not second_list else "str"
         first = Index(first_list, name=first_name)
         second = Index(second_list, name=second_name)
         union = first.union(second, sort=sort)
diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py
index d0ac32939296c..abf6809d67f9c 100644
--- a/pandas/tests/indexes/datetimes/test_join.py
+++ b/pandas/tests/indexes/datetimes/test_join.py
@@ -70,13 +70,17 @@ def test_join_utc_convert(self, join_type):
         assert isinstance(result, DatetimeIndex)
         assert result.tz is timezone.utc
 
-    def test_datetimeindex_union_join_empty(self, sort):
+    def test_datetimeindex_union_join_empty(self, sort, using_infer_string):
         dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
         empty = Index([])
 
         result = dti.union(empty, sort=sort)
-        expected = dti.astype("O")
-        tm.assert_index_equal(result, expected)
+        if using_infer_string:
+            assert isinstance(result, DatetimeIndex)
+            tm.assert_index_equal(result, dti)
+        else:
+            expected = dti.astype("O")
+            tm.assert_index_equal(result, expected)
 
         result = dti.join(empty)
         assert isinstance(result, DatetimeIndex)
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 7c8e6026ad27c..17e610bda93e4 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -2000,7 +2000,6 @@ def test_loc_setitem_datetime_keys_cast(self, conv, using_infer_string):
         df.loc[conv(dt1), "one"] = 100
         df.loc[conv(dt2), "one"] = 200
 
-        # breakpoint()
         expected = DataFrame(
             {"one": [100.0, 200.0]},
             index=Index(
diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py
index 293919173c2d5..51d6704e1905b 100644
--- a/pandas/tests/series/methods/test_combine_first.py
+++ b/pandas/tests/series/methods/test_combine_first.py
@@ -31,7 +31,7 @@ def test_combine_first_name(self, datetime_series):
         result = datetime_series.combine_first(datetime_series[:5])
         assert result.name == datetime_series.name
 
-    def test_combine_first(self):
+    def test_combine_first(self, using_infer_string):
         values = np.arange(20, dtype=np.float64)
         series = Series(values, index=np.arange(20, dtype=np.int64))
 
@@ -64,7 +64,8 @@ def test_combine_first(self):
         ser = Series([1.0, 2, 3], index=[0, 1, 2])
         empty = Series([], index=[], dtype=object)
         result = ser.combine_first(empty)
-        ser.index = ser.index.astype("O")
+        if not using_infer_string:
+            ser.index = ser.index.astype("O")
         tm.assert_series_equal(result, ser.astype(object))
 
     def test_combine_first_dt64(self, unit):

From 152c4ec0859a8acb1e43aaf7b5dea5549bf35715 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 26 Jan 2025 22:24:44 +0100
Subject: [PATCH 07/10] fix feather test for pyarrow<19

---
 pandas/tests/io/test_feather.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py
index 24af0a014dd50..e778193c147c1 100644
--- a/pandas/tests/io/test_feather.py
+++ b/pandas/tests/io/test_feather.py
@@ -143,8 +143,8 @@ def test_rw_use_threads(self):
     def test_path_pathlib(self):
         df = pd.DataFrame(
             1.1 * np.arange(120).reshape((30, 4)),
-            columns=pd.Index(list("ABCD"), dtype=object),
-            index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
+            columns=pd.Index(list("ABCD")),
+            index=pd.Index([f"i-{i}" for i in range(30)]),
         ).reset_index()
         result = tm.round_trip_pathlib(df.to_feather, read_feather)
         tm.assert_frame_equal(df, result)

From 33f7b5ca407044cf9414ae2a8a26006f30184aa2 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 29 Jan 2025 09:29:53 +0100
Subject: [PATCH 08/10] add explicit tests and fix insert for empty object
 dtype

---
 pandas/core/indexes/base.py                   |  9 ++++++
 pandas/tests/frame/indexing/test_setitem.py   | 20 ++++++++++--
 .../tests/frame/methods/test_reset_index.py   | 31 +++++++++++++++++++
 .../tests/indexes/base_class/test_reshape.py  |  2 +-
 pandas/tests/indexes/test_old_base.py         | 10 +++---
 5 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index f4411a20baaa7..06aad7c4e062d 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -6242,6 +6242,7 @@ def _find_common_type_compat(self, target) -> DtypeObj:
             # special case: if left or right is a zero-length RangeIndex or
             # Index[object], those can be created by the default empty constructors
             # -> for that case ignore this dtype and always return the other
+            # (https://github.com/pandas-dev/pandas/pull/60797)
             from pandas.core.indexes.range import RangeIndex
 
             if len(self) == 0 and (
@@ -6908,6 +6909,14 @@ def insert(self, loc: int, item) -> Index:
 
         arr = self._values
 
+        if using_string_dtype and len(self) == 0 and self.dtype == np.object_:
+            # special case: if we are an empty object-dtype Index, also
+            # take into account the inserted item for the resulting dtype
+            # (https://github.com/pandas-dev/pandas/pull/60797)
+            dtype = self._find_common_type_compat(item)
+            if dtype != self.dtype:
+                return self.astype(dtype).insert(loc, item)
+
         try:
             if isinstance(arr, ExtensionArray):
                 res_values = arr.insert(loc, item)
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index d0f2eeae62320..20dd7b0c4d3e7 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -144,8 +144,9 @@ def test_setitem_different_dtype(self):
         )
         tm.assert_series_equal(result, expected)
 
-    def test_setitem_empty_columns(self):
-        # GH 13522
+    def test_setitem_overwrite_index(self):
+        # GH 13522 - assign the index as a column and then overwrite the values
+        # -> should not affect the index
         df = DataFrame(index=["A", "B", "C"])
         df["X"] = df.index
         df["X"] = ["x", "y", "z"]
@@ -154,6 +155,21 @@ def test_setitem_empty_columns(self):
         )
         tm.assert_frame_equal(df, exp)
 
+    def test_setitem_empty_columns(self):
+        # Starting from an empty DataFrame and setting a column should result
+        # in a default string dtype for the columns' Index
+        # https://github.com/pandas-dev/pandas/issues/60338
+
+        df = DataFrame()
+        df["foo"] = [1, 2, 3]
+        expected = DataFrame({"foo": [1, 2, 3]})
+        tm.assert_frame_equal(df, expected)
+
+        df = DataFrame(columns=Index([]))
+        df["foo"] = [1, 2, 3]
+        expected = DataFrame({"foo": [1, 2, 3]})
+        tm.assert_frame_equal(df, expected)
+
     def test_setitem_dt64_index_empty_columns(self):
         rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
         df = DataFrame(index=np.arange(len(rng)))
diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py
index 80da849cc59d4..80227c0462329 100644
--- a/pandas/tests/frame/methods/test_reset_index.py
+++ b/pandas/tests/frame/methods/test_reset_index.py
@@ -778,3 +778,34 @@ def test_reset_index_false_index_name():
     result_frame.reset_index()
     expected_frame = DataFrame(range(5, 10), RangeIndex(range(5), name=False))
     tm.assert_frame_equal(result_frame, expected_frame)
+
+
+@pytest.mark.parametrize("columns", [None, Index([])])
+def test_reset_index_with_empty_frame(columns):
+    # An empty DataFrame currently gets a RangeIndex or object-dtype Index for its
+    # columns, but after resetting the index the columns should still end up with
+    # the default string dtype (https://github.com/pandas-dev/pandas/issues/60338)
+
+    index = Index([], name="foo")
+    df = DataFrame(index=index, columns=columns)
+    result = df.reset_index()
+    expected = DataFrame(columns=["foo"])
+    tm.assert_frame_equal(result, expected)
+
+    index = Index([1, 2, 3], name="foo")
+    df = DataFrame(index=index, columns=columns)
+    result = df.reset_index()
+    expected = DataFrame({"foo": [1, 2, 3]})
+    tm.assert_frame_equal(result, expected)
+
+    index = MultiIndex.from_tuples([], names=["foo", "bar"])
+    df = DataFrame(index=index, columns=columns)
+    result = df.reset_index()
+    expected = DataFrame(columns=["foo", "bar"])
+    tm.assert_frame_equal(result, expected)
+
+    index = MultiIndex.from_tuples([(1, 2), (2, 3)], names=["foo", "bar"])
+    df = DataFrame(index=index, columns=columns)
+    result = df.reset_index()
+    expected = DataFrame({"foo": [1, 2], "bar": [2, 3]})
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/indexes/base_class/test_reshape.py b/pandas/tests/indexes/base_class/test_reshape.py
index 56cdca49cb2b0..d4932712de1bb 100644
--- a/pandas/tests/indexes/base_class/test_reshape.py
+++ b/pandas/tests/indexes/base_class/test_reshape.py
@@ -34,7 +34,7 @@ def test_insert(self):
 
         # test empty
         null_index = Index([])
-        tm.assert_index_equal(Index(["a"], dtype=object), null_index.insert(0, "a"))
+        tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))
 
     def test_insert_missing(self, nulls_fixture, using_infer_string):
         # GH#22295
diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py
index 65feb07e05d9f..49609d28ca56e 100644
--- a/pandas/tests/indexes/test_old_base.py
+++ b/pandas/tests/indexes/test_old_base.py
@@ -454,10 +454,12 @@ def test_insert_out_of_bounds(self, index, using_infer_string):
         else:
             msg = "slice indices must be integers or None or have an __index__ method"
 
-        if using_infer_string and (
-            index.dtype == "string" or index.dtype == "category"
-        ):
-            msg = "loc must be an integer between"
+        if using_infer_string:
+            if index.dtype == "string" or index.dtype == "category":
+                msg = "loc must be an integer between"
+            elif index.dtype == "object" and len(index) == 0:
+                msg = "loc must be an integer between"
+                err = TypeError
 
         with pytest.raises(err, match=msg):
             index.insert(0.5, "foo")

From 46f9fe7a22e1e1d98617f2f3a8d19877cb3cda8d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 29 Jan 2025 20:46:11 +0100
Subject: [PATCH 09/10] fixup

---
 pandas/core/indexes/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 06aad7c4e062d..c17c8c1d9a172 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -6909,7 +6909,7 @@ def insert(self, loc: int, item) -> Index:
 
         arr = self._values
 
-        if using_string_dtype and len(self) == 0 and self.dtype == np.object_:
+        if using_string_dtype() and len(self) == 0 and self.dtype == np.object_:
             # special case: if we are an empty object-dtype Index, also
             # take into account the inserted item for the resulting dtype
             # (https://github.com/pandas-dev/pandas/pull/60797)

From 73f56608b448e1618a16a9a1418f2d9e7d08bbee Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sat, 15 Feb 2025 17:35:57 +0100
Subject: [PATCH 10/10] add whatsnew note

---
 doc/source/whatsnew/v2.3.0.rst | 10 ++++++++++
 doc/source/whatsnew/v3.0.0.rst |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
index 8bdddb5b7f85d..32d9253326277 100644
--- a/doc/source/whatsnew/v2.3.0.rst
+++ b/doc/source/whatsnew/v2.3.0.rst
@@ -53,6 +53,16 @@ These are bug fixes that might have notable behavior changes.
 notable_bug_fix1
 ^^^^^^^^^^^^^^^^
 
+.. _whatsnew_230.api_changes:
+
+API changes
+~~~~~~~~~~~
+
+- When the ``future.infer_string`` option is enabled, :class:`Index` set
+  operations (like union or intersection) will now ignore the dtype of an empty
+  :class:`RangeIndex` or empty :class:`Index` with object dtype when determining
+  the dtype of the resulting :class:`Index` (:issue:`60797`)
+
 .. ---------------------------------------------------------------------------
 .. _whatsnew_230.deprecations:
 
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 4d9a45abe17cd..64e4a30453366 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -361,6 +361,9 @@ Other API changes
 - pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`)
 - pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`)
 - when comparing the indexes in :func:`testing.assert_series_equal`, check_exact defaults to True if an :class:`Index` is of integer dtypes. (:issue:`57386`)
+- :class:`Index` set operations (like union or intersection) will now ignore the
+  dtype of an empty :class:`RangeIndex` or empty :class:`Index` with object dtype
+  when determining the dtype of the resulting :class:`Index` (:issue:`60797`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.deprecations: