From 5dd7d939c6db76dda21b7da95e0a9bbd8983c88c Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 1 Oct 2023 21:56:14 +0100
Subject: [PATCH 01/36] Start fixing string tests

---
 pandas/conftest.py                            |  8 ++++++++
 pandas/core/config_init.py                    |  2 +-
 pandas/tests/frame/methods/test_astype.py     | 20 +++++++++++++------
 .../tests/frame/methods/test_combine_first.py |  6 ++++--
 .../frame/methods/test_convert_dtypes.py      |  6 +++++-
 pandas/tests/frame/methods/test_drop.py       |  2 +-
 .../frame/methods/test_drop_duplicates.py     |  2 +-
 pandas/tests/frame/methods/test_duplicated.py |  2 +-
 pandas/tests/frame/methods/test_equals.py     |  4 ++--
 pandas/tests/frame/methods/test_explode.py    |  2 +-
 pandas/tests/frame/methods/test_fillna.py     |  4 ++++
 .../frame/methods/test_get_numeric_data.py    |  6 +++---
 .../tests/frame/methods/test_interpolate.py   | 10 ++++++----
 .../methods/test_is_homogeneous_dtype.py      |  3 ++-
 pandas/tests/frame/methods/test_reindex.py    |  6 +++---
 15 files changed, 56 insertions(+), 27 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 62f22921f0482..415f8a8168607 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1999,6 +1999,14 @@ def using_copy_on_write() -> bool:
     )
 
 
+@pytest.fixture
+def using_infer_string() -> bool:
+    """
+    Fixture to check if the ``future.infer_string`` option is enabled.
+    """
+    return pd.options.future.infer_string
+
+
 warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
 if zoneinfo is not None:
     warsaws.append(zoneinfo.ZoneInfo("Europe/Warsaw"))  # type: ignore[arg-type]
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 4652acdcae287..c0201e13ed1bf 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -903,7 +903,7 @@ def register_converter_cb(key) -> None:
 with cf.config_prefix("future"):
     cf.register_option(
         "infer_string",
-        False,
+        os.environ.get("PANDAS_INFER_STRING", "0") == "1",  # opt-in; off when unset
         "Whether to infer sequence of str objects as pyarrow string "
         "dtype, which will be the default in pandas 3.0 "
         "(at which point this option will be deprecated).",
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 6590f10c6b967..af0010beeb46d 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -157,7 +157,8 @@ def test_astype_str(self):
                 "c": [Timedelta(x)._repr_base() for x in c._values],
                 "d": list(map(str, d._values)),
                 "e": list(map(str, e._values)),
-            }
+            },
+            dtype="object",
         )
 
         tm.assert_frame_equal(result, expected)
@@ -165,13 +166,13 @@ def test_astype_str(self):
     def test_astype_str_float(self):
         # see GH#11302
         result = DataFrame([np.nan]).astype(str)
-        expected = DataFrame(["nan"])
+        expected = DataFrame(["nan"], dtype="object")
 
         tm.assert_frame_equal(result, expected)
         result = DataFrame([1.12345678901234567890]).astype(str)
 
         val = "1.1234567890123457"
-        expected = DataFrame([val])
+        expected = DataFrame([val], dtype="object")
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("dtype_class", [dict, Series])
@@ -190,7 +191,7 @@ def test_astype_dict_like(self, dtype_class):
         expected = DataFrame(
             {
                 "a": a,
-                "b": Series(["0", "1", "2", "3", "4"]),
+                "b": Series(["0", "1", "2", "3", "4"], dtype="object"),
                 "c": c,
                 "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float32"),
             }
@@ -606,6 +607,7 @@ def test_astype_arg_for_errors_dictlist(self):
                 {"a": 2.2, "b": "15.3", "c": "another_test"},
             ]
         )
+        expected["c"] = expected["c"].astype("object")
         type_dict = {"a": "float64", "b": "float64", "c": "object"}
 
         result = df.astype(dtype=type_dict, errors="ignore")
@@ -666,6 +668,7 @@ def test_astype_dt64tz_to_str(self, timezone_frame):
                 ],
             ],
             columns=timezone_frame.columns,
+            dtype="object",
         )
         tm.assert_frame_equal(result, expected)
 
@@ -740,7 +743,9 @@ def test_astype_tz_object_conversion(self, tz):
         result = result.astype({"tz": "datetime64[ns, Europe/London]"})
         tm.assert_frame_equal(result, expected)
 
-    def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture):
+    def test_astype_dt64_to_string(
+        self, frame_or_series, tz_naive_fixture, using_infer_string
+    ):
         # GH#41409
         tz = tz_naive_fixture
 
@@ -758,7 +763,10 @@ def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture):
         item = result.iloc[0]
         if frame_or_series is DataFrame:
             item = item.iloc[0]
-        assert item is pd.NA
+        if using_infer_string:
+            assert item is np.nan
+        else:
+            assert item is pd.NA
 
         # For non-NA values, we should match what we get for non-EA str
         alt = obj.astype(str)
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index 156e50d50a9ef..71ffbf20eb430 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -30,7 +30,7 @@ def test_combine_first_mixed(self):
         combined = f.combine_first(g)
         tm.assert_frame_equal(combined, exp)
 
-    def test_combine_first(self, float_frame):
+    def test_combine_first(self, float_frame, using_infer_string):
         # disjoint
         head, tail = float_frame[:5], float_frame[5:]
 
@@ -76,7 +76,9 @@ def test_combine_first(self, float_frame):
         tm.assert_series_equal(combined["A"].reindex(g.index), g["A"])
 
         # corner cases
-        comb = float_frame.combine_first(DataFrame())
+        warning = FutureWarning if using_infer_string else None
+        with tm.assert_produces_warning(warning, match="empty entries"):
+            comb = float_frame.combine_first(DataFrame())
         tm.assert_frame_equal(comb, float_frame)
 
         comb = DataFrame().combine_first(float_frame)
diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
index c2b1016e88402..c3b04787ff5c1 100644
--- a/pandas/tests/frame/methods/test_convert_dtypes.py
+++ b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -11,9 +11,13 @@ class TestConvertDtypes:
     @pytest.mark.parametrize(
         "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
     )
-    def test_convert_dtypes(self, convert_integer, expected, string_storage):
+    def test_convert_dtypes(
+        self, convert_integer, expected, string_storage, using_infer_string
+    ):
         # Specific types are tested in tests/series/test_dtypes.py
         # Just check that it works for DataFrame here
+        if using_infer_string and string_storage == "python":
+            string_storage = "pyarrow_numpy"
         df = pd.DataFrame(
             {
                 "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py
index 9a4882f11e961..694752be49ff1 100644
--- a/pandas/tests/frame/methods/test_drop.py
+++ b/pandas/tests/frame/methods/test_drop.py
@@ -510,7 +510,7 @@ def test_drop_with_duplicate_columns2(self):
 
     def test_drop_inplace_no_leftover_column_reference(self):
         # GH 13934
-        df = DataFrame({"a": [1, 2, 3]})
+        df = DataFrame({"a": [1, 2, 3]}, columns=Index(["a"], dtype="object"))
         a = df.a
         df.drop(["a"], axis=1, inplace=True)
         tm.assert_index_equal(df.columns, Index([], dtype="object"))
diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py
index df12139258a6d..6bea97b2cf189 100644
--- a/pandas/tests/frame/methods/test_drop_duplicates.py
+++ b/pandas/tests/frame/methods/test_drop_duplicates.py
@@ -16,7 +16,7 @@
 def test_drop_duplicates_with_misspelled_column_name(subset):
     # GH 19730
     df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]})
-    msg = re.escape("Index(['a'], dtype='object')")
+    msg = re.escape("Index(['a'], dtype=")
 
     with pytest.raises(KeyError, match=msg):
         df.drop_duplicates(subset)
diff --git a/pandas/tests/frame/methods/test_duplicated.py b/pandas/tests/frame/methods/test_duplicated.py
index 788aede805110..6052b61ea8db5 100644
--- a/pandas/tests/frame/methods/test_duplicated.py
+++ b/pandas/tests/frame/methods/test_duplicated.py
@@ -16,7 +16,7 @@
 def test_duplicated_with_misspelled_column_name(subset):
     # GH 19730
     df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]})
-    msg = re.escape("Index(['a'], dtype='object')")
+    msg = re.escape("Index(['a'], dtype=")
 
     with pytest.raises(KeyError, match=msg):
         df.duplicated(subset)
diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py
index 6fcf670f96ef0..d0b9d96cafa0d 100644
--- a/pandas/tests/frame/methods/test_equals.py
+++ b/pandas/tests/frame/methods/test_equals.py
@@ -14,11 +14,11 @@ def test_dataframe_not_equal(self):
         df2 = DataFrame({"a": ["s", "d"], "b": [1, 2]})
         assert df1.equals(df2) is False
 
-    def test_equals_different_blocks(self, using_array_manager):
+    def test_equals_different_blocks(self, using_array_manager, using_infer_string):
         # GH#9330
         df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]})
         df1 = df0.reset_index()[["A", "B", "C"]]
-        if not using_array_manager:
+        if not using_array_manager and not using_infer_string:
             # this assert verifies that the above operations have
             # induced a block rearrangement
             assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype
diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py
index d1e4a603c5710..5cd54db62d783 100644
--- a/pandas/tests/frame/methods/test_explode.py
+++ b/pandas/tests/frame/methods/test_explode.py
@@ -203,7 +203,7 @@ def test_usecase():
 )
 def test_duplicate_index(input_dict, input_index, expected_dict, expected_index):
     # GH 28005
-    df = pd.DataFrame(input_dict, index=input_index)
+    df = pd.DataFrame(input_dict, index=input_index, dtype=object)
     result = df.explode("col1")
     expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object)
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index 52b4b64ee279f..6cf859ddfce84 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 import pandas.util._test_decorators as td
 
 from pandas import (
@@ -84,6 +86,7 @@ def test_fillna_datetime(self, datetime_frame):
         with pytest.raises(ValueError, match=msg):
             datetime_frame.fillna(5, method="ffill")
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't fill 0 in string")
     def test_fillna_mixed_type(self, float_string_frame):
         mf = float_string_frame
         mf.loc[mf.index[5:20], "foo"] = np.nan
@@ -644,6 +647,7 @@ def test_fillna_col_reordering(self):
             filled = df.fillna(method="ffill")
         assert df.columns.tolist() == filled.columns.tolist()
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't fill 0 in string")
     def test_fill_corner(self, float_frame, float_string_frame):
         mf = float_string_frame
         mf.loc[mf.index[5:20], "foo"] = np.nan
diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py
index ec1c768603a59..c5d32d56d03c1 100644
--- a/pandas/tests/frame/methods/test_get_numeric_data.py
+++ b/pandas/tests/frame/methods/test_get_numeric_data.py
@@ -15,12 +15,12 @@
 class TestGetNumericData:
     def test_get_numeric_data_preserve_dtype(self):
         # get the numeric data
-        obj = DataFrame({"A": [1, "2", 3.0]})
+        obj = DataFrame({"A": [1, "2", 3.0]}, columns=Index(["A"], dtype="object"))
         result = obj._get_numeric_data()
         expected = DataFrame(dtype=object, index=pd.RangeIndex(3), columns=[])
         tm.assert_frame_equal(result, expected)
 
-    def test_get_numeric_data(self):
+    def test_get_numeric_data(self, using_infer_string):
         datetime64name = np.dtype("M8[s]").name
         objectname = np.dtype(np.object_).name
 
@@ -33,7 +33,7 @@ def test_get_numeric_data(self):
             [
                 np.dtype("float64"),
                 np.dtype("int64"),
-                np.dtype(objectname),
+                np.dtype(objectname) if not using_infer_string else "string",
                 np.dtype(datetime64name),
             ],
             index=["a", "b", "c", "f"],
diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py
index 291a79815a81c..bfaecbcdab818 100644
--- a/pandas/tests/frame/methods/test_interpolate.py
+++ b/pandas/tests/frame/methods/test_interpolate.py
@@ -67,7 +67,7 @@ def test_interpolate_inplace(self, frame_or_series, using_array_manager, request
         assert np.shares_memory(orig, obj.values)
         assert orig.squeeze()[1] == 1.5
 
-    def test_interp_basic(self, using_copy_on_write):
+    def test_interp_basic(self, using_copy_on_write, using_infer_string):
         df = DataFrame(
             {
                 "A": [1, 2, np.nan, 4],
@@ -85,7 +85,8 @@ def test_interp_basic(self, using_copy_on_write):
             }
         )
         msg = "DataFrame.interpolate with object dtype"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
+        warning = FutureWarning if not using_infer_string else None
+        with tm.assert_produces_warning(warning, match=msg):
             result = df.interpolate()
         tm.assert_frame_equal(result, expected)
 
@@ -108,7 +109,7 @@ def test_interp_basic(self, using_copy_on_write):
         assert np.shares_memory(df["C"]._values, cvalues)
         assert np.shares_memory(df["D"]._values, dvalues)
 
-    def test_interp_basic_with_non_range_index(self):
+    def test_interp_basic_with_non_range_index(self, using_infer_string):
         df = DataFrame(
             {
                 "A": [1, 2, np.nan, 4],
@@ -119,7 +120,8 @@ def test_interp_basic_with_non_range_index(self):
         )
 
         msg = "DataFrame.interpolate with object dtype"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
+        warning = FutureWarning if not using_infer_string else None
+        with tm.assert_produces_warning(warning, match=msg):
             result = df.set_index("C").interpolate()
         expected = df.set_index("C")
         expected.loc[3, "A"] = 3
diff --git a/pandas/tests/frame/methods/test_is_homogeneous_dtype.py b/pandas/tests/frame/methods/test_is_homogeneous_dtype.py
index a5f285d31301b..1fe28cb8eb856 100644
--- a/pandas/tests/frame/methods/test_is_homogeneous_dtype.py
+++ b/pandas/tests/frame/methods/test_is_homogeneous_dtype.py
@@ -25,7 +25,8 @@
                 {
                     "A": np.array([1, 2], dtype=object),
                     "B": np.array(["a", "b"], dtype=object),
-                }
+                },
+                dtype="object",
             ),
             True,
         ),
diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
index 0105c41bd0eca..00e7ed1bf6487 100644
--- a/pandas/tests/frame/methods/test_reindex.py
+++ b/pandas/tests/frame/methods/test_reindex.py
@@ -119,7 +119,7 @@ def test_reindex_timestamp_with_fold(self, timezone, year, month, day, hour):
         exp = DataFrame({"index": ["1", "2"], "vals": [np.nan, np.nan]}).set_index(
             "index"
         )
-        exp = exp.astype(object)
+        exp = exp.astype(df.vals.dtype)
         tm.assert_frame_equal(
             df,
             exp,
@@ -837,8 +837,8 @@ def test_reindex_fill_value(self):
 
         # other dtypes
         df["foo"] = "foo"
-        result = df.reindex(range(15), fill_value=0)
-        expected = df.reindex(range(15)).fillna(0)
+        result = df.reindex(range(15), fill_value="0")
+        expected = df.reindex(range(15)).fillna("0")
         tm.assert_frame_equal(result, expected)
 
     def test_reindex_uint_dtypes_fill_value(self, any_unsigned_int_numpy_dtype):

From 9320144a66611e027ec95aa52383a7e22164654d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 1 Oct 2023 22:23:06 +0100
Subject: [PATCH 02/36] BUG: interpolate raising wrong error for ea

---
 doc/source/whatsnew/v2.1.2.rst                 | 1 +
 pandas/core/arrays/base.py                     | 4 ++--
 pandas/tests/frame/methods/test_interpolate.py | 6 ++++++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst
index 1a25b848e0f84..87d95f0dee07f 100644
--- a/doc/source/whatsnew/v2.1.2.rst
+++ b/doc/source/whatsnew/v2.1.2.rst
@@ -22,6 +22,7 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
+- Fixed bug in :meth:`DataFrame.interpolate` raising incorrect error message for extension array dtypes (:issue:`55347`)
 - Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
 - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
 -
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 933944dbd4632..001e0a5f4996e 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -891,8 +891,8 @@ def interpolate(
         limit,
         limit_direction,
         limit_area,
-        fill_value,
-        copy: bool,
+        fill_value=None,
+        copy: bool = True,
         **kwargs,
     ) -> Self:
         """
diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py
index 291a79815a81c..67aa07dd83764 100644
--- a/pandas/tests/frame/methods/test_interpolate.py
+++ b/pandas/tests/frame/methods/test_interpolate.py
@@ -497,3 +497,9 @@ def test_interpolate_empty_df(self):
         result = df.interpolate(inplace=True)
         assert result is None
         tm.assert_frame_equal(df, expected)
+
+    def test_interpolate_ea_raise(self):
+        # GH#55347: Int64 (extension array) data must raise NotImplementedError
+        df = DataFrame({"a": [1, None, 2]}, dtype="Int64")
+        with pytest.raises(NotImplementedError, match="does not implement"):
+            df.interpolate()

From be20fb20e4d2842d450badf12a5ceacec2a1dca8 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 1 Oct 2023 22:45:01 +0100
Subject: [PATCH 03/36] Fix more tests

---
 pandas/tests/frame/methods/test_astype.py         |  2 +-
 pandas/tests/frame/methods/test_convert_dtypes.py |  2 +-
 pandas/tests/frame/methods/test_cov_corr.py       | 12 +++++++++---
 pandas/tests/frame/methods/test_interpolate.py    | 13 ++++++++++---
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index af0010beeb46d..c5924090b070d 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -274,7 +274,7 @@ def test_astype_duplicate_col_series_arg(self):
         result = df.astype(dtypes)
         expected = DataFrame(
             {
-                0: vals[:, 0].astype(str),
+                0: Series(vals[:, 0].astype(str), dtype=object),
                 1: vals[:, 1],
                 2: pd.array(vals[:, 2], dtype="Float64"),
                 3: vals[:, 3],
diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
index c3b04787ff5c1..c00d0418137f0 100644
--- a/pandas/tests/frame/methods/test_convert_dtypes.py
+++ b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -16,7 +16,7 @@ def test_convert_dtypes(
     ):
         # Specific types are tested in tests/series/test_dtypes.py
         # Just check that it works for DataFrame here
-        if using_infer_string and string_storage == "python":
+        if using_infer_string:
             string_storage = "pyarrow_numpy"
         df = pd.DataFrame(
             {
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index 23a9656193d2c..652b441a56eb3 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -323,7 +323,7 @@ def test_corrwith(self, datetime_frame, dtype):
         for row in index[:4]:
             tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row]))
 
-    def test_corrwith_with_objects(self):
+    def test_corrwith_with_objects(self, using_infer_string):
         df1 = tm.makeTimeDataFrame()
         df2 = tm.makeTimeDataFrame()
         cols = ["A", "B", "C", "D"]
@@ -331,8 +331,14 @@ def test_corrwith_with_objects(self):
         df1["obj"] = "foo"
         df2["obj"] = "bar"
 
-        with pytest.raises(TypeError, match="Could not convert"):
-            df1.corrwith(df2)
+        if using_infer_string:
+            import pyarrow as pa
+
+            with pytest.raises(pa.lib.ArrowNotImplementedError, match="has no kernel"):
+                df1.corrwith(df2)
+        else:
+            with pytest.raises(TypeError, match="Could not convert"):
+                df1.corrwith(df2)
         result = df1.corrwith(df2, numeric_only=True)
         expected = df1.loc[:, cols].corrwith(df2.loc[:, cols])
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py
index 45ce8eaa741a1..1300d0808cedd 100644
--- a/pandas/tests/frame/methods/test_interpolate.py
+++ b/pandas/tests/frame/methods/test_interpolate.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas.errors import ChainedAssignmentError
 import pandas.util._test_decorators as td
 
@@ -67,7 +69,10 @@ def test_interpolate_inplace(self, frame_or_series, using_array_manager, request
         assert np.shares_memory(orig, obj.values)
         assert orig.squeeze()[1] == 1.5
 
-    def test_interp_basic(self, using_copy_on_write, using_infer_string):
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="interpolate doesn't work for string"
+    )
+    def test_interp_basic(self, using_copy_on_write):
         df = DataFrame(
             {
                 "A": [1, 2, np.nan, 4],
@@ -85,8 +90,7 @@ def test_interp_basic(self, using_copy_on_write, using_infer_string):
             }
         )
         msg = "DataFrame.interpolate with object dtype"
-        warning = FutureWarning if not using_infer_string else None
-        with tm.assert_produces_warning(warning, match=msg):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             result = df.interpolate()
         tm.assert_frame_equal(result, expected)
 
@@ -109,6 +113,9 @@ def test_interp_basic(self, using_copy_on_write, using_infer_string):
         assert np.shares_memory(df["C"]._values, cvalues)
         assert np.shares_memory(df["D"]._values, dvalues)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="interpolate doesn't work for string"
+    )
     def test_interp_basic_with_non_range_index(self, using_infer_string):
         df = DataFrame(
             {

From 2a3af77d003a829132c431f0ebc9b86fb35ca3af Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 1 Oct 2023 22:58:33 +0100
Subject: [PATCH 04/36] REGR: join segfaulting for arrow string with nulls

---
 doc/source/whatsnew/v2.1.2.rst          | 2 +-
 pandas/core/reshape/merge.py            | 2 ++
 pandas/tests/frame/methods/test_join.py | 5 ++++-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst
index 1a25b848e0f84..a5ba365f2d456 100644
--- a/doc/source/whatsnew/v2.1.2.rst
+++ b/doc/source/whatsnew/v2.1.2.rst
@@ -15,7 +15,7 @@ Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
 - Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`)
--
+- Fixed regression in :meth:`DataFrame.join` segfaulting when the result has missing values and the dtype is arrow backed string (:issue:`55348`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_212.bug_fixes:
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 4b9fcc80af4bb..ba6579a739f54 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -2443,6 +2443,8 @@ def _factorize_keys(
                 .astype(np.intp, copy=False),
                 len(dc.dictionary),
             )
+            if dc.null_count > 0:
+                count += 1
             if how == "right":
                 return rlab, llab, count
             return llab, rlab, count
diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py
index 2d4ac1d4a4444..d131aa6be22f2 100644
--- a/pandas/tests/frame/methods/test_join.py
+++ b/pandas/tests/frame/methods/test_join.py
@@ -158,9 +158,12 @@ def test_join_invalid_validate(left_no_dup, right_no_dup):
         left_no_dup.merge(right_no_dup, on="a", validate="invalid")
 
 
-def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups):
+@pytest.mark.parametrize("dtype", ["object", "string[pyarrow]"])
+def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups, dtype):
     # GH 46622
     # Dups on right allowed by one_to_many constraint
+    left_no_dup = left_no_dup.astype(dtype)
+    right_w_dups.index = right_w_dups.index.astype(dtype)
     left_no_dup.join(
         right_w_dups,
         on="a",

From f48d3843514a7b3a9e5501d2ee4dd05863fd9cb3 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 1 Oct 2023 23:56:48 +0100
Subject: [PATCH 05/36] Fix more tests

---
 pandas/tests/frame/methods/test_nlargest.py      |  2 +-
 pandas/tests/frame/methods/test_reset_index.py   | 12 ++++++++++--
 pandas/tests/frame/methods/test_select_dtypes.py | 12 +++++++++---
 pandas/tests/frame/methods/test_to_csv.py        |  5 +++--
 pandas/tests/frame/methods/test_update.py        | 12 +++++++++---
 5 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index 0bdf9a0e5c007..c5b10481781a2 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -86,7 +86,7 @@ def test_nlargest_n(self, df_strings, nselect_method, n, order):
         df = df_strings
         if "b" in order:
             error_msg = (
-                f"Column 'b' has dtype object, "
+                f"Column 'b' has dtype (object|string), "
                 f"cannot use method '{nselect_method}' with this dtype"
             )
             with pytest.raises(TypeError, match=error_msg):
diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py
index 339e19254fd10..8e8cfd6ba3885 100644
--- a/pandas/tests/frame/methods/test_reset_index.py
+++ b/pandas/tests/frame/methods/test_reset_index.py
@@ -683,10 +683,14 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes):
         ),
     ],
 )
-def test_reset_index_dtypes_on_empty_frame_with_multiindex(array, dtype):
+def test_reset_index_dtypes_on_empty_frame_with_multiindex(
+    array, dtype, using_infer_string
+):
     # GH 19602 - Preserve dtype on empty DataFrame with MultiIndex
     idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
     result = DataFrame(index=idx)[:0].reset_index().dtypes
+    if using_infer_string and dtype == object:
+        dtype = "string"
     expected = Series({"level_0": np.int64, "level_1": np.float64, "level_2": dtype})
     tm.assert_series_equal(result, expected)
 
@@ -708,7 +712,9 @@ def test_reset_index_empty_frame_with_datetime64_multiindex():
     tm.assert_frame_equal(result, expected)
 
 
-def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby():
+def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby(
+    using_infer_string,
+):
     # https://github.com/pandas-dev/pandas/issues/35657
     df = DataFrame({"c1": [10.0], "c2": ["a"], "c3": pd.to_datetime("2020-01-01")})
     df = df.head(0).groupby(["c2", "c3"])[["c1"]].sum()
@@ -718,6 +724,8 @@ def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby():
     )
     expected["c3"] = expected["c3"].astype("datetime64[ns]")
     expected["c1"] = expected["c1"].astype("float64")
+    if using_infer_string:
+        expected["c2"] = expected["c2"].astype("string[pyarrow_numpy]")
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
index 67dd5b6217187..b5b9b90598470 100644
--- a/pandas/tests/frame/methods/test_select_dtypes.py
+++ b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -282,7 +282,7 @@ def test_select_dtypes_duplicate_columns(self):
         result = df.select_dtypes(include=[np.number], exclude=["floating"])
         tm.assert_frame_equal(result, expected)
 
-    def test_select_dtypes_not_an_attr_but_still_valid_dtype(self):
+    def test_select_dtypes_not_an_attr_but_still_valid_dtype(self, using_infer_string):
         df = DataFrame(
             {
                 "a": list("abc"),
@@ -296,11 +296,17 @@ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self):
         df["g"] = df.f.diff()
         assert not hasattr(np, "u8")
         r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
-        e = df[["a", "b"]]
+        if using_infer_string:
+            e = df[["b"]]
+        else:
+            e = df[["a", "b"]]
         tm.assert_frame_equal(r, e)
 
         r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
-        e = df[["a", "b", "g"]]
+        if using_infer_string:
+            e = df[["b", "g"]]
+        else:
+            e = df[["a", "b", "g"]]
         tm.assert_frame_equal(r, e)
 
     def test_select_dtypes_empty(self):
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index 9f45347c31165..83d0e68f28221 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -612,7 +612,7 @@ def _make_frame(names=None):
             tm.assert_index_equal(recons.columns, exp.columns)
             assert len(recons) == 0
 
-    def test_to_csv_interval_index(self):
+    def test_to_csv_interval_index(self, using_infer_string):
         # GH 28210
         df = DataFrame({"A": list("abc"), "B": range(3)}, index=pd.interval_range(0, 3))
 
@@ -622,7 +622,8 @@ def test_to_csv_interval_index(self):
 
             # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
             expected = df.copy()
-            expected.index = expected.index.astype(str)
+            if not using_infer_string:
+                expected.index = expected.index.astype(str)
 
             tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py
index 5738a25f26fcb..803c7770d005d 100644
--- a/pandas/tests/frame/methods/test_update.py
+++ b/pandas/tests/frame/methods/test_update.py
@@ -150,11 +150,17 @@ def test_update_with_different_dtype(self, using_copy_on_write):
             with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
                 df["c"].update(Series(["foo"], index=[0]))
 
-        expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]})
+        expected = DataFrame(
+            {
+                "a": [1, 3],
+                "b": [np.nan, 2],
+                "c": Series(["foo", np.nan], dtype="object"),
+            }
+        )
         tm.assert_frame_equal(df, expected)
 
     @td.skip_array_manager_invalid_test
-    def test_update_modify_view(self, using_copy_on_write):
+    def test_update_modify_view(self, using_copy_on_write, using_infer_string):
         # GH#47188
         df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]})
         df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]})
@@ -163,7 +169,7 @@ def test_update_modify_view(self, using_copy_on_write):
         df2.update(df)
         expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]})
         tm.assert_frame_equal(df2, expected)
-        if using_copy_on_write:
+        if using_copy_on_write or using_infer_string:
             tm.assert_frame_equal(result_view, df2_orig)
         else:
             tm.assert_frame_equal(result_view, expected)

From 1cbeced2a865278a36110a90643c7bcc3cecc76e Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 00:21:13 +0100
Subject: [PATCH 06/36] Fix more tests

---
 pandas/tests/frame/methods/test_fillna.py  |  22 ++++-
 pandas/tests/frame/methods/test_replace.py | 106 ++++++++++++++++++---
 2 files changed, 109 insertions(+), 19 deletions(-)

diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index 6cf859ddfce84..29dec49ac9e23 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -120,19 +120,27 @@ def test_fillna_empty(self, using_copy_on_write):
                 df.x.fillna(method=m, inplace=True)
                 df.x.fillna(method=m)
 
-    def test_fillna_different_dtype(self):
+    def test_fillna_different_dtype(self, using_infer_string):
         # with different dtype (GH#3386)
         df = DataFrame(
             [["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
         )
 
-        result = df.fillna({2: "foo"})
+        if using_infer_string:
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                result = df.fillna({2: "foo"})
+        else:
+            result = df.fillna({2: "foo"})
         expected = DataFrame(
             [["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
         )
         tm.assert_frame_equal(result, expected)
 
-        return_value = df.fillna({2: "foo"}, inplace=True)
+        if using_infer_string:
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                return_value = df.fillna({2: "foo"}, inplace=True)
+        else:
+            return_value = df.fillna({2: "foo"}, inplace=True)
         tm.assert_frame_equal(df, expected)
         assert return_value is None
 
@@ -356,7 +364,7 @@ def test_fillna_dictlike_value_duplicate_colnames(self, columns):
         expected["A"] = 0.0
         tm.assert_frame_equal(result, expected)
 
-    def test_fillna_dtype_conversion(self):
+    def test_fillna_dtype_conversion(self, using_infer_string):
         # make sure that fillna on an empty frame works
         df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
         result = df.dtypes
@@ -371,7 +379,11 @@ def test_fillna_dtype_conversion(self):
 
         # empty block
         df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64")
-        result = df.fillna("nan")
+        if using_infer_string:
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                result = df.fillna("nan")
+        else:
+            result = df.fillna("nan")
         expected = DataFrame("nan", index=range(3), columns=["A", "B"])
         tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index f07c53060a06b..318bbdd4ec337 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -6,6 +6,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -28,6 +30,9 @@ def mix_abc() -> dict[str, list[float | str]]:
 
 
 class TestDataFrameReplace:
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_inplace(self, datetime_frame, float_string_frame):
         datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan
         datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan
@@ -278,14 +283,25 @@ def test_regex_replace_dict_nested(self, mix_abc):
         tm.assert_frame_equal(res3, expec)
         tm.assert_frame_equal(res4, expec)
 
-    def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype):
+    def test_regex_replace_dict_nested_non_first_character(
+        self, any_string_dtype, using_infer_string
+    ):
         # GH 25259
         dtype = any_string_dtype
         df = DataFrame({"first": ["abc", "bca", "cab"]}, dtype=dtype)
-        expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype)
-        result = df.replace({"a": "."}, regex=True)
+        if using_infer_string and any_string_dtype == "object":
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                result = df.replace({"a": "."}, regex=True)
+            expected = DataFrame({"first": [".bc", "bc.", "c.b"]})
+
+        else:
+            result = df.replace({"a": "."}, regex=True)
+            expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_regex_replace_dict_nested_gh4115(self):
         df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2})
         expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2})
@@ -294,6 +310,9 @@ def test_regex_replace_dict_nested_gh4115(self):
             result = df.replace({"Type": {"Q": 0, "T": 1}})
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_regex_replace_list_to_scalar(self, mix_abc):
         df = DataFrame(mix_abc)
         expec = DataFrame(
@@ -322,6 +341,9 @@ def test_regex_replace_list_to_scalar(self, mix_abc):
         tm.assert_frame_equal(res2, expec)
         tm.assert_frame_equal(res3, expec)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_regex_replace_str_to_numeric(self, mix_abc):
         # what happens when you try to replace a numeric value with a regex?
         df = DataFrame(mix_abc)
@@ -337,6 +359,9 @@ def test_regex_replace_str_to_numeric(self, mix_abc):
         tm.assert_frame_equal(res2, expec)
         tm.assert_frame_equal(res3, expec)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_regex_replace_regex_list_to_numeric(self, mix_abc):
         df = DataFrame(mix_abc)
         res = df.replace([r"\s*\.\s*", "b"], 0, regex=True)
@@ -415,12 +440,23 @@ def test_replace_regex_metachar(self, metachar):
         ],
     )
     def test_regex_replace_string_types(
-        self, data, to_replace, expected, frame_or_series, any_string_dtype
+        self,
+        data,
+        to_replace,
+        expected,
+        frame_or_series,
+        any_string_dtype,
+        using_infer_string,
     ):
         # GH-41333, GH-35977
         dtype = any_string_dtype
         obj = frame_or_series(data, dtype=dtype)
-        result = obj.replace(to_replace, regex=True)
+        if using_infer_string and any_string_dtype == "object":
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                result = obj.replace(to_replace, regex=True)
+                dtype = "string[pyarrow_numpy]"
+        else:
+            result = obj.replace(to_replace, regex=True)
         expected = frame_or_series(expected, dtype=dtype)
 
         tm.assert_equal(result, expected)
@@ -522,6 +558,9 @@ def test_replace_series_dict(self):
         result = df.replace(s, df.mean())
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_convert(self):
         # gh 3907
         df = DataFrame([["foo", "bar", "bah"], ["bar", "foo", "bah"]])
@@ -533,6 +572,9 @@ def test_replace_convert(self):
         res = rep.dtypes
         tm.assert_series_equal(expec, res)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_mixed(self, float_string_frame):
         mf = float_string_frame
         mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan
@@ -588,7 +630,7 @@ def test_replace_mixed_int_block_splitting(self):
         result = df.replace(0, 0.5)
         tm.assert_frame_equal(result, expected)
 
-    def test_replace_mixed2(self):
+    def test_replace_mixed2(self, using_infer_string):
         # to object block upcasting
         df = DataFrame(
             {
@@ -607,11 +649,15 @@ def test_replace_mixed2(self):
 
         expected = DataFrame(
             {
-                "A": Series(["foo", "bar"], dtype="object"),
+                "A": Series(["foo", "bar"]),
                 "B": Series([0, "foo"], dtype="object"),
             }
         )
-        result = df.replace([1, 2], ["foo", "bar"])
+        if using_infer_string:
+            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
+                result = df.replace([1, 2], ["foo", "bar"])
+        else:
+            result = df.replace([1, 2], ["foo", "bar"])
         tm.assert_frame_equal(result, expected)
 
     def test_replace_mixed3(self):
@@ -894,6 +940,9 @@ def test_replace_input_formats_listlike(self):
         with pytest.raises(ValueError, match=msg):
             df.replace(to_rep, values[1:])
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_input_formats_scalar(self):
         df = DataFrame(
             {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]}
@@ -922,6 +971,9 @@ def test_replace_limit(self):
         # TODO
         pass
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_dict_no_regex(self):
         answer = Series(
             {
@@ -945,6 +997,9 @@ def test_replace_dict_no_regex(self):
             result = answer.replace(weights)
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_series_no_regex(self):
         answer = Series(
             {
@@ -1051,7 +1106,10 @@ def test_nested_dict_overlapping_keys_replace_str(self):
         expected = df.replace({"a": dict(zip(astr, bstr))})
         tm.assert_frame_equal(result, expected)
 
-    def test_replace_swapping_bug(self):
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
+    def test_replace_swapping_bug(self, using_infer_string):
         df = DataFrame({"a": [True, False, True]})
         res = df.replace({"a": {True: "Y", False: "N"}})
         expect = DataFrame({"a": ["Y", "N", "Y"]})
@@ -1062,6 +1120,9 @@ def test_replace_swapping_bug(self):
         expect = DataFrame({"a": ["Y", "N", "Y"]})
         tm.assert_frame_equal(res, expect)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_period(self):
         d = {
             "fname": {
@@ -1098,6 +1159,9 @@ def test_replace_period(self):
             result = df.replace(d)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     def test_replace_datetime(self):
         d = {
             "fname": {
@@ -1318,6 +1382,9 @@ def test_replace_commutative(self, df, to_replace, exp):
         result = df.replace(to_replace)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     @pytest.mark.parametrize(
         "replacer",
         [
@@ -1478,10 +1545,12 @@ def test_replace_with_compiled_regex(self):
         expected = DataFrame(["z", "b", "c"])
         tm.assert_frame_equal(result, expected)
 
-    def test_replace_intervals(self):
+    def test_replace_intervals(self, using_infer_string):
         # https://github.com/pandas-dev/pandas/issues/35931
         df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]})
-        result = df.replace({"a": {pd.Interval(0, 1): "x"}})
+        warning = FutureWarning if using_infer_string else None
+        with tm.assert_produces_warning(warning, match="Downcasting"):
+            result = df.replace({"a": {pd.Interval(0, 1): "x"}})
         expected = DataFrame({"a": ["x", "x"]})
         tm.assert_frame_equal(result, expected)
 
@@ -1582,6 +1651,9 @@ def test_regex_replace_scalar(
         expected.loc[expected["a"] == ".", "a"] = expected_replace_val
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't set float into string"
+    )
     @pytest.mark.parametrize("regex", [False, True])
     def test_replace_regex_dtype_frame(self, regex):
         # GH-48644
@@ -1619,9 +1691,15 @@ def test_replace_categorical_no_replacement(self):
         result = df.replace(to_replace=[".", "def"], value=["_", None])
         tm.assert_frame_equal(result, expected)
 
-    def test_replace_object_splitting(self):
+    def test_replace_object_splitting(self, using_infer_string):
         # GH#53977
         df = DataFrame({"a": ["a"], "b": "b"})
-        assert len(df._mgr.blocks) == 1
+        if using_infer_string:
+            assert len(df._mgr.blocks) == 2
+        else:
+            assert len(df._mgr.blocks) == 1
         df.replace(to_replace=r"^\s*$", value="", inplace=True, regex=True)
-        assert len(df._mgr.blocks) == 1
+        if using_infer_string:
+            assert len(df._mgr.blocks) == 2
+        else:
+            assert len(df._mgr.blocks) == 1

From 9dddb80915f24f3c915ae0203af90e5a8f41c84f Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 15:08:47 +0100
Subject: [PATCH 07/36] BUG: rank raising for arrow string dtypes

---
 doc/source/whatsnew/v2.1.2.rst          |  2 +-
 pandas/core/arrays/arrow/array.py       | 31 ++++++++++++++++++++-----
 pandas/core/arrays/string_arrow.py      | 23 ++++++++++++++++++
 pandas/tests/frame/methods/test_rank.py | 11 +++++++++
 4 files changed, 60 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst
index 1a25b848e0f84..a2fa7cf32746b 100644
--- a/doc/source/whatsnew/v2.1.2.rst
+++ b/doc/source/whatsnew/v2.1.2.rst
@@ -24,7 +24,7 @@ Bug fixes
 ~~~~~~~~~
 - Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
 - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
--
+- Fixed bug in :meth:`Series.rank` for ``string[pyarrow_numpy]`` dtype (:issue:`55362`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_212.other:
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 4b79d0dbb683e..37ebb69f1d73c 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -1708,7 +1708,7 @@ def __setitem__(self, key, value) -> None:
             data = pa.chunked_array([data])
         self._pa_array = data
 
-    def _rank(
+    def _rank_calc(
         self,
         *,
         axis: AxisInt = 0,
@@ -1717,9 +1717,6 @@ def _rank(
         ascending: bool = True,
         pct: bool = False,
     ):
-        """
-        See Series.rank.__doc__.
-        """
         if pa_version_under9p0 or axis != 0:
             ranked = super()._rank(
                 axis=axis,
@@ -1734,7 +1731,7 @@ def _rank(
             else:
                 pa_type = pa.uint64()
             result = pa.array(ranked, type=pa_type, from_pandas=True)
-            return type(self)(result)
+            return result
 
         data = self._pa_array.combine_chunks()
         sort_keys = "ascending" if ascending else "descending"
@@ -1773,7 +1770,29 @@ def _rank(
                 divisor = pc.count(result)
             result = pc.divide(result, divisor)
 
-        return type(self)(result)
+        return result
+
+    def _rank(
+        self,
+        *,
+        axis: AxisInt = 0,
+        method: str = "average",
+        na_option: str = "keep",
+        ascending: bool = True,
+        pct: bool = False,
+    ):
+        """
+        See Series.rank.__doc__.
+        """
+        return type(self)(
+            self._rank_calc(
+                axis=axis,
+                method=method,
+                na_option=na_option,
+                ascending=ascending,
+                pct=pct,
+            )
+        )
 
     def _quantile(self, qs: npt.NDArray[np.float64], interpolation: str) -> Self:
         """
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 6262055827428..b0ddd082e9c8e 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -53,6 +53,7 @@
     from collections.abc import Sequence
 
     from pandas._typing import (
+        AxisInt,
         Dtype,
         Scalar,
         npt,
@@ -501,6 +502,28 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
     def _convert_int_dtype(self, result):
         return Int64Dtype().__from_arrow__(result)
 
+    def _rank(
+        self,
+        *,
+        axis: AxisInt = 0,
+        method: str = "average",
+        na_option: str = "keep",
+        ascending: bool = True,
+        pct: bool = False,
+    ):
+        """
+        See Series.rank.__doc__.
+        """
+        return self._convert_int_dtype(
+            self._rank_calc(
+                axis=axis,
+                method=method,
+                na_option=na_option,
+                ascending=ascending,
+                pct=pct,
+            )
+        )
+
 
 class ArrowStringArrayNumpySemantics(ArrowStringArray):
     _storage = "pyarrow_numpy"
diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
index 8b451c84dc5da..b037169a8241d 100644
--- a/pandas/tests/frame/methods/test_rank.py
+++ b/pandas/tests/frame/methods/test_rank.py
@@ -488,3 +488,14 @@ def test_rank_mixed_axis_zero(self, data, expected):
             df.rank()
         result = df.rank(numeric_only=True)
         tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "dtype, exp_dtype",
+        [("string[pyarrow]", "Int64"), ("string[pyarrow_numpy]", "float64")],
+    )
+    def test_rank_string_dtype(self, dtype, exp_dtype):
+        # GH#55362
+        obj = Series(["foo", "foo", None, "foo"], dtype=dtype)
+        result = obj.rank(method="first")
+        expected = Series([1, 2, None, 3], dtype=exp_dtype)
+        tm.assert_series_equal(result, expected)

From e07f63930e455e1698e41413fc309587b7d3074a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 15:31:26 +0100
Subject: [PATCH 08/36] BUG: eq not implemented for categorical and arrow
 backed strings

---
 doc/source/whatsnew/v2.1.2.rst                  | 2 +-
 pandas/core/arrays/arrow/array.py               | 5 ++++-
 pandas/tests/indexes/categorical/test_equals.py | 6 ++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst
index 1a25b848e0f84..7bee10d687def 100644
--- a/doc/source/whatsnew/v2.1.2.rst
+++ b/doc/source/whatsnew/v2.1.2.rst
@@ -24,7 +24,7 @@ Bug fixes
 ~~~~~~~~~
 - Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
 - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
--
+- Fixed bug in :meth:`Categorical.equals` if other has arrow backed string dtype (:issue:`55364`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_212.other:
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 4b79d0dbb683e..4937581b0abdd 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -32,6 +32,7 @@
 
 from pandas.core.dtypes.cast import can_hold_element
 from pandas.core.dtypes.common import (
+    CategoricalDtype,
     is_array_like,
     is_bool_dtype,
     is_integer,
@@ -627,7 +628,9 @@ def __setstate__(self, state) -> None:
 
     def _cmp_method(self, other, op):
         pc_func = ARROW_CMP_FUNCS[op.__name__]
-        if isinstance(other, (ArrowExtensionArray, np.ndarray, list, BaseMaskedArray)):
+        if isinstance(
+            other, (ArrowExtensionArray, np.ndarray, list, BaseMaskedArray)
+        ) or isinstance(getattr(other, "dtype", None), CategoricalDtype):
             result = pc_func(self._pa_array, self._box_pa(other))
         elif is_scalar(other):
             try:
diff --git a/pandas/tests/indexes/categorical/test_equals.py b/pandas/tests/indexes/categorical/test_equals.py
index 1ed8f3a903439..c693e1181b9d0 100644
--- a/pandas/tests/indexes/categorical/test_equals.py
+++ b/pandas/tests/indexes/categorical/test_equals.py
@@ -88,3 +88,9 @@ def test_equals_multiindex(self):
         ci = mi.to_flat_index().astype("category")
 
         assert not ci.equals(mi)
+
+    def test_equals_string_dtype(self, any_string_dtype):
+        # GH#55364
+        idx = CategoricalIndex(list("abc"), name="B")
+        other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype)
+        assert idx.equals(other)

From 4c074c11783e617180e396f39ce28ca00001e39a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 15:31:44 +0100
Subject: [PATCH 09/36] More tests

---
 pandas/tests/frame/methods/test_dtypes.py | 2 +-
 pandas/tests/frame/methods/test_rank.py   | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py
index 4bdf16977dae6..00ced2978d85b 100644
--- a/pandas/tests/frame/methods/test_dtypes.py
+++ b/pandas/tests/frame/methods/test_dtypes.py
@@ -146,5 +146,5 @@ def test_frame_apply_np_array_return_type(self):
         # GH 35517
         df = DataFrame([["foo"]])
         result = df.apply(lambda col: np.array("bar"))
-        expected = Series(["bar"])
+        expected = Series([np.array(["bar"])])
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
index 8b451c84dc5da..7b191bad7f2f7 100644
--- a/pandas/tests/frame/methods/test_rank.py
+++ b/pandas/tests/frame/methods/test_rank.py
@@ -13,6 +13,7 @@
 
 from pandas import (
     DataFrame,
+    Index,
     Series,
 )
 import pandas._testing as tm
@@ -478,12 +479,15 @@ def test_rank_object_first(self, frame_or_series, na_option, ascending, expected
     @pytest.mark.parametrize(
         "data,expected",
         [
-            ({"a": [1, 2, "a"], "b": [4, 5, 6]}, DataFrame({"b": [1.0, 2.0, 3.0]})),
+            (
+                {"a": [1, 2, "a"], "b": [4, 5, 6]},
+                DataFrame({"b": [1.0, 2.0, 3.0]}, columns=Index(["b"], dtype=object)),
+            ),
             ({"a": [1, 2, "a"]}, DataFrame(index=range(3), columns=[])),
         ],
     )
     def test_rank_mixed_axis_zero(self, data, expected):
-        df = DataFrame(data)
+        df = DataFrame(data, columns=Index(list(data.keys()), dtype=object))
         with pytest.raises(TypeError, match="'<' not supported between instances of"):
             df.rank()
         result = df.rank(numeric_only=True)

From bf4b3cae85c68b73e6b459ab9bc8b582e728cc0c Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 15:43:40 +0100
Subject: [PATCH 10/36] BUG: ndim of string block incorrect with string
 inference

---
 pandas/core/internals/construction.py   | 2 +-
 pandas/tests/frame/test_constructors.py | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 6f30bc650aa36..d6aeda3d418ed 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -383,7 +383,7 @@ def ndarray_to_mgr(
             new_block(
                 dtype.construct_array_type()._from_sequence(data, dtype=dtype),
                 BlockPlacement(slice(i, i + 1)),
-                ndim=1,
+                ndim=2,
             )
             for i, data in enumerate(obj_columns)
         ]
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index fd851ab244cb8..01fb8cf53c7cd 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2743,6 +2743,12 @@ def test_frame_string_inference_array_string_dtype(self):
             df = DataFrame(np.array([["a", "c"], ["b", "d"]]), columns=["a", "b"])
         tm.assert_frame_equal(df, expected)
 
+    def test_frame_string_inference_block_dim(self):
+        # GH#55363
+        with pd.option_context("future.infer_string", True):
+            df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
+        assert df._mgr.blocks[0].ndim == 2
+
 
 class TestDataFrameConstructorIndexInference:
     def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):

From 3b5974d6247d85fe668e3ca13f5ba4fadb82fe91 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 15:46:39 +0100
Subject: [PATCH 11/36] Fix test

---
 pandas/tests/frame/methods/test_rank.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
index ededae5cec77f..d4b572d96a677 100644
--- a/pandas/tests/frame/methods/test_rank.py
+++ b/pandas/tests/frame/methods/test_rank.py
@@ -470,10 +470,14 @@ def test_rank_inf_nans_na_option(
             ("top", False, [2.0, 3.0, 1.0, 4.0]),
         ],
     )
-    def test_rank_object_first(self, frame_or_series, na_option, ascending, expected):
+    def test_rank_object_first(
+        self, frame_or_series, na_option, ascending, expected, using_infer_string
+    ):
         obj = frame_or_series(["foo", "foo", None, "foo"])
         result = obj.rank(method="first", na_option=na_option, ascending=ascending)
         expected = frame_or_series(expected)
+        if using_infer_string:
+            expected = expected.astype("uint64")
         tm.assert_equal(result, expected)
 
     @pytest.mark.parametrize(

From df81cc03e36814187b5c14b1ecc0ebe91905a174 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 16:05:38 +0100
Subject: [PATCH 12/36] Fix tests

---
 pandas/tests/frame/methods/test_replace.py      | 8 ++++++++
 pandas/tests/frame/methods/test_to_csv.py       | 4 +++-
 pandas/tests/frame/methods/test_value_counts.py | 2 +-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 318bbdd4ec337..e825931669f84 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -447,11 +447,19 @@ def test_regex_replace_string_types(
         frame_or_series,
         any_string_dtype,
         using_infer_string,
+        request,
     ):
         # GH-41333, GH-35977
         dtype = any_string_dtype
         obj = frame_or_series(data, dtype=dtype)
         if using_infer_string and any_string_dtype == "object":
+            if len(to_replace) > 1 and isinstance(obj, DataFrame):
+                request.node.add_marker(
+                    pytest.mark.xfail(
+                        reason="object input array that gets downcasted raises on "
+                        "second pass"
+                    )
+                )
             with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
                 result = obj.replace(to_replace, regex=True)
                 dtype = "string[pyarrow_numpy]"
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index 83d0e68f28221..8fb6a13be8899 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -622,7 +622,9 @@ def test_to_csv_interval_index(self, using_infer_string):
 
             # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
             expected = df.copy()
-            if not using_infer_string:
+            if using_infer_string:
+                expected.index = expected.index.astype("string[pyarrow_numpy]")
+            else:
                 expected.index = expected.index.astype(str)
 
             tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py
index f30db91f82b60..fa177754b4ff3 100644
--- a/pandas/tests/frame/methods/test_value_counts.py
+++ b/pandas/tests/frame/methods/test_value_counts.py
@@ -147,7 +147,7 @@ def test_data_frame_value_counts_dropna_false(nulls_fixture):
         index=pd.MultiIndex(
             levels=[
                 pd.Index(["Anne", "Beth", "John"]),
-                pd.Index(["Louise", "Smith", nulls_fixture]),
+                pd.Index(["Louise", "Smith", np.nan]),  # GH#48476
             ],
             codes=[[0, 1, 2, 2], [2, 0, 1, 2]],
             names=["first_name", "middle_name"],

From 74e09e4e8a62c81ebdf94016b06a7c31acacb39b Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 16:09:10 +0100
Subject: [PATCH 13/36] Fix tests

---
 pandas/tests/frame/methods/test_align.py | 4 ++--
 pandas/tests/frame/methods/test_rank.py  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py
index 87a56c0736287..cf111ba740861 100644
--- a/pandas/tests/frame/methods/test_align.py
+++ b/pandas/tests/frame/methods/test_align.py
@@ -107,7 +107,7 @@ def test_align_float(self, float_frame, using_copy_on_write):
             af, bf = float_frame.align(
                 other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=None
             )
-        tm.assert_index_equal(bf.index, Index([]))
+        tm.assert_index_equal(bf.index, Index([]).astype(bf.index.dtype))
 
         msg = (
             "The 'method', 'limit', and 'fill_axis' keywords in DataFrame.align "
@@ -117,7 +117,7 @@ def test_align_float(self, float_frame, using_copy_on_write):
             af, bf = float_frame.align(
                 other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0
             )
-        tm.assert_index_equal(bf.index, Index([]))
+        tm.assert_index_equal(bf.index, Index([]).astype(bf.index.dtype))
 
         # Try to align DataFrame to Series along bad axis
         msg = "No axis named 2 for object type DataFrame"
diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
index d4b572d96a677..3ed8b3d455546 100644
--- a/pandas/tests/frame/methods/test_rank.py
+++ b/pandas/tests/frame/methods/test_rank.py
@@ -476,7 +476,7 @@ def test_rank_object_first(
         obj = frame_or_series(["foo", "foo", None, "foo"])
         result = obj.rank(method="first", na_option=na_option, ascending=ascending)
         expected = frame_or_series(expected)
-        if using_infer_string:
+        if using_infer_string and isinstance(obj, Series):
             expected = expected.astype("uint64")
         tm.assert_equal(result, expected)
 

From 255267f0adda46454778acc58fcc9b5ea4ec1e7c Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 16:25:34 +0100
Subject: [PATCH 14/36] Fix more indexing tests

---
 pandas/tests/frame/indexing/test_getitem.py  |  2 +-
 pandas/tests/frame/indexing/test_indexing.py | 33 ++++++++++++++------
 pandas/tests/frame/indexing/test_setitem.py  |  2 +-
 3 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py
index 9d9324f557c8d..ed66bdf6c5a25 100644
--- a/pandas/tests/frame/indexing/test_getitem.py
+++ b/pandas/tests/frame/indexing/test_getitem.py
@@ -106,7 +106,7 @@ def test_getitem_list_duplicates(self):
 
     def test_getitem_dupe_cols(self):
         df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
-        msg = "\"None of [Index(['baf'], dtype='object')] are in the [columns]\""
+        msg = "\"None of [Index(['baf'], dtype=.*)] are in the [columns]\""
         with pytest.raises(KeyError, match=re.escape(msg)):
             df[["baf"]]
 
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 370cbf0f33174..1fd27b057cbdc 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -288,7 +288,7 @@ def test_setattr_column(self):
         df.foobar = 5
         assert (df.foobar == 5).all()
 
-    def test_setitem(self, float_frame, using_copy_on_write):
+    def test_setitem(self, float_frame, using_copy_on_write, using_infer_string):
         # not sure what else to do here
         series = float_frame["A"][::2]
         float_frame["col5"] = series
@@ -331,7 +331,10 @@ def test_setitem(self, float_frame, using_copy_on_write):
             with pytest.raises(SettingWithCopyError, match=msg):
                 smaller["col10"] = ["1", "2"]
 
-        assert smaller["col10"].dtype == np.object_
+        if using_infer_string:
+            assert smaller["col10"].dtype == "string"
+        else:
+            assert smaller["col10"].dtype == np.object_
         assert (smaller["col10"] == ["1", "2"]).all()
 
     def test_setitem2(self):
@@ -426,7 +429,7 @@ def test_setitem_cast(self, float_frame):
         float_frame["something"] = 2.5
         assert float_frame["something"].dtype == np.float64
 
-    def test_setitem_corner(self, float_frame):
+    def test_setitem_corner(self, float_frame, using_infer_string):
         # corner case
         df = DataFrame({"B": [1.0, 2.0, 3.0], "C": ["a", "b", "c"]}, index=np.arange(3))
         del df["B"]
@@ -463,10 +466,16 @@ def test_setitem_corner(self, float_frame):
         dm["foo"] = "bar"
         del dm["foo"]
         dm["foo"] = "bar"
-        assert dm["foo"].dtype == np.object_
+        if using_infer_string:
+            assert dm["foo"].dtype == "string"
+        else:
+            assert dm["foo"].dtype == np.object_
 
         dm["coercible"] = ["1", "2", "3"]
-        assert dm["coercible"].dtype == np.object_
+        if using_infer_string:
+            assert dm["coercible"].dtype == "string"
+        else:
+            assert dm["coercible"].dtype == np.object_
 
     def test_setitem_corner2(self):
         data = {
@@ -483,7 +492,7 @@ def test_setitem_corner2(self):
         assert df.loc[1, "title"] == "foobar"
         assert df.loc[1, "cruft"] == 0
 
-    def test_setitem_ambig(self):
+    def test_setitem_ambig(self, using_infer_string):
         # Difficulties with mixed-type data
         # Created as float type
         dm = DataFrame(index=range(3), columns=range(3))
@@ -499,18 +508,22 @@ def test_setitem_ambig(self):
 
         dm[2] = uncoercable_series
         assert len(dm.columns) == 3
-        assert dm[2].dtype == np.object_
+        if using_infer_string:
+            assert dm[2].dtype == "string"
+        else:
+            assert dm[2].dtype == np.object_
 
-    def test_setitem_None(self, float_frame):
+    def test_setitem_None(self, float_frame, using_infer_string):
         # GH #766
         float_frame[None] = float_frame["A"]
+        key = None if not using_infer_string else np.nan
         tm.assert_series_equal(
             float_frame.iloc[:, -1], float_frame["A"], check_names=False
         )
         tm.assert_series_equal(
-            float_frame.loc[:, None], float_frame["A"], check_names=False
+            float_frame.loc[:, key], float_frame["A"], check_names=False
         )
-        tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False)
+        tm.assert_series_equal(float_frame[key], float_frame["A"], check_names=False)
         repr(float_frame)
 
     def test_loc_setitem_boolean_mask_allfalse(self):
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index ccc1249088f9a..0ccd181ebfc6c 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -1298,7 +1298,7 @@ def test_setitem_column_frame_as_category(self):
         df["col2"] = Series([1, 2, 3], dtype="category")
 
         expected_types = Series(
-            ["int64", "category", "category"], index=[0, "col1", "col2"]
+            ["int64", "category", "category"], index=[0, "col1", "col2"], dtype=object
         )
         tm.assert_series_equal(df.dtypes, expected_types)
 

From a62342106b27860624d38c239330872881eb10c3 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 16:31:58 +0100
Subject: [PATCH 15/36] BUG: Index.insert raising when inserting None into new
 string dtype

---
 doc/source/whatsnew/v2.1.2.rst                  | 1 +
 pandas/core/arrays/string_arrow.py              | 5 +++++
 pandas/tests/indexes/base_class/test_reshape.py | 8 ++++++++
 3 files changed, 14 insertions(+)

diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst
index 1a25b848e0f84..5a5380177620e 100644
--- a/doc/source/whatsnew/v2.1.2.rst
+++ b/doc/source/whatsnew/v2.1.2.rst
@@ -24,6 +24,7 @@ Bug fixes
 ~~~~~~~~~
 - Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
 - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
+- Fixed bug in :meth:`Index.insert` raising when inserting ``None`` into :class:`Index` with ``dtype="string[pyarrow_numpy]"`` (:issue:`TODO`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 6262055827428..8cb6e5a0e9db2 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -613,3 +613,8 @@ def _reduce(
             )
         else:
             return super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)
+
+    def insert(self, loc: int, item) -> ArrowStringArrayNumpySemantics:
+        if item is np.nan:
+            item = libmissing.NA
+        return super().insert(loc, item)
diff --git a/pandas/tests/indexes/base_class/test_reshape.py b/pandas/tests/indexes/base_class/test_reshape.py
index 5ecb2c753644d..886793b299ba2 100644
--- a/pandas/tests/indexes/base_class/test_reshape.py
+++ b/pandas/tests/indexes/base_class/test_reshape.py
@@ -54,6 +54,14 @@ def test_insert_datetime_into_object(self, loc, val):
         tm.assert_index_equal(result, expected)
         assert type(expected[2]) is type(val)
 
+    def test_insert_none_into_string_numpy(self):
+        # GH#
+        pytest.importorskip("pyarrow")
+        index = Index(["a", "b", "c"], dtype="string[pyarrow_numpy]")
+        result = index.insert(-1, None)
+        expected = Index(["a", "b", None, "c"], dtype="string[pyarrow_numpy]")
+        tm.assert_index_equal(result, expected)
+
     @pytest.mark.parametrize(
         "pos,expected",
         [

From 3cf79ef7b911ea4177f559705d3a76cfb0904073 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 16:55:08 +0100
Subject: [PATCH 16/36] Fix tests

---
 pandas/tests/frame/indexing/test_getitem.py | 2 +-
 pandas/tests/frame/indexing/test_where.py   | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py
index ed66bdf6c5a25..bf503cb090b45 100644
--- a/pandas/tests/frame/indexing/test_getitem.py
+++ b/pandas/tests/frame/indexing/test_getitem.py
@@ -106,7 +106,7 @@ def test_getitem_list_duplicates(self):
 
     def test_getitem_dupe_cols(self):
         df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
-        msg = "\"None of [Index(['baf'], dtype=.*)] are in the [columns]\""
+        msg = "\"None of [Index(['baf'], dtype="
         with pytest.raises(KeyError, match=re.escape(msg)):
             df[["baf"]]
 
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
index 4576a86ad27cd..0572798b8ce61 100644
--- a/pandas/tests/frame/indexing/test_where.py
+++ b/pandas/tests/frame/indexing/test_where.py
@@ -1079,9 +1079,13 @@ def test_where_producing_ea_cond_for_np_dtype():
 @pytest.mark.parametrize(
     "replacement", [0.001, True, "snake", None, datetime(2022, 5, 4)]
 )
-def test_where_int_overflow(replacement):
+def test_where_int_overflow(replacement, using_infer_string, request):
     # GH 31687
     df = DataFrame([[1.0, 2e25, "nine"], [np.nan, 0.1, None]])
+    if using_infer_string and replacement not in (None, "snake"):
+        request.node.add_marker(
+            pytest.mark.xfail(reason="Can't set non-string into string column")
+        )
     result = df.where(pd.notnull(df), replacement)
     expected = DataFrame([[1.0, 2e25, "nine"], [replacement, 0.1, replacement]])
 

From e823c97a39615193265ac8d96718c0a47edcd11b Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 16:58:28 +0100
Subject: [PATCH 17/36] BUG: Inserting ndim=0 array does not infer string dtype

---
 doc/source/whatsnew/v2.1.2.rst               |  2 +-
 pandas/core/construction.py                  |  9 ++++++++-
 pandas/tests/frame/indexing/test_indexing.py | 12 ++++++++++++
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst
index 1a25b848e0f84..b6c86621537f8 100644
--- a/doc/source/whatsnew/v2.1.2.rst
+++ b/doc/source/whatsnew/v2.1.2.rst
@@ -24,7 +24,7 @@ Bug fixes
 ~~~~~~~~~
 - Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
 - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
--
+- Fixed bug in :meth:`DataFrame.__setitem__` not inferring string dtype for zero-dimensional array with ``infer_string=True`` (:issue:`TODO`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_212.other:
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index aaac0dc73486f..bb1b4978d2367 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -562,7 +562,14 @@ def sanitize_array(
     if not is_list_like(data):
         if index is None:
             raise ValueError("index must be specified when data is not list-like")
-        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
+        if isinstance(data, str) and using_pyarrow_string_dtype():
+            from pandas.core.arrays.string_ import StringDtype
+
+            dtype = StringDtype("pyarrow_numpy")
+            data = dtype.construct_array_type()._from_sequence_of_strings([data])
+        else:
+            data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
+
         return data
 
     elif isinstance(data, ABCExtensionArray):
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 370cbf0f33174..ac4a820b1b319 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1905,6 +1905,18 @@ def test_adding_new_conditional_column() -> None:
     tm.assert_frame_equal(df, expected)
 
 
+def test_add_new_column_infer_string():
+    # GH#
+    df = DataFrame({"x": [1]})
+    with pd.option_context("future.infer_string", True):
+        df.loc[df["x"] == 1, "y"] = "1"
+    expected = DataFrame(
+        {"x": [1], "y": Series(["1"], dtype="string[pyarrow_numpy]")},
+        columns=Index(["x", "y"], dtype="string[pyarrow_numpy]"),
+    )
+    tm.assert_frame_equal(df, expected)
+
+
 class TestSetitemValidation:
     # This is adapted from pandas/tests/arrays/masked/test_indexing.py
     # but checks for warnings instead of errors.

From c89da874207cd95abcfe96ec894738385387c019 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 17:06:32 +0100
Subject: [PATCH 18/36] Fix tests

---
 pandas/core/construction.py                   | 4 +++-
 pandas/tests/frame/indexing/test_set_value.py | 8 +++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index bb1b4978d2367..1b0150dbab602 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -566,7 +566,9 @@ def sanitize_array(
             from pandas.core.arrays.string_ import StringDtype
 
             dtype = StringDtype("pyarrow_numpy")
-            data = dtype.construct_array_type()._from_sequence_of_strings([data])
+            data = dtype.construct_array_type()._from_sequence_of_strings(
+                [data] * len(index)
+            )
         else:
             data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
 
diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py
index 32312868adacb..1e3c793c8449f 100644
--- a/pandas/tests/frame/indexing/test_set_value.py
+++ b/pandas/tests/frame/indexing/test_set_value.py
@@ -16,7 +16,7 @@ def test_set_value(self, float_frame):
                 float_frame._set_value(idx, col, 1)
                 assert float_frame[col][idx] == 1
 
-    def test_set_value_resize(self, float_frame):
+    def test_set_value_resize(self, float_frame, using_infer_string):
         res = float_frame._set_value("foobar", "B", 0)
         assert res is None
         assert float_frame.index[-1] == "foobar"
@@ -27,8 +27,10 @@ def test_set_value_resize(self, float_frame):
 
         res = float_frame.copy()
         res._set_value("foobar", "baz", "sam")
-        assert res["baz"].dtype == np.object_
-
+        if using_infer_string:
+            assert res["baz"].dtype == "string"
+        else:
+            assert res["baz"].dtype == np.object_
         res = float_frame.copy()
         with tm.assert_produces_warning(
             FutureWarning, match="Setting an item of incompatible dtype"

From 04f3d9dc95daaca2e94099c1905625451c3632e6 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 17:44:04 +0100
Subject: [PATCH 19/36] Fix tests

---
 pandas/tests/frame/test_reductions.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index e66557f132c1d..a4bcdbc702e65 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -590,14 +590,15 @@ def test_mode_dropna(self, dropna, expected):
         expected = DataFrame(expected)
         tm.assert_frame_equal(result, expected)
 
-    def test_mode_sortwarning(self):
+    def test_mode_sortwarning(self, using_infer_string):
         # Check for the warning that is raised when the mode
         # results cannot be sorted
 
         df = DataFrame({"A": [np.nan, np.nan, "a", "a"]})
         expected = DataFrame({"A": ["a", np.nan]})
 
-        with tm.assert_produces_warning(UserWarning):
+        warning = None if using_infer_string else UserWarning
+        with tm.assert_produces_warning(warning):
             result = df.mode(dropna=False)
             result = result.sort_values(by="A").reset_index(drop=True)
 
@@ -1273,7 +1274,8 @@ def test_any_datetime(self):
     def test_any_all_bool_only(self):
         # GH 25101
         df = DataFrame(
-            {"col1": [1, 2, 3], "col2": [4, 5, 6], "col3": [None, None, None]}
+            {"col1": [1, 2, 3], "col2": [4, 5, 6], "col3": [None, None, None]},
+            columns=Index(["col1", "col2", "col3"], dtype=object),
         )
 
         result = df.all(bool_only=True)

From 28f5411cdf5f7a1290caa9754a319b60c019d5fd Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 2 Oct 2023 18:04:05 +0100
Subject: [PATCH 20/36] Fix more tests

---
 pandas/tests/frame/test_reductions.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index a4bcdbc702e65..94e3f40b61d52 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -1237,7 +1237,9 @@ def test_any_all_extra(self):
     @pytest.mark.parametrize("axis", [0, 1])
     @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
     @pytest.mark.parametrize("skipna", [True, False])
-    def test_any_all_object_dtype(self, axis, bool_agg_func, skipna):
+    def test_any_all_object_dtype(
+        self, axis, bool_agg_func, skipna, using_infer_string
+    ):
         # GH#35450
         df = DataFrame(
             data=[
@@ -1247,8 +1249,13 @@ def test_any_all_object_dtype(self, axis, bool_agg_func, skipna):
                 [np.nan, np.nan, "5", np.nan],
             ]
         )
+        if using_infer_string:
+            # na in object is True while in string pyarrow numpy it's false
+            val = False if axis == 0 and not skipna and bool_agg_func == "all" else True
+        else:
+            val = True
         result = getattr(df, bool_agg_func)(axis=axis, skipna=skipna)
-        expected = Series([True, True, True, True])
+        expected = Series([True, True, val, True])
         tm.assert_series_equal(result, expected)
 
     # GH#50947 deprecates this but it is not emitting a warning in some builds.

From ca296ec5b4efc412155adc285219a6a64ab24b92 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 3 Oct 2023 22:41:42 +0200
Subject: [PATCH 21/36] Fix more tests

---
 pandas/tests/frame/test_reductions.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 3fe2076f91356..0ef230e448a75 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -167,11 +167,17 @@ class TestDataFrameAnalytics:
             pytest.param("kurt", marks=td.skip_if_no_scipy),
         ],
     )
-    def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname):
-        if (opname in ("sum", "min", "max") and axis == 0) or opname in (
-            "count",
-            "nunique",
-        ):
+    def test_stat_op_api_float_string_frame(
+        self, float_string_frame, axis, opname, using_infer_string
+    ):
+        if (
+            (opname in ("sum", "min", "max") and axis == 0)
+            or opname
+            in (
+                "count",
+                "nunique",
+            )
+        ) and not (using_infer_string and opname == "sum"):
             getattr(float_string_frame, opname)(axis=axis)
         else:
             if opname in ["var", "std", "sem", "skew", "kurt"]:
@@ -197,7 +203,11 @@ def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname):
             elif opname in ["min", "max"]:
                 msg = "'[><]=' not supported between instances of 'float' and 'str'"
             elif opname == "median":
-                msg = re.compile(r"Cannot convert \[.*\] to numeric", flags=re.S)
+                msg = re.compile(
+                    r"Cannot convert \[.*\] to numeric|does not support", flags=re.S
+                )
+            if not isinstance(msg, re.Pattern):
+                msg = msg + "|does not support"
             with pytest.raises(TypeError, match=msg):
                 getattr(float_string_frame, opname)(axis=axis)
         if opname != "nunique":
@@ -358,6 +368,7 @@ def test_mixed_ops(self, op):
                 "Could not convert",
                 "could not convert",
                 "can't multiply sequence by non-int",
+                "does not support",
             ]
         )
         with pytest.raises(TypeError, match=msg):
@@ -369,6 +380,7 @@ def test_mixed_ops(self, op):
                     "Could not convert",
                     "could not convert",
                     "can't multiply sequence by non-int",
+                    "does not support",
                 ]
             )
             with pytest.raises(TypeError, match=msg):
@@ -887,7 +899,8 @@ def test_sum_mixed_datetime(self):
 
     def test_mean_corner(self, float_frame, float_string_frame):
         # unit test when have object data
-        with pytest.raises(TypeError, match="Could not convert"):
+        msg = "Could not convert|does not support"
+        with pytest.raises(TypeError, match=msg):
             float_string_frame.mean(axis=0)
 
         # xs sum mixed type, just want to know it works...

From ab35982e4bba6b49a32e619c733a5507484f8fe3 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 3 Oct 2023 22:50:07 +0200
Subject: [PATCH 22/36] BUG: idxmax raising for arrow strings

---
 pandas/core/arrays/arrow/array.py     | 11 ++++++++++-
 pandas/core/arrays/string_arrow.py    |  8 ++++++++
 pandas/tests/frame/test_reductions.py |  9 +++++++++
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 2c788411eb089..9743ca891d4b8 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -1627,6 +1627,15 @@ def _reduce(
         ------
         TypeError : subclass does not define reductions
         """
+        result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs)
+        if isinstance(result, pa.Array):
+            return type(self)(result)
+        else:
+            return result
+
+    def _reduce_calc(
+        self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
+    ):
         pa_result = self._reduce_pyarrow(name, skipna=skipna, **kwargs)
 
         if keepdims:
@@ -1637,7 +1646,7 @@ def _reduce(
                     [pa_result],
                     type=to_pyarrow_type(infer_dtype_from_scalar(pa_result)[0]),
                 )
-            return type(self)(result)
+            return result
 
         if pc.is_null(pa_result).as_py():
             return self.dtype.na_value
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index e904123849821..33fcdf56d31cb 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -501,6 +501,14 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
     def _convert_int_dtype(self, result):
         return Int64Dtype().__from_arrow__(result)
 
+    def _reduce(
+        self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
+    ):
+        result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs)
+        if name in ("argmin", "argmax") and isinstance(result, pa.Array):
+            return self._convert_int_dtype(result)
+        return type(self)(result)
+
 
 class ArrowStringArrayNumpySemantics(ArrowStringArray):
     _storage = "pyarrow_numpy"
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 77f64b18a82f8..1fcc08946cb04 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -1069,6 +1069,15 @@ def test_idxmax_arrow_types(self):
         expected = Series([2, 1], index=["a", "b"])
         tm.assert_series_equal(result, expected)
 
+        df = DataFrame({"a": ["b", "c", "a"]}, dtype="string[pyarrow]")
+        result = df.idxmax(numeric_only=False)
+        expected = Series([1], index=["a"])
+        tm.assert_series_equal(result, expected)
+
+        result = df.idxmin(numeric_only=False)
+        expected = Series([2], index=["a"])
+        tm.assert_series_equal(result, expected)
+
     def test_idxmax_axis_2(self, float_frame):
         frame = float_frame
         msg = "No axis named 2 for object type DataFrame"

From d910efac8126b1845dad9f64269b09ba525a41b1 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 3 Oct 2023 22:51:10 +0200
Subject: [PATCH 23/36] Fix

---
 pandas/core/arrays/string_arrow.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 33fcdf56d31cb..5f800e781d2fa 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -507,7 +507,10 @@ def _reduce(
         result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs)
         if name in ("argmin", "argmax") and isinstance(result, pa.Array):
             return self._convert_int_dtype(result)
-        return type(self)(result)
+        elif isinstance(result, pa.Array):
+            return type(self)(result)
+        else:
+            return result
 
 
 class ArrowStringArrayNumpySemantics(ArrowStringArray):

From d0221e35a05132447ad6a79d6db675b5148bd287 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 3 Oct 2023 23:01:53 +0200
Subject: [PATCH 24/36] Fix more tests

---
 pandas/tests/frame/test_arithmetic.py | 11 ++++++++---
 pandas/tests/frame/test_query_eval.py | 17 ++++++++++++-----
 pandas/tests/frame/test_reductions.py |  4 ++--
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index bb9a76829c77d..f27e51cb98ea7 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -417,8 +417,8 @@ def test_bool_flex_frame_complex_dtype(self):
 
     def test_bool_flex_frame_object_dtype(self):
         # corner, dtype=object
-        df1 = DataFrame({"col": ["foo", np.nan, "bar"]})
-        df2 = DataFrame({"col": ["foo", datetime.now(), "bar"]})
+        df1 = DataFrame({"col": ["foo", np.nan, "bar"]}, dtype=object)
+        df2 = DataFrame({"col": ["foo", datetime.now(), "bar"]}, dtype=object)
         result = df1.ne(df2)
         exp = DataFrame({"col": [False, True, False]})
         tm.assert_frame_equal(result, exp)
@@ -1997,7 +1997,12 @@ def test_dataframe_blockwise_slicelike():
     "df, col_dtype",
     [
         (DataFrame([[1.0, 2.0], [4.0, 5.0]], columns=list("ab")), "float64"),
-        (DataFrame([[1.0, "b"], [4.0, "b"]], columns=list("ab")), "object"),
+        (
+            DataFrame([[1.0, "b"], [4.0, "b"]], columns=list("ab")).astype(
+                {"b": object}
+            ),
+            "object",
+        ),
     ],
 )
 def test_dataframe_operation_with_non_numeric_types(df, col_dtype):
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 72e8236159bda..880775f709dcb 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -1031,7 +1031,7 @@ def test_query_with_string_columns(self, parser, engine):
             with pytest.raises(NotImplementedError, match=msg):
                 df.query("a in b and c < d", parser=parser, engine=engine)
 
-    def test_object_array_eq_ne(self, parser, engine):
+    def test_object_array_eq_ne(self, parser, engine, using_infer_string):
         df = DataFrame(
             {
                 "a": list("aaaabbbbcccc"),
@@ -1040,11 +1040,14 @@ def test_object_array_eq_ne(self, parser, engine):
                 "d": np.random.default_rng(2).integers(9, size=12),
             }
         )
-        res = df.query("a == b", parser=parser, engine=engine)
+        warning = RuntimeWarning if using_infer_string and engine == "numexpr" else None
+        with tm.assert_produces_warning(warning):
+            res = df.query("a == b", parser=parser, engine=engine)
         exp = df[df.a == df.b]
         tm.assert_frame_equal(res, exp)
 
-        res = df.query("a != b", parser=parser, engine=engine)
+        with tm.assert_produces_warning(warning):
+            res = df.query("a != b", parser=parser, engine=engine)
         exp = df[df.a != df.b]
         tm.assert_frame_equal(res, exp)
 
@@ -1083,12 +1086,16 @@ def test_query_with_nested_special_character(self, parser, engine):
             [">=", operator.ge],
         ],
     )
-    def test_query_lex_compare_strings(self, parser, engine, op, func):
+    def test_query_lex_compare_strings(
+        self, parser, engine, op, func, using_infer_string
+    ):
         a = Series(np.random.default_rng(2).choice(list("abcde"), 20))
         b = Series(np.arange(a.size))
         df = DataFrame({"X": a, "Y": b})
 
-        res = df.query(f'X {op} "d"', engine=engine, parser=parser)
+        warning = RuntimeWarning if using_infer_string and engine == "numexpr" else None
+        with tm.assert_produces_warning(warning):
+            res = df.query(f'X {op} "d"', engine=engine, parser=parser)
         expected = df[func(df.X, "d")]
         tm.assert_frame_equal(res, expected)
 
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 7f7ab4b8989c4..1fe2b3c8ff9b8 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -1888,7 +1888,7 @@ def test_minmax_extensionarray(method, numeric_only):
     expected = Series(
         [getattr(int64_info, method)],
         dtype="Int64",
-        index=Index(["Int64"], dtype="object"),
+        index=Index(["Int64"]),
     )
     tm.assert_series_equal(result, expected)
 
@@ -1906,7 +1906,7 @@ def test_prod_sum_min_count_mixed_object():
     df = DataFrame([1, "a", True])
 
     result = df.prod(axis=0, min_count=1, numeric_only=False)
-    expected = Series(["a"])
+    expected = Series(["a"], dtype=object)
     tm.assert_series_equal(result, expected)
 
     msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'")

From 18d5e62da95b06c07eefc4477a039daead8636e3 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 3 Oct 2023 23:21:39 +0200
Subject: [PATCH 25/36] Fix more tests

---
 pandas/tests/frame/test_query_eval.py    |  2 +-
 pandas/tests/frame/test_stack_unstack.py | 23 +++++++++++++++--------
 pandas/tests/frame/test_unary.py         |  1 +
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 880775f709dcb..bbff4a507d8f4 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -1169,7 +1169,7 @@ def test_bool_arith_expr(self, frame, parser, engine):
     @pytest.mark.parametrize("op", ["+", "-", "*", "/"])
     def test_invalid_type_for_operator_raises(self, parser, engine, op):
         df = DataFrame({"a": [1, 2], "b": ["c", "d"]})
-        msg = r"unsupported operand type\(s\) for .+: '.+' and '.+'"
+        msg = r"unsupported operand type\(s\) for .+: '.+' and '.+'|Cannot"
 
         with pytest.raises(TypeError, match=msg):
             df.eval(f"a {op} b", engine=engine, parser=parser)
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index 9b76ae093e8c4..bbb3d08e0e566 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -602,7 +602,7 @@ def test_unstack_to_series(self, float_frame):
             data = data.unstack()
         tm.assert_frame_equal(old_data, data)
 
-    def test_unstack_dtypes(self):
+    def test_unstack_dtypes(self, using_infer_string):
         # GH 2929
         rows = [[1, 1, 3, 4], [1, 2, 3, 4], [2, 1, 3, 4], [2, 2, 3, 4]]
 
@@ -638,8 +638,9 @@ def test_unstack_dtypes(self):
         df2["D"] = "foo"
         df3 = df2.unstack("B")
         result = df3.dtypes
+        dtype = "string" if using_infer_string else np.dtype("object")
         expected = Series(
-            [np.dtype("float64")] * 2 + [np.dtype("object")] * 2,
+            [np.dtype("float64")] * 2 + [dtype] * 2,
             index=MultiIndex.from_arrays(
                 [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B")
             ),
@@ -1321,14 +1322,16 @@ def test_unstack_fill_frame_object():
     # By default missing values will be NaN
     result = data.unstack()
     expected = DataFrame(
-        {"a": ["a", np.nan, "a"], "b": ["b", "c", np.nan]}, index=list("xyz")
+        {"a": ["a", np.nan, "a"], "b": ["b", "c", np.nan]},
+        index=list("xyz"),
+        dtype=object,
     )
     tm.assert_frame_equal(result, expected)
 
     # Fill with any value replaces missing values as expected
     result = data.unstack(fill_value="d")
     expected = DataFrame(
-        {"a": ["a", "d", "a"], "b": ["b", "c", "d"]}, index=list("xyz")
+        {"a": ["a", "d", "a"], "b": ["b", "c", "d"]}, index=list("xyz"), dtype=object
     )
     tm.assert_frame_equal(result, expected)
 
@@ -2013,7 +2016,7 @@ def test_stack_multiple_bug(self, future_stack):
         multi = df.set_index(["DATE", "ID"])
         multi.columns.name = "Params"
         unst = multi.unstack("ID")
-        msg = re.escape("agg function failed [how->mean,dtype->object]")
+        msg = re.escape("agg function failed [how->mean,dtype->")
         with pytest.raises(TypeError, match=msg):
             unst.resample("W-THU").mean()
         down = unst.resample("W-THU").mean(numeric_only=True)
@@ -2210,7 +2213,7 @@ def test_stack_unstack_unordered_multiindex(self, future_stack):
         tm.assert_frame_equal(result, expected)
 
     def test_unstack_preserve_types(
-        self, multiindex_year_month_day_dataframe_random_data
+        self, multiindex_year_month_day_dataframe_random_data, using_infer_string
     ):
         # GH#403
         ymd = multiindex_year_month_day_dataframe_random_data
@@ -2219,7 +2222,11 @@ def test_unstack_preserve_types(
 
         unstacked = ymd.unstack("month")
         assert unstacked["A", 1].dtype == np.float64
-        assert unstacked["E", 1].dtype == np.object_
+        # `==` binds tighter than the conditional expression, so parenthesize
+        # the expected dtype; otherwise the assert is vacuously true ("string").
+        assert unstacked["E", 1].dtype == (
+            "string" if using_infer_string else np.object_
+        )
         assert unstacked["F", 1].dtype == np.float64
 
     def test_unstack_group_index_overflow(self, future_stack):
@@ -2279,7 +2286,7 @@ def test_unstack_with_missing_int_cast_to_float(self, using_array_manager):
 
         expected = DataFrame(
             [[10.0, 10.0, 1.0, 1.0], [np.nan, 10.0, 0.0, 1.0]],
-            index=Index(["A", "B"], dtype="object", name="a"),
+            index=Index(["A", "B"], name="a"),
             columns=MultiIndex.from_tuples(
                 [("v", "ca"), ("v", "cb"), ("is_", "ca"), ("is_", "cb")],
                 names=[None, "b"],
diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py
index 5e29d3c868983..a6f1f87db3010 100644
--- a/pandas/tests/frame/test_unary.py
+++ b/pandas/tests/frame/test_unary.py
@@ -51,6 +51,7 @@ def test_neg_object(self, df, expected):
     def test_neg_raises(self, df):
         msg = (
             "bad operand type for unary -: 'str'|"
+            "has no kernel matching input types|"
             r"bad operand type for unary -: 'DatetimeArray'"
         )
         with pytest.raises(TypeError, match=msg):

From 130eeb3d3300e2e2cc341a917c0321a1c75a0e58 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 4 Oct 2023 21:43:07 +0200
Subject: [PATCH 26/36] Fix more tests

---
 pandas/tests/frame/test_reductions.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 1fe2b3c8ff9b8..e0765c3fab145 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -6,6 +6,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas.compat import (
     IS64,
     is_platform_windows,
@@ -386,6 +388,9 @@ def test_mixed_ops(self, op):
             with pytest.raises(TypeError, match=msg):
                 getattr(df, op)()
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="sum doesn't work for arrow strings"
+    )
     def test_reduce_mixed_frame(self):
         # GH 6806
         df = DataFrame(
@@ -452,7 +457,9 @@ def test_mean_mixed_string_decimal(self):
 
         df = DataFrame(d)
 
-        with pytest.raises(TypeError, match="unsupported operand type"):
+        with pytest.raises(
+            TypeError, match="unsupported operand type|does not support"
+        ):
             df.mean()
         result = df[["A", "C"]].mean()
         expected = Series([2.7, 681.6], index=["A", "C"], dtype=object)
@@ -584,7 +591,7 @@ def test_mode_dropna(self, dropna, expected):
                 "A": [12, 12, 19, 11],
                 "B": [10, 10, np.nan, 3],
                 "C": [1, np.nan, np.nan, np.nan],
-                "D": [np.nan, np.nan, "a", np.nan],
+                "D": Series([np.nan, np.nan, "a", np.nan], dtype=object),
                 "E": Categorical([np.nan, np.nan, "a", np.nan]),
                 "F": to_datetime(["NaT", "2000-1-2", "NaT", "NaT"]),
                 "G": to_timedelta(["1 days", "nan", "nan", "nan"]),
@@ -1868,6 +1875,9 @@ def test_sum_timedelta64_skipna_false(using_array_manager, request):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.xfail(
+    using_pyarrow_string_dtype(), reason="sum doesn't work with arrow strings"
+)
 def test_mixed_frame_with_integer_sum():
     # https://github.com/pandas-dev/pandas/issues/34520
     df = DataFrame([["a", 1]], columns=list("ab"))

From 1e7b93e5c3ea7bf957c7308ed79985bb2e400bbe Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 4 Oct 2023 22:51:20 +0200
Subject: [PATCH 27/36] Fix remaining tests

---
 .../frame/constructors/test_from_dict.py      |  5 +++
 .../frame/constructors/test_from_records.py   |  5 +++
 pandas/tests/frame/test_api.py                |  2 +
 pandas/tests/frame/test_arithmetic.py         |  5 +++
 pandas/tests/frame/test_block_internals.py    |  4 +-
 pandas/tests/frame/test_constructors.py       | 39 +++++++++++--------
 pandas/tests/frame/test_logical_ops.py        | 12 ++++--
 pandas/tests/frame/test_repr_info.py          |  3 ++
 pandas/tests/frame/test_unary.py              | 21 +++++++---
 9 files changed, 68 insertions(+), 28 deletions(-)

diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py
index d78924ff9d046..631709bf713bf 100644
--- a/pandas/tests/frame/constructors/test_from_dict.py
+++ b/pandas/tests/frame/constructors/test_from_dict.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas import (
     DataFrame,
     Index,
@@ -42,6 +44,9 @@ def test_constructor_single_row(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.skipif(
+        using_pyarrow_string_dtype(), reason="columns inferring logic broken"
+    )
     def test_constructor_list_of_series(self):
         data = [
             OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
index 7dffa7bb242d5..36549fdd9d7df 100644
--- a/pandas/tests/frame/constructors/test_from_records.py
+++ b/pandas/tests/frame/constructors/test_from_records.py
@@ -6,6 +6,8 @@
 import pytest
 import pytz
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas.compat import is_platform_little_endian
 
 from pandas import (
@@ -56,6 +58,9 @@ def test_from_records_with_datetimes(self):
         expected["EXPIRY"] = expected["EXPIRY"].astype("M8[s]")
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.skipif(
+        using_pyarrow_string_dtype(), reason="dtype checking logic doesn't work"
+    )
     def test_from_records_sequencelike(self):
         df = DataFrame(
             {
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
index 8fc78629beb0a..0f1f4018a069c 100644
--- a/pandas/tests/frame/test_api.py
+++ b/pandas/tests/frame/test_api.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
 from pandas._config.config import option_context
 
 from pandas.util._test_decorators import async_mark
@@ -114,6 +115,7 @@ def test_not_hashable(self):
         with pytest.raises(TypeError, match=msg):
             hash(empty_frame)
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="surrogates not allowed")
     def test_column_name_contains_unicode_surrogate(self):
         # GH 25509
         colname = "\ud83d"
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index f27e51cb98ea7..b93d2cd3111d7 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -11,6 +11,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -236,6 +238,9 @@ def test_timestamp_compare(self, left, right):
             with pytest.raises(TypeError, match=msg):
                 right_f(pd.Timestamp("nat"), df)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't compare string and int"
+    )
     def test_mixed_comparison(self):
         # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,
         # not raise TypeError
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
index 9e8d92e832d01..b57b639c955e4 100644
--- a/pandas/tests/frame/test_block_internals.py
+++ b/pandas/tests/frame/test_block_internals.py
@@ -176,7 +176,7 @@ def test_constructor_with_convert(self):
         )
         tm.assert_series_equal(result, expected)
 
-    def test_construction_with_mixed(self, float_string_frame):
+    def test_construction_with_mixed(self, float_string_frame, using_infer_string):
         # test construction edge cases with mixed types
 
         # f7u12, this does not work without extensive workaround
@@ -199,7 +199,7 @@ def test_construction_with_mixed(self, float_string_frame):
         expected = Series(
             [np.dtype("float64")] * 4
             + [
-                np.dtype("object"),
+                np.dtype("object") if not using_infer_string else "string",
                 np.dtype("datetime64[us]"),
                 np.dtype("timedelta64[us]"),
             ],
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 4b41050c86467..b558b303069de 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -21,6 +21,8 @@
 import pytest
 import pytz
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas._libs import lib
 from pandas.errors import IntCastingNaNError
 import pandas.util._test_decorators as td
@@ -79,7 +81,7 @@ def test_constructor_from_ndarray_with_str_dtype(self):
         #  with an array of strings each of which is e.g. "[0 1 2]"
         arr = np.arange(12).reshape(4, 3)
         df = DataFrame(arr, dtype=str)
-        expected = DataFrame(arr.astype(str))
+        expected = DataFrame(arr.astype(str), dtype=object)
         tm.assert_frame_equal(df, expected)
 
     def test_constructor_from_2d_datetimearray(self, using_array_manager):
@@ -261,8 +263,9 @@ def test_emptylike_constructor(self, emptylike, expected_index, expected_columns
         result = DataFrame(emptylike)
         tm.assert_frame_equal(result, expected)
 
-    def test_constructor_mixed(self, float_string_frame):
-        assert float_string_frame["foo"].dtype == np.object_
+    def test_constructor_mixed(self, float_string_frame, using_infer_string):
+        dtype = "string" if using_infer_string else np.object_
+        assert float_string_frame["foo"].dtype == dtype
 
     def test_constructor_cast_failure(self):
         # as of 2.0, we raise if we can't respect "dtype", previously we
@@ -318,6 +321,7 @@ def test_constructor_dtype_nocast_view_2d_array(
             assert df2._mgr.arrays[0].flags.c_contiguous
 
     @td.skip_array_manager_invalid_test
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="conversion copies")
     def test_1d_object_array_does_not_copy(self):
         # https://github.com/pandas-dev/pandas/issues/39272
         arr = np.array(["a", "b"], dtype="object")
@@ -325,6 +329,7 @@ def test_1d_object_array_does_not_copy(self):
         assert np.shares_memory(df.values, arr)
 
     @td.skip_array_manager_invalid_test
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="conversion copies")
     def test_2d_object_array_does_not_copy(self):
         # https://github.com/pandas-dev/pandas/issues/39272
         arr = np.array([["a", "b"], ["c", "d"]], dtype="object")
@@ -764,7 +769,7 @@ def test_constructor_dict_block(self):
         )
         tm.assert_numpy_array_equal(df.values, expected)
 
-    def test_constructor_dict_cast(self):
+    def test_constructor_dict_cast(self, using_infer_string):
         # cast float tests
         test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}}
         frame = DataFrame(test_data, dtype=float)
@@ -774,7 +779,7 @@ def test_constructor_dict_cast(self):
 
         frame = DataFrame(test_data)
         assert len(frame) == 3
-        assert frame["B"].dtype == np.object_
+        assert frame["B"].dtype == ("string" if using_infer_string else np.object_)
         assert frame["A"].dtype == np.float64
 
     def test_constructor_dict_cast2(self):
@@ -1186,7 +1191,7 @@ def test_constructor_dtype_nullable_extension_arrays(
         df = DataFrame({"a": data}, dtype=input_dtype)
         assert df["a"].dtype == expected_dtype()
 
-    def test_constructor_scalar_inference(self):
+    def test_constructor_scalar_inference(self, using_infer_string):
         data = {"int": 1, "bool": True, "float": 3.0, "complex": 4j, "object": "foo"}
         df = DataFrame(data, index=np.arange(10))
 
@@ -1194,7 +1199,7 @@ def test_constructor_scalar_inference(self):
         assert df["bool"].dtype == np.bool_
         assert df["float"].dtype == np.float64
         assert df["complex"].dtype == np.complex128
-        assert df["object"].dtype == np.object_
+        assert df["object"].dtype == ("string" if using_infer_string else np.object_)
 
     def test_constructor_arrays_and_scalars(self):
         df = DataFrame({"a": np.random.default_rng(2).standard_normal(10), "b": True})
@@ -1273,11 +1278,11 @@ def empty_gen():
         df = DataFrame(empty_gen(), columns=["A", "B"])
         tm.assert_frame_equal(df, expected)
 
-    def test_constructor_list_of_lists(self):
+    def test_constructor_list_of_lists(self, using_infer_string):
         # GH #484
         df = DataFrame(data=[[1, "a"], [2, "b"]], columns=["num", "str"])
         assert is_integer_dtype(df["num"])
-        assert df["str"].dtype == np.object_
+        assert df["str"].dtype == ("string" if using_infer_string else np.object_)
 
         # GH 4851
         # list of 0-dim ndarrays
@@ -1822,7 +1827,7 @@ def test_constructor_single_value(self):
         with pytest.raises(TypeError, match=msg):
             DataFrame("a", [1, 2], ["a", "c"], float)
 
-    def test_constructor_with_datetimes(self):
+    def test_constructor_with_datetimes(self, using_infer_string):
         intname = np.dtype(np.int_).name
         floatname = np.dtype(np.float64).name
         objectname = np.dtype(np.object_).name
@@ -1841,7 +1846,7 @@ def test_constructor_with_datetimes(self):
         result = df.dtypes
         expected = Series(
             [np.dtype("int64")]
-            + [np.dtype(objectname)] * 2
+            + [np.dtype(objectname) if not using_infer_string else "string"] * 2
             + [np.dtype("M8[s]"), np.dtype("M8[us]")],
             index=list("ABCDE"),
         )
@@ -1863,7 +1868,7 @@ def test_constructor_with_datetimes(self):
         expected = Series(
             [np.dtype("float64")]
             + [np.dtype("int64")]
-            + [np.dtype("object")]
+            + [np.dtype("object") if not using_infer_string else "string"]
             + [np.dtype("float64")]
             + [np.dtype(intname)],
             index=["a", "b", "c", floatname, intname],
@@ -1885,7 +1890,7 @@ def test_constructor_with_datetimes(self):
         expected = Series(
             [np.dtype("float64")]
             + [np.dtype("int64")]
-            + [np.dtype("object")]
+            + [np.dtype("object") if not using_infer_string else "string"]
             + [np.dtype("float64")]
             + [np.dtype(intname)],
             index=["a", "b", "c", floatname, intname],
@@ -1922,13 +1927,13 @@ def test_constructor_with_datetimes3(self):
         df = DataFrame({"End Date": dt}, index=[0])
         assert df.iat[0, 0] == dt
         tm.assert_series_equal(
-            df.dtypes, Series({"End Date": "datetime64[us, US/Eastern]"})
+            df.dtypes, Series({"End Date": "datetime64[us, US/Eastern]"}, dtype=object)
         )
 
         df = DataFrame([{"End Date": dt}])
         assert df.iat[0, 0] == dt
         tm.assert_series_equal(
-            df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"})
+            df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}, dtype=object)
         )
 
     def test_constructor_with_datetimes4(self):
@@ -2053,7 +2058,7 @@ def test_constructor_timedelta_non_ns(self, order, unit):
         #  dtype=exp_dtype.
         tm.assert_frame_equal(df, expected)
 
-    def test_constructor_for_list_with_dtypes(self):
+    def test_constructor_for_list_with_dtypes(self, using_infer_string):
         # test list of lists/ndarrays
         df = DataFrame([np.arange(5) for x in range(5)])
         result = df.dtypes
@@ -2104,7 +2109,7 @@ def test_constructor_for_list_with_dtypes(self):
             [
                 np.dtype("int64"),
                 np.dtype("float64"),
-                np.dtype("object"),
+                np.dtype("object") if not using_infer_string else "string",
                 np.dtype("datetime64[ns]"),
                 np.dtype("float64"),
             ],
diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py
index a15d7d7f93f01..16ca3a202f1e0 100644
--- a/pandas/tests/frame/test_logical_ops.py
+++ b/pandas/tests/frame/test_logical_ops.py
@@ -96,7 +96,7 @@ def test_logical_ops_int_frame(self):
         res_ser = df1a_int["A"] | df1a_bool["A"]
         tm.assert_series_equal(res_ser, df1a_bool["A"])
 
-    def test_logical_ops_invalid(self):
+    def test_logical_ops_invalid(self, using_infer_string):
         # GH#5808
 
         df1 = DataFrame(1.0, index=[1], columns=["A"])
@@ -108,8 +108,14 @@ def test_logical_ops_invalid(self):
         df1 = DataFrame("foo", index=[1], columns=["A"])
         df2 = DataFrame(True, index=[1], columns=["A"])
         msg = re.escape("unsupported operand type(s) for |: 'str' and 'bool'")
-        with pytest.raises(TypeError, match=msg):
-            df1 | df2
+        if using_infer_string:
+            import pyarrow as pa
+
+            with pytest.raises(pa.lib.ArrowNotImplementedError, match="has no kernel"):
+                df1 | df2
+        else:
+            with pytest.raises(TypeError, match=msg):
+                df1 | df2
 
     def test_logical_operators(self):
         def _check_bin_op(op):
diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
index 64d516e484991..c0cdf4853c4fe 100644
--- a/pandas/tests/frame/test_repr_info.py
+++ b/pandas/tests/frame/test_repr_info.py
@@ -7,6 +7,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas import (
     NA,
     Categorical,
@@ -167,6 +169,7 @@ def test_repr_mixed_big(self):
 
         repr(biggie)
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="/r in")
     def test_repr(self, float_frame):
         buf = StringIO()
 
diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py
index a6f1f87db3010..f79534bc08c93 100644
--- a/pandas/tests/frame/test_unary.py
+++ b/pandas/tests/frame/test_unary.py
@@ -48,16 +48,25 @@ def test_neg_object(self, df, expected):
             pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])}),
         ],
     )
-    def test_neg_raises(self, df):
+    def test_neg_raises(self, df, using_infer_string):
         msg = (
             "bad operand type for unary -: 'str'|"
-            "has no kernel matching input types|"
             r"bad operand type for unary -: 'DatetimeArray'"
         )
-        with pytest.raises(TypeError, match=msg):
-            (-df)
-        with pytest.raises(TypeError, match=msg):
-            (-df["a"])
+        if using_infer_string:
+            import pyarrow as pa
+
+            msg = "has no kernel"
+            with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
+                (-df)
+            with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
+                (-df["a"])
+
+        else:
+            with pytest.raises(TypeError, match=msg):
+                (-df)
+            with pytest.raises(TypeError, match=msg):
+                (-df["a"])
 
     def test_invert(self, float_frame):
         df = float_frame

From 6e55ce22836ac40c321cfabc494b20e6c2f5681a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 4 Oct 2023 22:54:46 +0200
Subject: [PATCH 28/36] Fix remaining tests

---
 pandas/tests/frame/test_unary.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py
index f79534bc08c93..850c92013694f 100644
--- a/pandas/tests/frame/test_unary.py
+++ b/pandas/tests/frame/test_unary.py
@@ -53,7 +53,7 @@ def test_neg_raises(self, df, using_infer_string):
             "bad operand type for unary -: 'str'|"
             r"bad operand type for unary -: 'DatetimeArray'"
         )
-        if using_infer_string:
+        if using_infer_string and df.dtypes.iloc[0] == "string":
             import pyarrow as pa
 
             msg = "has no kernel"

From 70483617f0a4a33cbd77ae0e8ace832d69f9a9d4 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 4 Oct 2023 22:56:19 +0200
Subject: [PATCH 29/36] Change default

---
 pandas/core/config_init.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index c0201e13ed1bf..8c98f9fc25756 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -903,7 +903,7 @@ def register_converter_cb(key) -> None:
 with cf.config_prefix("future"):
     cf.register_option(
         "infer_string",
-        os.environ.get("PANDAS_INFER_STRING", "0") == "0",
+        os.environ.get("PANDAS_INFER_STRING", "0") == "1",
         "Whether to infer sequence of str objects as pyarrow string "
         "dtype, which will be the default in pandas 3.0 "
         "(at which point this option will be deprecated).",

From 0cb459c5763c794c669cc217aec6e07dc9cae97a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 21 Oct 2023 20:07:18 +0200
Subject: [PATCH 30/36] BUG: Groupby not keeping string dtype for empty objects

---
 doc/source/whatsnew/v2.1.2.rst          |  1 +
 pandas/core/arrays/base.py              |  2 ++
 pandas/core/groupby/ops.py              | 20 +++++++++++++-------
 pandas/tests/groupby/test_reductions.py | 13 +++++++++++++
 4 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst
index 97a718dd496e9..0f8ba33160e72 100644
--- a/doc/source/whatsnew/v2.1.2.rst
+++ b/doc/source/whatsnew/v2.1.2.rst
@@ -23,6 +23,7 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
+- Fixed bug in :meth:`.DataFrameGroupBy.min()` and :meth:`.DataFrameGroupBy.max()` not preserving extension dtype for empty objects (:issue:`55619`)
 - Fixed bug in :meth:`Categorical.equals` if other has arrow backed string dtype (:issue:`55364`)
 - Fixed bug in :meth:`DataFrame.__setitem__` not inferring string dtype for zero-dimensional array with ``infer_string=True`` (:issue:`55366`)
 - Fixed bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax` raising for arrow dtypes (:issue:`55368`)
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 05e6fc09a5ef6..b48250ca95df6 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2352,6 +2352,8 @@ def _groupby_op(
         # GH#43682
         if isinstance(self.dtype, StringDtype):
             # StringArray
+            # Fail early to avoid conversion to object
+            op._get_cython_function(op.kind, op.how, np.dtype(object), False)
             npvalues = self.to_numpy(object, na_value=np.nan)
         else:
             raise NotImplementedError(
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 607059e5183ec..e4cba7ce8f1cd 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -33,6 +33,7 @@
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import cache_readonly
 
+from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.cast import (
     maybe_cast_pointwise_result,
     maybe_downcast_to_dtype,
@@ -837,10 +838,8 @@ def agg_series(
         -------
         np.ndarray or ExtensionArray
         """
-        # test_groupby_empty_with_category gets here with self.ngroups == 0
-        #  and len(obj) > 0
 
-        if len(obj) > 0 and not isinstance(obj._values, np.ndarray):
+        if not isinstance(obj._values, np.ndarray):
             # we can preserve a little bit more aggressively with EA dtype
             #  because maybe_cast_pointwise_result will do a try/except
             #  with _from_sequence.  NB we are assuming here that _from_sequence
@@ -849,11 +848,18 @@ def agg_series(
 
         result = self._aggregate_series_pure_python(obj, func)
 
-        npvalues = lib.maybe_convert_objects(result, try_float=False)
-        if preserve_dtype:
-            out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
+        if len(obj) == 0 and len(result) == 0 and isinstance(obj.dtype, ExtensionDtype):
+            cls = obj.dtype.construct_array_type()
+            out = cls._from_sequence(result)
+
         else:
-            out = npvalues
+            npvalues = lib.maybe_convert_objects(result, try_float=False)
+            if preserve_dtype:
+                out = maybe_cast_pointwise_result(
+                    npvalues, obj.dtype, numeric_only=True
+                )
+            else:
+                out = npvalues
         return out
 
     @final
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index fdfb211ac2269..35ad8e3f5dc61 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -575,6 +575,19 @@ def test_groupby_min_max_categorical(func):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("func", ["min", "max"])
+def test_min_empty_string_dtype(func):
+    # GH#55619
+    pytest.importorskip("pyarrow")
+    dtype = "string[pyarrow_numpy]"
+    df = DataFrame({"a": ["a"], "b": "a", "c": "a"}, dtype=dtype).iloc[:0]
+    result = getattr(df.groupby("a"), func)()
+    expected = DataFrame(
+        columns=["b", "c"], dtype=dtype, index=pd.Index([], dtype=dtype, name="a")
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 def test_max_nan_bug():
     raw = """,Date,app,File
 -04-23,2013-04-23 00:00:00,,log080001.log

From aff4f170a465d376c991c0048ad69b075bf0f09a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 21 Oct 2023 20:36:01 +0200
Subject: [PATCH 31/36] Start fixing gb tests

---
 pandas/core/arrays/base.py               |  2 ++
 pandas/core/config_init.py               |  2 +-
 pandas/tests/groupby/test_apply.py       | 27 +++++++++++++-----------
 pandas/tests/groupby/test_categorical.py |  5 +++--
 pandas/tests/groupby/test_function.py    | 18 ++++++++++++----
 pandas/tests/groupby/test_groupby.py     | 25 ++++++++++++++--------
 6 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 177c105688d0c..254b38cd59f13 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2300,6 +2300,8 @@ def _groupby_op(
         # GH#43682
         if isinstance(self.dtype, StringDtype):
             # StringArray
+            # Fail early to avoid conversion to object
+            op._get_cython_function(op.kind, op.how, np.dtype(object), False)
             npvalues = self.to_numpy(object, na_value=np.nan)
         else:
             raise NotImplementedError(
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 8c98f9fc25756..06979f90a571f 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -903,7 +903,7 @@ def register_converter_cb(key) -> None:
 with cf.config_prefix("future"):
     cf.register_option(
         "infer_string",
-        os.environ.get("PANDAS_INFER_STRING", "0") == "1",
+        os.environ.get("PANDAS_INFER_STRING", "1") == "1",
         "Whether to infer sequence of str objects as pyarrow string "
         "dtype, which will be the default in pandas 3.0 "
         "(at which point this option will be deprecated).",
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index abcb9f68e0f5c..b3dbe9f778573 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -38,7 +38,7 @@ def store(group):
     tm.assert_frame_equal(groups[0], expected_value)
 
 
-def test_apply_issues():
+def test_apply_issues(using_infer_string):
     # GH 5788
 
     s = """2011.05.16,00:00,1.40893
@@ -69,8 +69,9 @@ def test_apply_issues():
     # GH 5789
     # don't auto coerce dates
     df = pd.read_csv(StringIO(s), header=None, names=["date", "time", "value"])
+    dtype = "string[pyarrow_numpy]" if using_infer_string else object
     exp_idx = Index(
-        ["2011.05.16", "2011.05.17", "2011.05.18"], dtype=object, name="date"
+        ["2011.05.16", "2011.05.17", "2011.05.18"], dtype=dtype, name="date"
     )
     expected = Series(["00:00", "02:00", "02:00"], index=exp_idx)
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
@@ -81,14 +82,15 @@ def test_apply_issues():
     tm.assert_series_equal(result, expected)
 
 
-def test_apply_trivial():
+def test_apply_trivial(using_infer_string):
     # GH 20066
     # trivial apply: ignore input and return a constant dataframe.
     df = DataFrame(
         {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]},
         columns=["key", "data"],
     )
-    expected = pd.concat([df.iloc[1:], df.iloc[1:]], axis=1, keys=["float64", "object"])
+    dtype = "string" if using_infer_string else "object"
+    expected = pd.concat([df.iloc[1:], df.iloc[1:]], axis=1, keys=["float64", dtype])
 
     msg = "DataFrame.groupby with axis=1 is deprecated"
     with tm.assert_produces_warning(FutureWarning, match=msg):
@@ -98,13 +100,14 @@ def test_apply_trivial():
     tm.assert_frame_equal(result, expected)
 
 
-def test_apply_trivial_fail():
+def test_apply_trivial_fail(using_infer_string):
     # GH 20066
     df = DataFrame(
         {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]},
         columns=["key", "data"],
     )
-    expected = pd.concat([df, df], axis=1, keys=["float64", "object"])
+    dtype = "string" if using_infer_string else "object"
+    expected = pd.concat([df, df], axis=1, keys=["float64", dtype])
     msg = "DataFrame.groupby with axis=1 is deprecated"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         gb = df.groupby([str(x) for x in df.dtypes], axis=1, group_keys=True)
@@ -901,7 +904,7 @@ def test_func_returns_object():
     "group_column_dtlike",
     [datetime.today(), datetime.today().date(), datetime.today().time()],
 )
-def test_apply_datetime_issue(group_column_dtlike):
+def test_apply_datetime_issue(group_column_dtlike, using_infer_string):
     # GH-28247
     # groupby-apply throws an error if one of the columns in the DataFrame
     #   is a datetime object and the column labels are different from
@@ -912,9 +915,8 @@ def test_apply_datetime_issue(group_column_dtlike):
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby("a").apply(lambda x: Series(["spam"], index=[42]))
 
-    expected = DataFrame(
-        ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42]
-    )
+    dtype = "string" if using_infer_string else "object"
+    expected = DataFrame(["spam"], Index(["foo"], dtype=dtype, name="a"), columns=[42])
     tm.assert_frame_equal(result, expected)
 
 
@@ -981,7 +983,7 @@ def test_apply_multi_level_name(category):
     assert df.index.names == ["A", "B"]
 
 
-def test_groupby_apply_datetime_result_dtypes():
+def test_groupby_apply_datetime_result_dtypes(using_infer_string):
     # GH 14849
     data = DataFrame.from_records(
         [
@@ -995,8 +997,9 @@ def test_groupby_apply_datetime_result_dtypes():
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = data.groupby("color").apply(lambda g: g.iloc[0]).dtypes
+    dtype = "string" if using_infer_string else object
     expected = Series(
-        [np.dtype("datetime64[ns]"), object, object, np.int64, object],
+        [np.dtype("datetime64[ns]"), dtype, dtype, np.int64, dtype],
         index=["observation", "color", "mood", "intensity", "score"],
     )
     tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index b11240c841420..51e585c06d5b1 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -82,7 +82,7 @@ def get_stats(group):
     assert result.index.names[0] == "C"
 
 
-def test_basic():  # TODO: split this test
+def test_basic(using_infer_string):  # TODO: split this test
     cats = Categorical(
         ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
         categories=["a", "b", "c", "d"],
@@ -129,7 +129,8 @@ def f(x):
         result = g.apply(f)
     expected = x.iloc[[0, 1]].copy()
     expected.index = Index([1, 2], name="person_id")
-    expected["person_name"] = expected["person_name"].astype("object")
+    dtype = "string[pyarrow_numpy]" if using_infer_string else object
+    expected["person_name"] = expected["person_name"].astype(dtype)
     tm.assert_frame_equal(result, expected)
 
     # GH 9921
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index a92880c87b847..ac15e0489d45a 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -275,6 +275,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                 [
                     "category type does not support sum operations",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
+                    re.escape(f"agg function failed [how->{method},dtype->string]"),
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -291,6 +292,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     "function is not implemented for this dtype",
                     f"Cannot perform {method} with non-ordered Categorical",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
+                    re.escape(f"agg function failed [how->{method},dtype->string]"),
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -545,7 +547,7 @@ def test_idxmin_idxmax_axis1():
 
 
 @pytest.mark.parametrize("numeric_only", [True, False, None])
-def test_axis1_numeric_only(request, groupby_func, numeric_only):
+def test_axis1_numeric_only(request, groupby_func, numeric_only, using_infer_string):
     if groupby_func in ("idxmax", "idxmin"):
         pytest.skip("idxmax and idx_min tested in test_idxmin_idxmax_axis1")
     if groupby_func in ("corrwith", "skew"):
@@ -607,8 +609,15 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only):
             "can't multiply sequence by non-int of type 'float'",
             # cumsum, diff, pct_change
             "unsupported operand type",
+            "has no kernel",
         )
-        with pytest.raises(TypeError, match=f"({'|'.join(msgs)})"):
+        if using_infer_string:
+            import pyarrow as pa
+
+            errs = (TypeError, pa.lib.ArrowNotImplementedError)
+        else:
+            errs = TypeError
+        with pytest.raises(errs, match=f"({'|'.join(msgs)})"):
             with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                 method(*args, **kwargs)
     else:
@@ -1657,7 +1666,7 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
         {"percentiles": [0.10, 0.20, 0.30], "include": ["int"], "exclude": None},
     ],
 )
-def test_groupby_empty_dataset(dtype, kwargs):
+def test_groupby_empty_dataset(dtype, kwargs, using_infer_string):
     # GH#41575
     df = DataFrame([[1, 2, 3]], columns=["A", "B", "C"], dtype=dtype)
     df["B"] = df["B"].astype(int)
@@ -1669,7 +1678,8 @@ def test_groupby_empty_dataset(dtype, kwargs):
 
     result = df.iloc[:0].groupby("A").B.describe(**kwargs)
     expected = df.groupby("A").B.describe(**kwargs).reset_index(drop=True).iloc[:0]
-    expected.index = Index([])
+    dtype = "string[pyarrow_numpy]" if using_infer_string else None
+    expected.index = Index([], dtype=dtype)
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 4ca8b0e317bd2..ced688c951d8d 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -10,6 +10,8 @@
     SpecificationError,
 )
 
+from pandas.core.dtypes.common import is_string_dtype
+
 import pandas as pd
 from pandas import (
     Categorical,
@@ -672,7 +674,7 @@ def test_frame_multi_key_function_list_partial_failure():
 
     grouped = data.groupby(["A", "B"])
     funcs = ["mean", "std"]
-    msg = re.escape("agg function failed [how->mean,dtype->object]")
+    msg = re.escape("agg function failed [how->mean,dtype->")
     with pytest.raises(TypeError, match=msg):
         grouped.agg(funcs)
 
@@ -961,7 +963,7 @@ def test_groupby_multi_corner(df):
 
 def test_raises_on_nuisance(df):
     grouped = df.groupby("A")
-    msg = re.escape("agg function failed [how->mean,dtype->object]")
+    msg = re.escape("agg function failed [how->mean,dtype->")
     with pytest.raises(TypeError, match=msg):
         grouped.agg("mean")
     with pytest.raises(TypeError, match=msg):
@@ -1017,7 +1019,7 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only):
             msg = "could not convert string to float: 'one'"
         else:
             klass = TypeError
-            msg = re.escape(f"agg function failed [how->{agg_function},dtype->object]")
+            msg = re.escape(f"agg function failed [how->{agg_function},dtype->")
         with pytest.raises(klass, match=msg):
             getattr(grouped, agg_function)(numeric_only=numeric_only)
     else:
@@ -1042,7 +1044,7 @@ def test_raise_on_nuisance_python_single(df):
 
 def test_raise_on_nuisance_python_multiple(three_group):
     grouped = three_group.groupby(["A", "B"])
-    msg = re.escape("agg function failed [how->mean,dtype->object]")
+    msg = re.escape("agg function failed [how->mean,dtype->")
     with pytest.raises(TypeError, match=msg):
         grouped.agg("mean")
     with pytest.raises(TypeError, match=msg):
@@ -1085,7 +1087,7 @@ def test_wrap_aggregated_output_multindex(mframe):
     df["baz", "two"] = "peekaboo"
 
     keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
-    msg = re.escape("agg function failed [how->mean,dtype->object]")
+    msg = re.escape("agg function failed [how->mean,dtype->")
     with pytest.raises(TypeError, match=msg):
         df.groupby(keys).agg("mean")
     agged = df.drop(columns=("baz", "two")).groupby(keys).agg("mean")
@@ -1174,7 +1176,7 @@ def test_groupby_complex():
     tm.assert_series_equal(result, expected)
 
 
-def test_groupby_complex_numbers():
+def test_groupby_complex_numbers(using_infer_string):
     # GH 17927
     df = DataFrame(
         [
@@ -1183,10 +1185,11 @@ def test_groupby_complex_numbers():
             {"a": 4, "b": 1},
         ]
     )
+    dtype = "string[pyarrow_numpy]" if using_infer_string else object
     expected = DataFrame(
         np.array([1, 1, 1], dtype=np.int64),
         index=Index([(1 + 1j), (1 + 2j), (1 + 0j)], name="b"),
-        columns=Index(["a"], dtype="object"),
+        columns=Index(["a"], dtype=dtype),
     )
     result = df.groupby("b", sort=False).count()
     tm.assert_frame_equal(result, expected)
@@ -1697,14 +1700,18 @@ def g(group):
 
 
 @pytest.mark.parametrize("grouper", ["A", ["A", "B"]])
-def test_set_group_name(df, grouper):
+def test_set_group_name(df, grouper, using_infer_string):
     def f(group):
         assert group.name is not None
         return group
 
     def freduce(group):
         assert group.name is not None
-        return group.sum()
+        if using_infer_string and grouper == "A" and is_string_dtype(group.dtype):
+            with pytest.raises(TypeError, match="does not support"):
+                group.sum()
+        else:
+            return group.sum()
 
     def freducex(x):
         return freduce(x)

From 620555efa85e1952bb08c350dc5192c5eb0d6a9e Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 21 Oct 2023 21:25:15 +0200
Subject: [PATCH 32/36] Fix tests

---
 pandas/tests/groupby/test_groupby.py | 10 ++++++++--
 pandas/tests/groupby/test_raises.py  |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 6e8f2e0391c7d..17e8bdff942ca 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2008,7 +2008,9 @@ def test_pivot_table_values_key_error():
 @pytest.mark.parametrize(
     "op", ["idxmax", "idxmin", "min", "max", "sum", "prod", "skew"]
 )
-def test_empty_groupby(columns, keys, values, method, op, using_array_manager, dropna):
+def test_empty_groupby(
+    columns, keys, values, method, op, using_array_manager, dropna, using_infer_string
+):
     # GH8093 & GH26411
     override_dtype = None
 
@@ -2049,7 +2051,11 @@ def get_categorical_invalid_expected():
             # Categorical is special without 'observed=True'
             idx = Index(lev, name=keys[0])
 
-        expected = DataFrame([], columns=[], index=idx)
+        if using_infer_string:
+            columns = Index([], dtype="string[pyarrow_numpy]")
+        else:
+            columns = []
+        expected = DataFrame([], columns=columns, index=idx)
         return expected
 
     is_per = isinstance(df.dtypes.iloc[0], pd.PeriodDtype)
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index 46bf324fad1d7..613ecd30cb158 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -189,7 +189,7 @@ def test_groupby_raises_string(
         "sum": (None, ""),
         "var": (
             TypeError,
-            re.escape("agg function failed [how->var,dtype->object]"),
+            re.escape("agg function failed [how->var,dtype->"),
         ),
     }[groupby_func]
 

From 05f1c958f242c17aebac40dd53145938b58fb93b Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 8 Dec 2023 22:39:39 +0100
Subject: [PATCH 33/36] Merge main

---
 pandas/core/arrays/string_arrow.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index e079580302b7e..2a10e87981bc3 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -535,17 +535,6 @@ def _rank(
             )
         )
 
-    def _reduce(
-        self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
-    ):
-        result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs)
-        if name in ("argmin", "argmax") and isinstance(result, pa.Array):
-            return self._convert_int_dtype(result)
-        elif isinstance(result, pa.Array):
-            return type(self)(result)
-        else:
-            return result
-
 
 class ArrowStringArrayNumpySemantics(ArrowStringArray):
     _storage = "pyarrow_numpy"

From c7ba71774ffa96db92ddc04b46a59ea1b70435c9 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 8 Dec 2023 23:30:13 +0100
Subject: [PATCH 34/36] Update config_init.py

---
 pandas/core/config_init.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 112f889d99b06..a8b63f97141c2 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -905,7 +905,7 @@ def register_converter_cb(key) -> None:
 with cf.config_prefix("future"):
     cf.register_option(
         "infer_string",
-        os.environ.get("PANDAS_INFER_STRING", "1") == "1",
+        False,
         "Whether to infer sequence of str objects as pyarrow string "
         "dtype, which will be the default in pandas 3.0 "
         "(at which point this option will be deprecated).",

From 15fb683d6f2ff0b92fed2bbbf0350c87710e4a73 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 9 Dec 2023 00:11:15 +0100
Subject: [PATCH 35/36] Fixup

---
 pandas/tests/frame/test_reductions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index cb26977e140e2..66145c32c18d7 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -1376,7 +1376,7 @@ def test_any_all_object_dtype(
         )
         if using_infer_string:
             # na in object is True while in string pyarrow numpy it's false
-            val = False if axis == 0 and not skipna and bool_agg_func == "all" else True
+            val = not (axis == 0 and not skipna and bool_agg_func == "all")
         else:
             val = True
         result = getattr(df, bool_agg_func)(axis=axis, skipna=skipna)

From fcc8245fbe6a8c7540e2f2accfe1a86f427cc9cf Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 9 Dec 2023 00:12:56 +0100
Subject: [PATCH 36/36] Update

---
 pandas/tests/frame/methods/test_dtypes.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py
index 00ced2978d85b..ab632ac17318e 100644
--- a/pandas/tests/frame/methods/test_dtypes.py
+++ b/pandas/tests/frame/methods/test_dtypes.py
@@ -142,9 +142,12 @@ def test_dtypes_timedeltas(self):
         )
         tm.assert_series_equal(result, expected)
 
-    def test_frame_apply_np_array_return_type(self):
+    def test_frame_apply_np_array_return_type(self, using_infer_string):
         # GH 35517
         df = DataFrame([["foo"]])
         result = df.apply(lambda col: np.array("bar"))
-        expected = Series([np.array(["bar"])])
+        if using_infer_string:
+            expected = Series([np.array(["bar"])])
+        else:
+            expected = Series(["bar"])
         tm.assert_series_equal(result, expected)