From ff6e271961434063b277e723e918cc7f3cbc5dea Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 4 Nov 2023 22:47:43 -0400
Subject: [PATCH 01/13] BUG: Index.getitem returning wrong result with negative
 step for arrow

---
 doc/source/whatsnew/v2.1.3.rst               |  2 +-
 pandas/core/arrays/arrow/array.py            |  7 +++++++
 pandas/tests/indexes/object/test_indexing.py | 14 +++++++++++---
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.3.rst b/doc/source/whatsnew/v2.1.3.rst
index 3b1cd1c152baa..f4c32b6ecc056 100644
--- a/doc/source/whatsnew/v2.1.3.rst
+++ b/doc/source/whatsnew/v2.1.3.rst
@@ -22,7 +22,7 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 - Bug in :meth:`DatetimeIndex.diff` raising ``TypeError`` (:issue:`55080`)
--
+- Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_213.other:
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 4bcc03643dac8..43927f554a875 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -553,6 +553,13 @@ def __getitem__(self, item: PositionalIndexer):
             )
         # We are not an array indexer, so maybe e.g. a slice or integer
         # indexer. We dispatch to pyarrow.
+        if isinstance(item, slice):
+            if item.start == item.stop:
+                pass
+            elif item.start == -len(self) - 1:
+                item = slice(None, item.stop, item.step)
+            elif item.stop == -len(self) - 1:
+                item = slice(item.start, None, item.step)
         value = self._pa_array[item]
         if isinstance(value, pa.ChunkedArray):
             return type(self)(value)
diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py
index 87d3afc77d556..7a6e93a4605d7 100644
--- a/pandas/tests/indexes/object/test_indexing.py
+++ b/pandas/tests/indexes/object/test_indexing.py
@@ -4,6 +4,7 @@
 import pytest
 
 from pandas._libs.missing import is_matching_na
+import pandas.util._test_decorators as td
 
 import pandas as pd
 from pandas import Index
@@ -144,6 +145,13 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
 
 
 class TestSliceLocs:
+    @pytest.mark.parametrize(
+        "dtype",
+        [
+            "object",
+            pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
+        ],
+    )
     @pytest.mark.parametrize(
         "in_slice,expected",
         [
@@ -167,12 +175,12 @@ class TestSliceLocs:
             (pd.IndexSlice["m":"m":-1], ""),  # type: ignore[misc]
         ],
     )
-    def test_slice_locs_negative_step(self, in_slice, expected):
-        index = Index(list("bcdxy"))
+    def test_slice_locs_negative_step(self, in_slice, expected, dtype):
+        index = Index(list("bcdxy"), dtype=dtype)
 
         s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
         result = index[s_start : s_stop : in_slice.step]
-        expected = Index(list(expected))
+        expected = Index(list(expected), dtype=dtype)
         tm.assert_index_equal(result, expected)
 
     def test_slice_locs_dup(self):

From 7684911a70609559f7efcb84c73662ba0579a169 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 5 Nov 2023 09:18:25 -0500
Subject: [PATCH 02/13] Handle slice bounds below -len(self) - 1; add OOB test

---
 pandas/core/arrays/arrow/array.py            |  5 +++--
 pandas/tests/indexes/object/test_indexing.py | 11 +++++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 43927f554a875..e073b8c20acbe 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -556,10 +556,11 @@ def __getitem__(self, item: PositionalIndexer):
         if isinstance(item, slice):
             if item.start == item.stop:
                 pass
-            elif item.start == -len(self) - 1:
+            elif item.start <= -len(self) - 1:
                 item = slice(None, item.stop, item.step)
-            elif item.stop == -len(self) - 1:
+            elif item.stop <= -len(self) - 1:
                 item = slice(item.start, None, item.step)
+
         value = self._pa_array[item]
         if isinstance(value, pa.ChunkedArray):
             return type(self)(value)
diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py
index 7a6e93a4605d7..93d46ebdd0b51 100644
--- a/pandas/tests/indexes/object/test_indexing.py
+++ b/pandas/tests/indexes/object/test_indexing.py
@@ -183,6 +183,17 @@ def test_slice_locs_negative_step(self, in_slice, expected, dtype):
         expected = Index(list(expected), dtype=dtype)
         tm.assert_index_equal(result, expected)
 
+    @td.skip_if_no("pyarrow")
+    def test_slice_locs_negative_step_oob(self):
+        index = Index(list("bcdxy"), dtype="string[pyarrow_numpy]")
+
+        result = index[-10:5:1]
+        tm.assert_index_equal(result, index)
+
+        result = index[4:-10:-1]
+        expected = Index(list("yxdcb"), dtype="string[pyarrow_numpy]")
+        tm.assert_index_equal(result, expected)
+
     def test_slice_locs_dup(self):
         index = Index(["a", "a", "b", "c", "d", "d"])
         assert index.slice_locs("a", "d") == (0, 6)

From 7474cb2b70e548a6000dd797c8f5c524b08624cb Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 5 Nov 2023 09:21:00 -0500
Subject: [PATCH 03/13] Simplify out-of-bounds check to `< -len(self)`

---
 pandas/core/arrays/arrow/array.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index e073b8c20acbe..87d355ef79142 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -556,9 +556,9 @@ def __getitem__(self, item: PositionalIndexer):
         if isinstance(item, slice):
             if item.start == item.stop:
                 pass
-            elif item.start <= -len(self) - 1:
+            elif item.start < -len(self):
                 item = slice(None, item.stop, item.step)
-            elif item.stop <= -len(self) - 1:
+            elif item.stop < -len(self):
                 item = slice(item.start, None, item.step)
 
         value = self._pa_array[item]

From b27b0f82b85e20d0c3ae724acbece252999a8b26 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 5 Nov 2023 09:26:59 -0500
Subject: [PATCH 04/13] Guard slice bound comparisons against None start/stop

---
 pandas/core/arrays/arrow/array.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 87d355ef79142..9a6b91f21d90f 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -556,9 +556,9 @@ def __getitem__(self, item: PositionalIndexer):
         if isinstance(item, slice):
             if item.start == item.stop:
                 pass
-            elif item.start < -len(self):
+            elif item.start is not None and item.start < -len(self):
                 item = slice(None, item.stop, item.step)
-            elif item.stop < -len(self):
+            elif item.stop is not None and item.stop < -len(self):
                 item = slice(item.start, None, item.step)
 
         value = self._pa_array[item]

From 659577e6c43dfe5125b865f707ec537f66b89cd3 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 5 Nov 2023 10:13:53 -0500
Subject: [PATCH 05/13] Drop the slice.start out-of-bounds workaround in array.py

---
 pandas/core/arrays/arrow/array.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 9a6b91f21d90f..7e5452a21af9a 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -556,8 +556,6 @@ def __getitem__(self, item: PositionalIndexer):
         if isinstance(item, slice):
             if item.start == item.stop:
                 pass
-            elif item.start is not None and item.start < -len(self):
-                item = slice(None, item.stop, item.step)
             elif item.stop is not None and item.stop < -len(self):
                 item = slice(item.start, None, item.step)
 

From 86fe4f129a648fa2f765e9f3587d8829eedbc7ff Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 17 Nov 2023 00:35:46 +0100
Subject: [PATCH 06/13] Only rewrite out-of-bounds slice.stop for negative step

---
 pandas/core/arrays/arrow/array.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 7e5452a21af9a..820a3856d48fc 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -556,7 +556,12 @@ def __getitem__(self, item: PositionalIndexer):
         if isinstance(item, slice):
             if item.start == item.stop:
                 pass
-            elif item.stop is not None and item.stop < -len(self):
+            elif (
+                item.stop is not None
+                and item.stop < -len(self)
+                and item.step is not None
+                and item.step < 0
+            ):
                 item = slice(item.start, None, item.step)
 
         value = self._pa_array[item]

From 242552937fe6d8e12c2af90e2d55b91a9d9bdb5c Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 17 Nov 2023 19:35:18 +0100
Subject: [PATCH 07/13] Add gh ref

---
 pandas/core/arrays/arrow/array.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 820a3856d48fc..d162b66e5d369 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -554,6 +554,7 @@ def __getitem__(self, item: PositionalIndexer):
         # We are not an array indexer, so maybe e.g. a slice or integer
         # indexer. We dispatch to pyarrow.
         if isinstance(item, slice):
+            # Arrow bug https://github.com/apache/arrow/issues/38768
             if item.start == item.stop:
                 pass
             elif (

From 06b4f8909fef759bdda8877602b40f4be3b9925e Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Tue, 21 Nov 2023 21:17:28 +0100
Subject: [PATCH 08/13] Move whatsnew entry from v2.1.3 to v2.1.4

---
 doc/source/whatsnew/v2.1.3.rst | 1 -
 doc/source/whatsnew/v2.1.4.rst | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.1.3.rst b/doc/source/whatsnew/v2.1.3.rst
index a7f20aa67685f..af626895a9e0e 100644
--- a/doc/source/whatsnew/v2.1.3.rst
+++ b/doc/source/whatsnew/v2.1.3.rst
@@ -21,7 +21,6 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 - Bug in :meth:`DatetimeIndex.diff` raising ``TypeError`` (:issue:`55080`)
-- Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
 - Bug in :meth:`Index.isin` raising for Arrow backed string and ``None`` value (:issue:`55821`)
 - Fix :func:`read_parquet` and :func:`read_feather` for `CVE-2023-47248 <https://www.cve.org/CVERecord?id=CVE-2023-47248>`__ (:issue:`55894`)
 
diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst
index 25afcbb3bb532..e52c42dd31211 100644
--- a/doc/source/whatsnew/v2.1.4.rst
+++ b/doc/source/whatsnew/v2.1.4.rst
@@ -22,6 +22,7 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 - Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`)
+- Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
 -
 
 .. ---------------------------------------------------------------------------

From a015f483d7a88af30b447e11c6e7c2ebc8ce290c Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Wed, 22 Nov 2023 00:19:47 +0100
Subject: [PATCH 09/13] Fix string option tests in indexing

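---
NOTE(review): besides the test adjustments, this patch flips the registered
default of the ``future.infer_string`` option from False to True in
pandas/core/config_init.py. That changes the option's default for every
consumer, not only the test suite. Presumably this is a temporary switch to
exercise the string-dtype code paths in CI -- confirm it is reverted by a
later patch in the series before merging.
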
 pandas/core/config_init.py                    |  2 +-
 pandas/tests/indexing/multiindex/test_loc.py  | 10 ++--
 pandas/tests/indexing/test_at.py              |  7 ++-
 pandas/tests/indexing/test_categorical.py     |  2 +-
 .../indexing/test_chaining_and_caching.py     |  4 +-
 pandas/tests/indexing/test_coercion.py        | 26 ++++++----
 pandas/tests/indexing/test_iloc.py            | 18 +++++--
 pandas/tests/indexing/test_indexing.py        | 48 ++++++++++++------
 pandas/tests/indexing/test_loc.py             | 49 +++++++++++++------
 pandas/tests/indexing/test_partial.py         | 40 ++++++++++-----
 10 files changed, 145 insertions(+), 61 deletions(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index a8b63f97141c2..bdbab78a443de 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -905,7 +905,7 @@ def register_converter_cb(key) -> None:
 with cf.config_prefix("future"):
     cf.register_option(
         "infer_string",
-        False,
+        True,
         "Whether to infer sequence of str objects as pyarrow string "
         "dtype, which will be the default in pandas 3.0 "
         "(at which point this option will be deprecated).",
diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py
index 873c4e3e60f4c..5508153322adb 100644
--- a/pandas/tests/indexing/multiindex/test_loc.py
+++ b/pandas/tests/indexing/multiindex/test_loc.py
@@ -566,7 +566,7 @@ def test_loc_setitem_single_column_slice():
     tm.assert_frame_equal(df, expected)
 
 
-def test_loc_nan_multiindex():
+def test_loc_nan_multiindex(using_infer_string):
     # GH 5286
     tups = [
         ("Good Things", "C", np.nan),
@@ -586,8 +586,12 @@ def test_loc_nan_multiindex():
     result = df.loc["Good Things"].loc["C"]
     expected = DataFrame(
         np.ones((1, 4)),
-        index=Index([np.nan], dtype="object", name="u3"),
-        columns=Index(["d1", "d2", "d3", "d4"], dtype="object"),
+        index=Index(
+            [np.nan],
+            dtype="object" if not using_infer_string else "string[pyarrow_numpy]",
+            name="u3",
+        ),
+        columns=Index(["d1", "d2", "d3", "d4"]),
     )
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py
index 7504c984794e8..d78694018749c 100644
--- a/pandas/tests/indexing/test_at.py
+++ b/pandas/tests/indexing/test_at.py
@@ -13,6 +13,7 @@
     CategoricalIndex,
     DataFrame,
     DatetimeIndex,
+    Index,
     MultiIndex,
     Series,
     Timestamp,
@@ -70,7 +71,11 @@ def test_at_setitem_item_cache_cleared(self):
         df.at[0, "x"] = 4
         df.at[0, "cost"] = 789
 
-        expected = DataFrame({"x": [4], "cost": 789}, index=[0])
+        expected = DataFrame(
+            {"x": [4], "cost": 789},
+            index=[0],
+            columns=Index(["x", "cost"], dtype=object),
+        )
         tm.assert_frame_equal(df, expected)
 
         # And in particular, check that the _item_cache has updated correctly.
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index 0432c8856e5c5..6f0ef0b357269 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -273,7 +273,7 @@ def test_slicing_doc_examples(self):
         tm.assert_frame_equal(result, expected)
 
         result = df.iloc[2:4, :].dtypes
-        expected = Series(["category", "int64"], ["cats", "values"])
+        expected = Series(["category", "int64"], ["cats", "values"], dtype=object)
         tm.assert_series_equal(result, expected)
 
         result = df.loc["h":"j", "cats"]
diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py
index 21aab6652a300..bf0975a803dce 100644
--- a/pandas/tests/indexing/test_chaining_and_caching.py
+++ b/pandas/tests/indexing/test_chaining_and_caching.py
@@ -339,7 +339,9 @@ def test_detect_chained_assignment_object_dtype(
         self, using_array_manager, using_copy_on_write, warn_copy_on_write
     ):
         expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
-        df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
+        df = DataFrame(
+            {"A": Series(["aaa", "bbb", "ccc"], dtype=object), "B": [1, 2, 3]}
+        )
         df_original = df.copy()
 
         if not using_copy_on_write and not warn_copy_on_write:
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index c743166030048..95db295f8fc2c 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -9,6 +9,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas.compat import (
     IS64,
     is_platform_windows,
@@ -111,7 +113,7 @@ def _assert_setitem_index_conversion(
         "val,exp_dtype", [("x", object), (5, IndexError), (1.1, object)]
     )
     def test_setitem_index_object(self, val, exp_dtype):
-        obj = pd.Series([1, 2, 3, 4], index=list("abcd"))
+        obj = pd.Series([1, 2, 3, 4], index=pd.Index(list("abcd"), dtype=object))
         assert obj.index.dtype == object
 
         if exp_dtype is IndexError:
@@ -122,7 +124,7 @@ def test_setitem_index_object(self, val, exp_dtype):
                 with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                     temp[5] = 5
         else:
-            exp_index = pd.Index(list("abcd") + [val])
+            exp_index = pd.Index(list("abcd") + [val], dtype=object)
             self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
 
     @pytest.mark.parametrize(
@@ -195,10 +197,10 @@ def _assert_insert_conversion(self, original, value, expected, expected_dtype):
         ],
     )
     def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
-        obj = pd.Index(list("abcd"))
+        obj = pd.Index(list("abcd"), dtype=object)
         assert obj.dtype == object
 
-        exp = pd.Index(["a", coerced_val, "b", "c", "d"])
+        exp = pd.Index(["a", coerced_val, "b", "c", "d"], dtype=object)
         self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
 
     @pytest.mark.parametrize(
@@ -397,7 +399,7 @@ def _run_test(self, obj, fill_val, klass, exp_dtype):
     )
     def test_where_object(self, index_or_series, fill_val, exp_dtype):
         klass = index_or_series
-        obj = klass(list("abcd"))
+        obj = klass(list("abcd"), dtype=object)
         assert obj.dtype == object
         self._run_test(obj, fill_val, klass, exp_dtype)
 
@@ -559,10 +561,10 @@ def _assert_fillna_conversion(self, original, value, expected, expected_dtype):
     )
     def test_fillna_object(self, index_or_series, fill_val, fill_dtype):
         klass = index_or_series
-        obj = klass(["a", np.nan, "c", "d"])
+        obj = klass(["a", np.nan, "c", "d"], dtype=object)
         assert obj.dtype == object
 
-        exp = klass(["a", fill_val, "c", "d"])
+        exp = klass(["a", fill_val, "c", "d"], dtype=object)
         self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
 
     @pytest.mark.parametrize(
@@ -824,6 +826,7 @@ def replacer(self, how, from_key, to_key):
             raise ValueError
         return replacer
 
+    @pytest.mark.skipif(using_pyarrow_string_dtype(), reason="TODO: test is to complex")
     def test_replace_series(self, how, to_key, from_key, replacer):
         index = pd.Index([3, 4], name="xxx")
         obj = pd.Series(self.rep[from_key], index=index, name="yyy")
@@ -870,13 +873,18 @@ def test_replace_series(self, how, to_key, from_key, replacer):
     @pytest.mark.parametrize(
         "from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True
     )
-    def test_replace_series_datetime_tz(self, how, to_key, from_key, replacer):
+    def test_replace_series_datetime_tz(
+        self, how, to_key, from_key, replacer, using_infer_string
+    ):
         index = pd.Index([3, 4], name="xyz")
         obj = pd.Series(self.rep[from_key], index=index, name="yyy")
         assert obj.dtype == from_key
 
         exp = pd.Series(self.rep[to_key], index=index, name="yyy")
-        assert exp.dtype == to_key
+        if using_infer_string and to_key == "object":
+            assert exp.dtype == "string"
+        else:
+            assert exp.dtype == to_key
 
         msg = "Downcasting behavior in `replace`"
         warn = FutureWarning if exp.dtype != object else None
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index cbcbf3396363a..a285356e2563f 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -100,9 +100,8 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
         #  we retain the object dtype.
         frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)})
         df = frame.copy()
-        orig_vals = df.values
         indexer(df)[key, 0] = cat
-        expected = DataFrame({0: cat.astype(object), 1: range(3)})
+        expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)})
         tm.assert_frame_equal(df, expected)
 
     @pytest.mark.parametrize("box", [array, Series])
@@ -451,12 +450,16 @@ def test_iloc_setitem(self):
     def test_iloc_setitem_axis_argument(self):
         # GH45032
         df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
+        df[1] = df[1].astype(object)
         expected = DataFrame([[6, "c", 10], [7, "d", 11], [5, 5, 5]])
+        expected[1] = expected[1].astype(object)
         df.iloc(axis=0)[2] = 5
         tm.assert_frame_equal(df, expected)
 
         df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
+        df[1] = df[1].astype(object)
         expected = DataFrame([[6, "c", 5], [7, "d", 5], [8, "e", 5]])
+        expected[1] = expected[1].astype(object)
         df.iloc(axis=1)[2] = 5
         tm.assert_frame_equal(df, expected)
 
@@ -615,7 +618,7 @@ def test_iloc_getitem_labelled_frame(self):
         assert result == exp
 
         # out-of-bounds exception
-        msg = "index 5 is out of bounds for axis 0 with size 4"
+        msg = "index 5 is out of bounds for axis 0 with size 4|index out of bounds"
         with pytest.raises(IndexError, match=msg):
             df.iloc[10, 5]
 
@@ -1313,7 +1316,9 @@ def test_iloc_setitem_dtypes_duplicate_columns(
         self, dtypes, init_value, expected_value
     ):
         # GH#22035
-        df = DataFrame([[init_value, "str", "str2"]], columns=["a", "b", "b"])
+        df = DataFrame(
+            [[init_value, "str", "str2"]], columns=["a", "b", "b"], dtype=object
+        )
 
         # with the enforcement of GH#45333 in 2.0, this sets values inplace,
         #  so we retain object dtype
@@ -1360,7 +1365,10 @@ def test_frame_iloc_getitem_callable(self):
 
     def test_frame_iloc_setitem_callable(self):
         # GH#11485
-        df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
+        df = DataFrame(
+            {"X": [1, 2, 3, 4], "Y": Series(list("aabb"), dtype=object)},
+            index=list("ABCD"),
+        )
 
         # return location
         res = df.copy()
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index bdbbcabcaab0e..d6ec7ac3e4185 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -8,6 +8,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas.errors import IndexingError
 
 from pandas.core.dtypes.common import (
@@ -189,7 +191,7 @@ def test_setitem_dtype_upcast(self):
         ):
             df.loc[0, "c"] = "foo"
         expected = DataFrame(
-            [{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]
+            {"a": [1, 3], "b": [np.nan, 2], "c": Series(["foo", np.nan], dtype=object)}
         )
         tm.assert_frame_equal(df, expected)
 
@@ -284,18 +286,27 @@ def test_dups_fancy_indexing_not_in_order(self):
         with pytest.raises(KeyError, match="not in index"):
             df.loc[rows]
 
-    def test_dups_fancy_indexing_only_missing_label(self):
+    def test_dups_fancy_indexing_only_missing_label(self, using_infer_string):
         # List containing only missing label
         dfnu = DataFrame(
             np.random.default_rng(2).standard_normal((5, 3)), index=list("AABCD")
         )
-        with pytest.raises(
-            KeyError,
-            match=re.escape(
-                "\"None of [Index(['E'], dtype='object')] are in the [index]\""
-            ),
-        ):
-            dfnu.loc[["E"]]
+        if using_infer_string:
+            with pytest.raises(
+                KeyError,
+                match=re.escape(
+                    "\"None of [Index(['E'], dtype='string')] are in the [index]\""
+                ),
+            ):
+                dfnu.loc[["E"]]
+        else:
+            with pytest.raises(
+                KeyError,
+                match=re.escape(
+                    "\"None of [Index(['E'], dtype='object')] are in the [index]\""
+                ),
+            ):
+                dfnu.loc[["E"]]
 
     @pytest.mark.parametrize("vals", [[0, 1, 2], list("abc")])
     def test_dups_fancy_indexing_missing_label(self, vals):
@@ -451,6 +462,9 @@ def test_set_index_nan(self):
         )
         tm.assert_frame_equal(result, df)
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="can't multiply arrow strings"
+    )
     def test_multi_assign(self):
         # GH 3626, an assignment of a sub-df to a df
         # set float64 to avoid upcast when setting nan
@@ -553,7 +567,7 @@ def test_string_slice_empty(self):
         with pytest.raises(KeyError, match="^0$"):
             df.loc["2011", 0]
 
-    def test_astype_assignment(self):
+    def test_astype_assignment(self, using_infer_string):
         # GH4312 (iloc)
         df_orig = DataFrame(
             [["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
@@ -567,8 +581,9 @@ def test_astype_assignment(self):
         expected = DataFrame(
             [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
         )
-        expected["A"] = expected["A"].astype(object)
-        expected["B"] = expected["B"].astype(object)
+        if not using_infer_string:
+            expected["A"] = expected["A"].astype(object)
+            expected["B"] = expected["B"].astype(object)
         tm.assert_frame_equal(df, expected)
 
         # GH5702 (loc)
@@ -577,7 +592,8 @@ def test_astype_assignment(self):
         expected = DataFrame(
             [[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
         )
-        expected["A"] = expected["A"].astype(object)
+        if not using_infer_string:
+            expected["A"] = expected["A"].astype(object)
         tm.assert_frame_equal(df, expected)
 
         df = df_orig.copy()
@@ -585,8 +601,9 @@ def test_astype_assignment(self):
         expected = DataFrame(
             [["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
         )
-        expected["B"] = expected["B"].astype(object)
-        expected["C"] = expected["C"].astype(object)
+        if not using_infer_string:
+            expected["B"] = expected["B"].astype(object)
+            expected["C"] = expected["C"].astype(object)
         tm.assert_frame_equal(df, expected)
 
     def test_astype_assignment_full_replacements(self):
@@ -673,6 +690,7 @@ def test_loc_setitem_fullindex_views(self):
         df.loc[df.index] = df.loc[df.index]
         tm.assert_frame_equal(df, df2)
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't set int into string")
     def test_rhs_alignment(self):
         # GH8258, tests that both rows & columns are aligned to what is
         # assigned to. covers both uniform data-type & multi-type cases
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 96fd3f4e6fca0..8459cc5a30130 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -12,6 +12,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas._libs import index as libindex
 from pandas.errors import IndexingError
 import pandas.util._test_decorators as td
@@ -469,7 +471,7 @@ def test_loc_to_fail2(self):
 
         msg = r"\"None of \[Index\(\['4'\], dtype='object'\)\] are in the \[index\]\""
         with pytest.raises(KeyError, match=msg):
-            s.loc[["4"]]
+            s.loc[Index(["4"], dtype=object)]
 
         s.loc[-1] = 3
         with pytest.raises(KeyError, match="not in index"):
@@ -781,7 +783,9 @@ def test_loc_setitem_empty_frame(self):
         #  is inplace, so that dtype is retained
         sera = Series(val1, index=keys1, dtype=np.float64)
         serb = Series(val2, index=keys2)
-        expected = DataFrame({"A": sera, "B": serb}).reindex(index=index)
+        expected = DataFrame(
+            {"A": sera, "B": serb}, columns=Index(["A", "B"], dtype=object)
+        ).reindex(index=index)
         tm.assert_frame_equal(df, expected)
 
     def test_loc_setitem_frame(self):
@@ -979,7 +983,7 @@ def test_setitem_new_key_tz(self, indexer_sl):
             to_datetime(42).tz_localize("UTC"),
             to_datetime(666).tz_localize("UTC"),
         ]
-        expected = Series(vals, index=["foo", "bar"])
+        expected = Series(vals, index=Index(["foo", "bar"], dtype=object))
 
         ser = Series(dtype=object)
         indexer_sl(ser)["foo"] = vals[0]
@@ -1254,6 +1258,7 @@ def test_loc_reverse_assignment(self):
 
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't set int into string")
     def test_loc_setitem_str_to_small_float_conversion_type(self):
         # GH#20388
 
@@ -1439,7 +1444,7 @@ def test_loc_setitem_categorical_values_partial_column_slice(self):
             df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
         tm.assert_frame_equal(df, exp)
 
-    def test_loc_setitem_single_row_categorical(self):
+    def test_loc_setitem_single_row_categorical(self, using_infer_string):
         # GH#25495
         df = DataFrame({"Alpha": ["a"], "Numeric": [0]})
         categories = Categorical(df["Alpha"], categories=["a", "b", "c"])
@@ -1449,7 +1454,9 @@ def test_loc_setitem_single_row_categorical(self):
         df.loc[:, "Alpha"] = categories
 
         result = df["Alpha"]
-        expected = Series(categories, index=df.index, name="Alpha").astype(object)
+        expected = Series(categories, index=df.index, name="Alpha").astype(
+            object if not using_infer_string else "string[pyarrow_numpy]"
+        )
         tm.assert_series_equal(result, expected)
 
         # double-check that the non-loc setting retains categoricalness
@@ -1615,7 +1622,7 @@ def test_loc_getitem_index_namedtuple(self):
         result = df.loc[IndexType("foo", "bar")]["A"]
         assert result == 1
 
-    def test_loc_setitem_single_column_mixed(self):
+    def test_loc_setitem_single_column_mixed(self, using_infer_string):
         df = DataFrame(
             np.random.default_rng(2).standard_normal((5, 3)),
             index=["a", "b", "c", "d", "e"],
@@ -1623,7 +1630,10 @@ def test_loc_setitem_single_column_mixed(self):
         )
         df["str"] = "qux"
         df.loc[df.index[::2], "str"] = np.nan
-        expected = np.array([np.nan, "qux", np.nan, "qux", np.nan], dtype=object)
+        expected = Series(
+            [np.nan, "qux", np.nan, "qux", np.nan],
+            dtype=object if not using_infer_string else "string[pyarrow_numpy]",
+        ).values
         tm.assert_almost_equal(df["str"].values, expected)
 
     def test_loc_setitem_cast2(self):
@@ -2016,11 +2026,15 @@ def test_loc_setitem_empty_series_str_idx(self):
         # partially set with an empty object series
         ser = Series(dtype=object)
         ser.loc["foo"] = 1
-        tm.assert_series_equal(ser, Series([1], index=["foo"]))
+        tm.assert_series_equal(ser, Series([1], index=Index(["foo"], dtype=object)))
         ser.loc["bar"] = 3
-        tm.assert_series_equal(ser, Series([1, 3], index=["foo", "bar"]))
+        tm.assert_series_equal(
+            ser, Series([1, 3], index=Index(["foo", "bar"], dtype=object))
+        )
         ser.loc[3] = 4
-        tm.assert_series_equal(ser, Series([1, 3, 4], index=["foo", "bar", 3]))
+        tm.assert_series_equal(
+            ser, Series([1, 3, 4], index=Index(["foo", "bar", 3], dtype=object))
+        )
 
     def test_loc_setitem_incremental_with_dst(self):
         # GH#20724
@@ -2050,7 +2064,11 @@ def test_loc_setitem_datetime_keys_cast(self, conv):
         df.loc[conv(dt1), "one"] = 100
         df.loc[conv(dt2), "one"] = 200
 
-        expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2])
+        expected = DataFrame(
+            {"one": [100.0, 200.0]},
+            index=[dt1, dt2],
+            columns=Index(["one"], dtype=object),
+        )
         tm.assert_frame_equal(df, expected)
 
     def test_loc_setitem_categorical_column_retains_dtype(self, ordered):
@@ -2168,11 +2186,11 @@ def test_loc_setitem_with_expansion_preserves_nullable_int(self, dtype):
         result = DataFrame(index=df.index)
         result.loc[df.index, "data"] = ser
 
-        tm.assert_frame_equal(result, df)
+        tm.assert_frame_equal(result, df, check_column_type=False)
 
         result = DataFrame(index=df.index)
         result.loc[df.index, "data"] = ser._values
-        tm.assert_frame_equal(result, df)
+        tm.assert_frame_equal(result, df, check_column_type=False)
 
     def test_loc_setitem_ea_not_full_column(self):
         # GH#39163
@@ -2262,7 +2280,10 @@ def test_frame_loc_getitem_callable_labels(self):
 
     def test_frame_loc_setitem_callable(self):
         # GH#11485
-        df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
+        df = DataFrame(
+            {"X": [1, 2, 3, 4], "Y": Series(list("aabb"), dtype=object)},
+            index=list("ABCD"),
+        )
 
         # return label
         res = df.copy()
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index 3d04cc764563f..2f9018112c03b 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -28,7 +28,9 @@ def test_empty_frame_setitem_index_name_retained(self):
 
         df["series"] = series
         expected = DataFrame(
-            {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index")
+            {"series": [1.23] * 4},
+            index=pd.RangeIndex(4, name="df_index"),
+            columns=Index(["series"], dtype=object),
         )
 
         tm.assert_frame_equal(df, expected)
@@ -39,7 +41,9 @@ def test_empty_frame_setitem_index_name_inherited(self):
         series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
         df["series"] = series
         expected = DataFrame(
-            {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index")
+            {"series": [1.23] * 4},
+            index=pd.RangeIndex(4, name="series_index"),
+            columns=Index(["series"], dtype=object),
         )
         tm.assert_frame_equal(df, expected)
 
@@ -92,7 +96,9 @@ def test_partial_set_empty_frame2(self):
         # these work as they don't really change
         # anything but the index
         # GH#5632
-        expected = DataFrame(columns=["foo"], index=Index([], dtype="object"))
+        expected = DataFrame(
+            columns=Index(["foo"], dtype=object), index=Index([], dtype="object")
+        )
 
         df = DataFrame(index=Index([], dtype="object"))
         df["foo"] = Series([], dtype="object")
@@ -110,7 +116,9 @@ def test_partial_set_empty_frame2(self):
         tm.assert_frame_equal(df, expected)
 
     def test_partial_set_empty_frame3(self):
-        expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
+        expected = DataFrame(
+            columns=Index(["foo"], dtype=object), index=Index([], dtype="int64")
+        )
         expected["foo"] = expected["foo"].astype("float64")
 
         df = DataFrame(index=Index([], dtype="int64"))
@@ -127,7 +135,9 @@ def test_partial_set_empty_frame4(self):
         df = DataFrame(index=Index([], dtype="int64"))
         df["foo"] = range(len(df))
 
-        expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
+        expected = DataFrame(
+            columns=Index(["foo"], dtype=object), index=Index([], dtype="int64")
+        )
         # range is int-dtype-like, so we get int64 dtype
         expected["foo"] = expected["foo"].astype("int64")
         tm.assert_frame_equal(df, expected)
@@ -200,10 +210,10 @@ def test_partial_set_empty_frame_empty_copy_assignment(self):
         df = DataFrame(index=[0])
         df = df.copy()
         df["a"] = 0
-        expected = DataFrame(0, index=[0], columns=["a"])
+        expected = DataFrame(0, index=[0], columns=Index(["a"], dtype=object))
         tm.assert_frame_equal(df, expected)
 
-    def test_partial_set_empty_frame_empty_consistencies(self):
+    def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string):
         # GH#6171
         # consistency on empty frames
         df = DataFrame(columns=["x", "y"])
@@ -213,7 +223,15 @@ def test_partial_set_empty_frame_empty_consistencies(self):
 
         df = DataFrame(columns=["x", "y"])
         df["x"] = ["1", "2"]
-        expected = DataFrame({"x": ["1", "2"], "y": [np.nan, np.nan]}, dtype=object)
+        expected = DataFrame(
+            {
+                "x": Series(
+                    ["1", "2"],
+                    dtype=object if not using_infer_string else "string[pyarrow_numpy]",
+                ),
+                "y": Series([np.nan, np.nan], dtype=object),
+            }
+        )
         tm.assert_frame_equal(df, expected)
 
         df = DataFrame(columns=["x", "y"])
@@ -618,7 +636,7 @@ def test_loc_with_list_of_strings_representing_datetimes_missing_value(
         [
             (
                 period_range(start="2000", periods=20, freq="D"),
-                ["4D", "8D"],
+                Index(["4D", "8D"], dtype=object),
                 (
                     r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                     r"are in the \[index\]"
@@ -626,7 +644,7 @@ def test_loc_with_list_of_strings_representing_datetimes_missing_value(
             ),
             (
                 date_range(start="2000", periods=20, freq="D"),
-                ["4D", "8D"],
+                Index(["4D", "8D"], dtype=object),
                 (
                     r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                     r"are in the \[index\]"
@@ -634,7 +652,7 @@ def test_loc_with_list_of_strings_representing_datetimes_missing_value(
             ),
             (
                 pd.timedelta_range(start="1 day", periods=20),
-                ["2000-01-04", "2000-01-08"],
+                Index(["2000-01-04", "2000-01-08"], dtype=object),
                 (
                     r"None of \[Index\(\['2000-01-04', '2000-01-08'\], "
                     r"dtype='object'\)\] are in the \[index\]"

From dfda6cfaa4b6d3a27cb9d84b1d5ae181e334d696 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Wed, 22 Nov 2023 00:39:52 +0100
Subject: [PATCH 10/13] Fix string option tests in indexing

---
 pandas/tests/indexing/test_iloc.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index a285356e2563f..baf2cdae43fe4 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -231,7 +231,10 @@ def test_iloc_exceeds_bounds(self):
         dfl = DataFrame(
             np.random.default_rng(2).standard_normal((5, 2)), columns=list("AB")
         )
-        tm.assert_frame_equal(dfl.iloc[:, 2:3], DataFrame(index=dfl.index, columns=[]))
+        tm.assert_frame_equal(
+            dfl.iloc[:, 2:3],
+            DataFrame(index=dfl.index, columns=Index([], dtype=dfl.columns.dtype)),
+        )
         tm.assert_frame_equal(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
         tm.assert_frame_equal(dfl.iloc[4:6], dfl.iloc[[4]])
 

From 88f0957d5e39a588e2a8620a540dc397e9b20a09 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Wed, 22 Nov 2023 00:40:56 +0100
Subject: [PATCH 11/13] Fix string option tests in indexing

---
 pandas/core/config_init.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index bdbab78a443de..a8b63f97141c2 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -905,7 +905,7 @@ def register_converter_cb(key) -> None:
 with cf.config_prefix("future"):
     cf.register_option(
         "infer_string",
-        True,
+        False,
         "Whether to infer sequence of str objects as pyarrow string "
         "dtype, which will be the default in pandas 3.0 "
         "(at which point this option will be deprecated).",

From b5bed5dc5840554b1a2f54ec839569b4a284d0a8 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 26 Nov 2023 12:41:47 +0100
Subject: [PATCH 12/13] Update test_coercion.py

---
 pandas/tests/indexing/test_coercion.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index 95db295f8fc2c..029926e714e75 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -826,6 +826,7 @@ def replacer(self, how, from_key, to_key):
             raise ValueError
         return replacer
 
+    # Needs adjustment for the infer string option
     @pytest.mark.skipif(using_pyarrow_string_dtype(), reason="TODO: test is to complex")
     def test_replace_series(self, how, to_key, from_key, replacer):
         index = pd.Index([3, 4], name="xxx")

From abadce8865ae1a141753d36398fdf8304b1e948a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 26 Nov 2023 12:42:13 +0100
Subject: [PATCH 13/13] Update test_coercion.py

---
 pandas/tests/indexing/test_coercion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index 029926e714e75..03691ca318037 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -826,7 +826,7 @@ def replacer(self, how, from_key, to_key):
             raise ValueError
         return replacer
 
-    # Needs adjustment for the infer string option
+    # Expected needs adjustment for the infer string option, seems to work as expected
     @pytest.mark.skipif(using_pyarrow_string_dtype(), reason="TODO: test is to complex")
     def test_replace_series(self, how, to_key, from_key, replacer):
         index = pd.Index([3, 4], name="xxx")