From 39042458ff6570cacbc4a22090d7bc0f49a872fc Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 30 Jul 2023 16:06:00 +0200
Subject: [PATCH 01/11] ENH: Implement arrow string option for various I/O
 methods

---
 pandas/_config/__init__.py                |  5 +++++
 pandas/io/_util.py                        |  8 ++++++++
 pandas/io/feather_format.py               |  8 +++++++-
 pandas/io/orc.py                          |  9 ++++++++-
 pandas/io/parquet.py                      |  5 +++++
 pandas/io/parsers/arrow_parser_wrapper.py |  9 ++++++++-
 pandas/io/pytables.py                     | 17 ++++++++++++++++-
 7 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py
index c37ad563df8ef..9f9cf0fd2a5a8 100644
--- a/pandas/_config/__init__.py
+++ b/pandas/_config/__init__.py
@@ -38,3 +38,8 @@ def using_copy_on_write() -> bool:
 def using_nullable_dtypes() -> bool:
     _mode_options = _global_config["mode"]
     return _mode_options["nullable_dtypes"]
+
+
+def using_pyarrow_string_dtype() -> bool:
+    _mode_options = _global_config["future"]
+    return _mode_options["pyarrow_strings"]
diff --git a/pandas/io/_util.py b/pandas/io/_util.py
index d2a001f0cf925..27316b3ab0af0 100644
--- a/pandas/io/_util.py
+++ b/pandas/io/_util.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from typing import Callable
+
 from pandas.compat._optional import import_optional_dependency
 
 import pandas as pd
@@ -21,3 +23,9 @@ def _arrow_dtype_mapping() -> dict:
         pa.float32(): pd.Float32Dtype(),
         pa.float64(): pd.Float64Dtype(),
     }
+
+
+def arrow_string_types_mapper() -> Callable:
+    pa = import_optional_dependency("pyarrow")
+
+    return {pa.string(): pd.ArrowDtype(pa.string())}.get
diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py
index 77b2b12fda77f..a8f56565b676b 100644
--- a/pandas/io/feather_format.py
+++ b/pandas/io/feather_format.py
@@ -6,6 +6,8 @@
     Any,
 )
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
 from pandas.util._decorators import doc
@@ -15,6 +17,7 @@
 from pandas.core.api import DataFrame
 from pandas.core.shared_docs import _shared_docs
 
+from pandas.io._util import arrow_string_types_mapper
 from pandas.io.common import get_handle
 
 if TYPE_CHECKING:
@@ -119,7 +122,7 @@ def read_feather(
     with get_handle(
         path, "rb", storage_options=storage_options, is_text=False
     ) as handles:
-        if dtype_backend is lib.no_default:
+        if dtype_backend is lib.no_default and not using_pyarrow_string_dtype():
             return feather.read_feather(
                 handles.handle, columns=columns, use_threads=bool(use_threads)
             )
@@ -135,3 +138,6 @@ def read_feather(
 
         elif dtype_backend == "pyarrow":
             return pa_table.to_pandas(types_mapper=pd.ArrowDtype)
+
+        elif using_pyarrow_string_dtype():
+            return pa_table.to_pandas(types_mapper=arrow_string_types_mapper())
diff --git a/pandas/io/orc.py b/pandas/io/orc.py
index 75f7f9e56439e..774f9d797b011 100644
--- a/pandas/io/orc.py
+++ b/pandas/io/orc.py
@@ -9,6 +9,8 @@
     Literal,
 )
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas._libs import lib
 from pandas.compat import pa_version_under8p0
 from pandas.compat._optional import import_optional_dependency
@@ -24,6 +26,7 @@
 import pandas as pd
 from pandas.core.indexes.api import default_index
 
+from pandas.io._util import arrow_string_types_mapper
 from pandas.io.common import (
     get_handle,
     is_fsspec_url,
@@ -132,7 +135,11 @@ def read_orc(
             df = pa_table.to_pandas(types_mapper=mapping.get)
         return df
     else:
-        return pa_table.to_pandas()
+        if using_pyarrow_string_dtype():
+            types_mapper = arrow_string_types_mapper()
+        else:
+            types_mapper = None
+        return pa_table.to_pandas(types_mapper=types_mapper)
 
 
 def to_orc(
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 61112542fb9d8..39b43f48ac343 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -11,6 +11,8 @@
 import warnings
 from warnings import catch_warnings
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import AbstractMethodError
@@ -25,6 +27,7 @@
 )
 from pandas.core.shared_docs import _shared_docs
 
+from pandas.io._util import arrow_string_types_mapper
 from pandas.io.common import (
     IOHandles,
     get_handle,
@@ -244,6 +247,8 @@ def read(
             to_pandas_kwargs["types_mapper"] = mapping.get
         elif dtype_backend == "pyarrow":
             to_pandas_kwargs["types_mapper"] = pd.ArrowDtype  # type: ignore[assignment]  # noqa: E501
+        elif using_pyarrow_string_dtype():
+            to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()
 
         manager = get_option("mode.data_manager")
         if manager == "array":
diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 09ea6b8b7902b..71bfb00a95b50 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -2,6 +2,8 @@
 
 from typing import TYPE_CHECKING
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
 
@@ -10,7 +12,10 @@
 import pandas as pd
 from pandas import DataFrame
 
-from pandas.io._util import _arrow_dtype_mapping
+from pandas.io._util import (
+    _arrow_dtype_mapping,
+    arrow_string_types_mapper,
+)
 from pandas.io.parsers.base_parser import ParserBase
 
 if TYPE_CHECKING:
@@ -215,6 +220,8 @@ def read(self) -> DataFrame:
             dtype_mapping = _arrow_dtype_mapping()
             dtype_mapping[pa.null()] = pd.Int64Dtype()
             frame = table.to_pandas(types_mapper=dtype_mapping.get)
+        elif using_pyarrow_string_dtype():
+            frame = table.to_pandas(types_mapper=arrow_string_types_mapper())
         else:
             frame = table.to_pandas()
         return self._finalize_pandas_output(frame)
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index e50a1f6e56d51..9d905311d32ed 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -30,6 +30,7 @@
 from pandas._config import (
     config,
     get_option,
+    using_pyarrow_string_dtype,
 )
 
 from pandas._libs import (
@@ -66,6 +67,7 @@
 )
 from pandas.core.dtypes.missing import array_equivalent
 
+import pandas as pd
 from pandas import (
     DataFrame,
     DatetimeIndex,
@@ -3219,7 +3221,12 @@ def read(
         self.validate_read(columns, where)
         index = self.read_index("index", start=start, stop=stop)
         values = self.read_array("values", start=start, stop=stop)
-        return Series(values, index=index, name=self.name, copy=False)
+        result = Series(values, index=index, name=self.name, copy=False)
+        if result.dtype.kind == "O" and using_pyarrow_string_dtype():
+            import pyarrow as pa
+
+            result = result.astype(pd.ArrowDtype(pa.string()))
+        return result
 
     # error: Signature of "write" incompatible with supertype "Fixed"
     def write(self, obj, **kwargs) -> None:  # type: ignore[override]
@@ -3287,6 +3294,10 @@ def read(
 
             columns = items[items.get_indexer(blk_items)]
             df = DataFrame(values.T, columns=columns, index=axes[1], copy=False)
+            if values.dtype.kind == "O" and using_pyarrow_string_dtype():
+                import pyarrow as pa
+
+                df = df.astype(pd.ArrowDtype(pa.string()))
             dfs.append(df)
 
         if len(dfs) > 0:
@@ -4669,6 +4680,10 @@ def read(
                 # Categorical
                 df = DataFrame._from_arrays([values], columns=cols_, index=index_)
             assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
+            if values.dtype.kind == "O" and using_pyarrow_string_dtype():
+                import pyarrow as pa
+
+                df = df.astype(pd.ArrowDtype(pa.string()))
             frames.append(df)
 
         if len(frames) == 1:

From ebe0bd51c2939f10f10164eb169276537fa15c51 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 31 Jul 2023 20:02:14 -0700
Subject: [PATCH 02/11] ENH: allow opt-in to inferring pyarrow strings

---
 pandas/_libs/lib.pyx       | 38 ++++++++++++++++++++++++++++++++++++++
 pandas/core/config_init.py | 11 +++++++++++
 pandas/core/dtypes/cast.py | 19 +++++++++++++++++++
 3 files changed, 68 insertions(+)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index c3fbd3ee4853e..183a111249710 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1299,6 +1299,7 @@ cdef class Seen:
         bint datetimetz_      # seen_datetimetz
         bint period_          # seen_period
         bint interval_        # seen_interval
+        bint str_             # seen_str
 
     def __cinit__(self, bint coerce_numeric=False):
         """
@@ -1325,6 +1326,7 @@ cdef class Seen:
         self.datetimetz_ = False
         self.period_ = False
         self.interval_ = False
+        self.str_ = False
         self.coerce_numeric = coerce_numeric
 
     cdef bint check_uint64_conflict(self) except -1:
@@ -2615,6 +2617,13 @@ def maybe_convert_objects(ndarray[object] objects,
             else:
                 seen.object_ = True
                 break
+        elif isinstance(val, str):
+            if convert_non_numeric:
+                seen.str_ = True
+                break
+            else:
+                seen.object_ = True
+                break
         else:
             seen.object_ = True
             break
@@ -2669,6 +2678,35 @@ def maybe_convert_objects(ndarray[object] objects,
             return pi._data
         seen.object_ = True
 
+    elif seen.str_:
+        if is_string_array(objects):
+            from pandas._config import get_option
+            opt = get_option("future.infer_string")
+            if opt is True:
+                import pyarrow as pa
+
+                from pandas.core.dtypes.dtypes import ArrowDtype
+
+                obj = pa.array(objects)
+                dtype = ArrowDtype(obj.type)
+                return dtype.construct_array_type()(obj)
+            # elif opt is False:
+            #    # explicitly set to keep the old behavior and avoid the warning
+            #    pass
+            # else:
+            #    from pandas.util._exceptions import find_stack_level
+            #    warnings.warn(
+            #        "Pandas type inference with a sequence of `str` "
+            #        "objects is deprecated. In a future version, this will give "
+            #        "string[pyarrow] dtype, which will require pyarrow to be "
+            #        "installed. To opt in to the new behavior immediately set "
+            #        "`pd.set_option('future.infer_string', True)`. To keep the "
+            #        "old behavior pass `dtype=object`.",
+            #        FutureWarning,
+            #        stacklevel=find_stack_level(),
+            #    )
+
+        seen.object_ = True
     elif seen.interval_:
         if is_interval_array(objects):
             from pandas import IntervalIndex
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 3f662073f0357..4c02d90827760 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -889,3 +889,14 @@ def register_converter_cb(key) -> None:
         styler_environment,
         validator=is_instance_factory([type(None), str]),
     )
+
+
+with cf.config_prefix("future"):
+    cf.register_option(
+        "future.infer_string",
+        None,
+        "Whether to infer sequence of str objects as pyarrow string "
+        "dtype, which will be the default in pandas 3.0 "
+        "(at which point this option will be deprecated).",
+        validator=is_one_of_factory([True, False, None]),
+    )
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 09105bf49c050..d33d884832c60 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -18,6 +18,8 @@
 
 import numpy as np
 
+from pandas._config import get_option
+
 from pandas._libs import lib
 from pandas._libs.missing import (
     NA,
@@ -796,6 +798,23 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
         # coming out as np.str_!
 
         dtype = _dtype_obj
+        opt = get_option("future.infer_string")
+        if opt is True:
+            import pyarrow as pa
+
+            pa_dtype = pa.string()
+            dtype = ArrowDtype(pa_dtype)
+        # elif opt is None:
+        #    warnings.warn(
+        #        "Pandas type inference with a `str` "
+        #        "object is deprecated. In a future version, this will give "
+        #        "string[pyarrow] dtype, which will require pyarrow to be "
+        #        "installed. To opt in to the new behavior immediately set "
+        #        "`pd.set_option('future.infer_string', True)`. To keep the "
+        #        "old behavior pass `dtype=object`.",
+        #        FutureWarning,
+        #        stacklevel=find_stack_level(),
+        #    )
 
     elif isinstance(val, (np.datetime64, dt.datetime)):
         try:

From 0889028e1b20e087aefedab1560e064e814f01f7 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 5 Aug 2023 19:30:54 +0200
Subject: [PATCH 03/11] Remove comments and add tests

---
 pandas/_libs/lib.pyx                          | 15 --------
 pandas/core/config_init.py                    |  6 ++--
 pandas/core/dtypes/cast.py                    | 11 ------
 pandas/tests/frame/test_constructors.py       | 35 +++++++++++++++++++
 .../indexes/base_class/test_constructors.py   | 15 ++++++++
 .../io/parser/dtypes/test_dtypes_basic.py     | 21 +++++++++++
 pandas/tests/io/test_sql.py                   | 17 +++++++++
 pandas/tests/series/test_constructors.py      | 14 ++++++++
 8 files changed, 105 insertions(+), 29 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 183a111249710..2bd99724b1cad 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2690,21 +2690,6 @@ def maybe_convert_objects(ndarray[object] objects,
                 obj = pa.array(objects)
                 dtype = ArrowDtype(obj.type)
                 return dtype.construct_array_type()(obj)
-            # elif opt is False:
-            #    # explicitly set to keep the old behavior and avoid the warning
-            #    pass
-            # else:
-            #    from pandas.util._exceptions import find_stack_level
-            #    warnings.warn(
-            #        "Pandas type inference with a sequence of `str` "
-            #        "objects is deprecated. In a future version, this will give "
-            #        "string[pyarrow] dtype, which will require pyarrow to be "
-            #        "installed. To opt in to the new behavior immediately set "
-            #        "`pd.set_option('future.infer_string', True)`. To keep the "
-            #        "old behavior pass `dtype=object`.",
-            #        FutureWarning,
-            #        stacklevel=find_stack_level(),
-            #    )
 
         seen.object_ = True
     elif seen.interval_:
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 4c02d90827760..27e9bf8958ab0 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -893,10 +893,10 @@ def register_converter_cb(key) -> None:
 
 with cf.config_prefix("future"):
     cf.register_option(
-        "future.infer_string",
-        None,
+        "infer_string",
+        False,
         "Whether to infer sequence of str objects as pyarrow string "
         "dtype, which will be the default in pandas 3.0 "
         "(at which point this option will be deprecated).",
-        validator=is_one_of_factory([True, False, None]),
+        validator=is_one_of_factory([True, False]),
     )
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index d33d884832c60..9d2530ddc4e12 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -804,17 +804,6 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
 
             pa_dtype = pa.string()
             dtype = ArrowDtype(pa_dtype)
-        # elif opt is None:
-        #    warnings.warn(
-        #        "Pandas type inference with a `str` "
-        #        "object is deprecated. In a future version, this will give "
-        #        "string[pyarrow] dtype, which will require pyarrow to be "
-        #        "installed. To opt in to the new behavior immediately set "
-        #        "`pd.set_option('future.infer_string', True)`. To keep the "
-        #        "old behavior pass `dtype=object`.",
-        #        FutureWarning,
-        #        stacklevel=find_stack_level(),
-        #    )
 
     elif isinstance(val, (np.datetime64, dt.datetime)):
         try:
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index c87f04efffcf4..b82dc98cd0210 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2670,6 +2670,41 @@ def test_construct_with_strings_and_none(self):
         expected = DataFrame({"a": ["1", "2", None]}, dtype="str")
         tm.assert_frame_equal(df, expected)
 
+    def test_frame_string_inference(self):
+        # GH#54430
+        pa = pytest.importorskip("pyarrow")
+        dtype = pd.ArrowDtype(pa.string())
+        expected = DataFrame(
+            {"a": ["a", "b"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
+        )
+        with pd.option_context("future.infer_string", True):
+            df = DataFrame({"a": ["a", "b"]})
+        tm.assert_frame_equal(df, expected)
+
+        expected = DataFrame(
+            {"a": ["a", "b"]},
+            dtype=dtype,
+            columns=Index(["a"], dtype=dtype),
+            index=Index(["x", "y"], dtype=dtype),
+        )
+        with pd.option_context("future.infer_string", True):
+            df = DataFrame({"a": ["a", "b"]}, index=["x", "y"])
+        tm.assert_frame_equal(df, expected)
+
+        expected = DataFrame(
+            {"a": ["a", 1]}, dtype="object", columns=Index(["a"], dtype=dtype)
+        )
+        with pd.option_context("future.infer_string", True):
+            df = DataFrame({"a": ["a", 1]})
+        tm.assert_frame_equal(df, expected)
+
+        expected = DataFrame(
+            {"a": ["a", "b"]}, dtype="object", columns=Index(["a"], dtype=dtype)
+        )
+        with pd.option_context("future.infer_string", True):
+            df = DataFrame({"a": ["a", "b"]}, dtype="object")
+        tm.assert_frame_equal(df, expected)
+
 
 class TestDataFrameConstructorIndexInference:
     def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):
diff --git a/pandas/tests/indexes/base_class/test_constructors.py b/pandas/tests/indexes/base_class/test_constructors.py
index cf8b7214f3b91..638124ac20e06 100644
--- a/pandas/tests/indexes/base_class/test_constructors.py
+++ b/pandas/tests/indexes/base_class/test_constructors.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pytest
 
+import pandas as pd
 from pandas import (
     Index,
     MultiIndex,
@@ -42,3 +43,17 @@ def test_construct_empty_tuples(self, tuple_list):
         expected = MultiIndex.from_tuples(tuple_list)
 
         tm.assert_index_equal(result, expected)
+
+    def test_index_string_inference(self):
+        # GH#54430: all-str data infers pyarrow string; mixed data stays object
+        pa = pytest.importorskip("pyarrow")
+        dtype = pd.ArrowDtype(pa.string())
+        expected = Index(["a", "b"], dtype=dtype)
+        with pd.option_context("future.infer_string", True):
+            result = Index(["a", "b"])
+        tm.assert_index_equal(result, expected)
+
+        expected = Index(["a", 1], dtype="object")
+        with pd.option_context("future.infer_string", True):
+            result = Index(["a", 1])
+        tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
index 915cc9a9a1f95..1a613c91880ea 100644
--- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
+++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -538,3 +538,24 @@ def test_ea_int_avoid_overflow(all_parsers):
         }
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_string_inference(all_parsers):
+    # GH#54430
+    pa = pytest.importorskip("pyarrow")
+    dtype = pd.ArrowDtype(pa.string())
+
+    data = """a,b
+x,1
+y,2"""
+    parser = all_parsers
+    if parser.engine == "pyarrow":
+        pytest.skip("TODO: Follow up")
+    with pd.option_context("future.infer_string", True):
+        result = parser.read_csv(StringIO(data))
+
+    expected = DataFrame(
+        {"a": pd.Series(["x", "y"], dtype=dtype), "b": [1, 2]},
+        columns=pd.Index(["a", "b"], dtype=dtype),
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 6800e55396d7b..63ca91cc89ede 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -2920,6 +2920,23 @@ def test_read_sql_dtype_backend_table(self, string_storage, func):
         # GH#50048 Not supported for sqlite
         pass
 
+    def test_read_sql_string_inference(self):
+        # GH#54430
+        pa = pytest.importorskip("pyarrow")
+        table = "test"
+        df = DataFrame({"a": ["x", "y"]})
+        df.to_sql(table, self.conn, index=False, if_exists="replace")
+
+        with pd.option_context("future.infer_string", True):
+            result = read_sql_table(table, self.conn)
+
+        dtype = pd.ArrowDtype(pa.string())
+        expected = DataFrame(
+            {"a": ["x", "y"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
+        )
+
+        tm.assert_frame_equal(result, expected)
+
 
 @pytest.mark.db
 class TestMySQLAlchemy(_TestSQLAlchemy):
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 9540d7a014409..e67196edcd444 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -2070,6 +2070,20 @@ def test_series_from_index_dtype_equal_does_not_copy(self):
         ser.iloc[0] = 100
         tm.assert_index_equal(idx, expected)
 
+    def test_series_string_inference(self):
+        # GH#54430
+        pa = pytest.importorskip("pyarrow")
+        dtype = pd.ArrowDtype(pa.string())
+        expected = Series(["a", "b"], dtype=dtype)
+        with pd.option_context("future.infer_string", True):
+            ser = Series(["a", "b"])
+        tm.assert_series_equal(ser, expected)
+
+        expected = Series(["a", 1], dtype="object")
+        with pd.option_context("future.infer_string", True):
+            ser = Series(["a", 1])
+        tm.assert_series_equal(ser, expected)
+
 
 class TestSeriesConstructorIndexCoercion:
     def test_series_constructor_datetimelike_index_coercion(self):

From 35a8240d6ba58d1ba9877b411cb43f1dc38f72f9 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 5 Aug 2023 19:45:44 +0200
Subject: [PATCH 04/11] Add string option to arrow parsers

---
 pandas/_config/__init__.py                      |  2 +-
 pandas/io/orc.py                                |  1 +
 .../tests/io/parser/dtypes/test_dtypes_basic.py |  2 --
 pandas/tests/io/test_feather.py                 | 14 ++++++++++++++
 pandas/tests/io/test_orc.py                     | 17 +++++++++++++++++
 pandas/tests/io/test_parquet.py                 | 16 ++++++++++++++++
 6 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py
index 9f9cf0fd2a5a8..daeb135f5bcf7 100644
--- a/pandas/_config/__init__.py
+++ b/pandas/_config/__init__.py
@@ -42,4 +42,4 @@ def using_nullable_dtypes() -> bool:
 
 def using_pyarrow_string_dtype() -> bool:
     _mode_options = _global_config["future"]
-    return _mode_options["pyarrow_strings"]
+    return _mode_options["infer_string"]
diff --git a/pandas/io/orc.py b/pandas/io/orc.py
index 774f9d797b011..d612f2eaadd02 100644
--- a/pandas/io/orc.py
+++ b/pandas/io/orc.py
@@ -135,6 +135,7 @@ def read_orc(
             df = pa_table.to_pandas(types_mapper=mapping.get)
         return df
     else:
+        print("Ts")
         if using_pyarrow_string_dtype():
             types_mapper = arrow_string_types_mapper()
         else:
diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
index 9f6575ddaa95c..ed225c90a4e02 100644
--- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
+++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -549,8 +549,6 @@ def test_string_inference(all_parsers):
 x,1
 y,2"""
     parser = all_parsers
-    if parser.engine == "pyarrow":
-        pytest.skip("TODO: Follow up")
     with pd.option_context("future.infer_string", True):
         result = parser.read_csv(StringIO(data))
 
diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py
index 9de097fe8c0e6..a0fee6751bf53 100644
--- a/pandas/tests/io/test_feather.py
+++ b/pandas/tests/io/test_feather.py
@@ -219,3 +219,17 @@ def test_invalid_dtype_backend(self):
             df.to_feather(path)
             with pytest.raises(ValueError, match=msg):
                 read_feather(path, dtype_backend="numpy")
+
+    def test_string_inference(self, tmp_path):
+        # GH#54431
+        import pyarrow as pa
+
+        path = tmp_path / "test_string_inference.p"
+        df = pd.DataFrame(data={"a": ["x", "y"]})
+        df.to_feather(path)
+        with pd.option_context("future.infer_string", True):
+            result = read_feather(path)
+        expected = pd.DataFrame(
+            data={"a": ["x", "y"]}, dtype=pd.ArrowDtype(pa.string())
+        )
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py
index 8483eb0d5c159..047d9bfe90a88 100644
--- a/pandas/tests/io/test_orc.py
+++ b/pandas/tests/io/test_orc.py
@@ -415,3 +415,20 @@ def test_invalid_dtype_backend():
         df.to_orc(path)
         with pytest.raises(ValueError, match=msg):
             read_orc(path, dtype_backend="numpy")
+
+
+def test_string_inference(tmp_path):
+    # GH#54431
+    import pyarrow as pa
+
+    path = tmp_path / "test_string_inference.p"
+    df = pd.DataFrame(data={"a": ["x", "y"]})
+    df.to_orc(path)
+    with pd.option_context("future.infer_string", True):
+        result = read_orc(path)
+    expected = pd.DataFrame(
+        data={"a": ["x", "y"]},
+        dtype=pd.ArrowDtype(pa.string()),
+        columns=pd.Index(["a"], dtype=pd.ArrowDtype(pa.string())),
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 501e471695a8a..8b18413661703 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1106,6 +1106,22 @@ def test_df_attrs_persistence(self, tmp_path, pa):
         new_df = read_parquet(path, engine=pa)
         assert new_df.attrs == df.attrs
 
+    def test_string_inference(self, tmp_path, pa):
+        # GH#54431: future.infer_string makes read_parquet return pyarrow strings
+        import pyarrow
+
+        path = tmp_path / "test_string_inference.p"
+        df = pd.DataFrame(data={"a": ["x", "y"]}, index=["a", "b"])
+        df.to_parquet(path, engine=pa)
+        with pd.option_context("future.infer_string", True):
+            result = read_parquet(path, engine=pa)
+        expected = pd.DataFrame(
+            data={"a": ["x", "y"]},
+            dtype=pd.ArrowDtype(pyarrow.string()),
+            index=pd.Index(["a", "b"], dtype=pd.ArrowDtype(pyarrow.string())),
+        )
+        tm.assert_frame_equal(result, expected)
+
 
 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full):

From b677a89bedcd360a63f3c2d34205d3decfa726e8 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 5 Aug 2023 23:38:44 +0200
Subject: [PATCH 05/11] Raise NotImplementedError for unhandled read_feather branch

---
 pandas/io/feather_format.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py
index a8f56565b676b..c463f6e4d2759 100644
--- a/pandas/io/feather_format.py
+++ b/pandas/io/feather_format.py
@@ -141,3 +141,5 @@ def read_feather(
 
         elif using_pyarrow_string_dtype():
             return pa_table.to_pandas(types_mapper=arrow_string_types_mapper())
+        else:
+            raise NotImplementedError

From 11b267eac481ddf54b61946ff72463edc681baf0 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 6 Aug 2023 16:44:07 +0200
Subject: [PATCH 06/11] Remove local pyarrow import from ORC string inference test

---
 pandas/tests/io/test_orc.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py
index 047d9bfe90a88..c2d791ba24c87 100644
--- a/pandas/tests/io/test_orc.py
+++ b/pandas/tests/io/test_orc.py
@@ -419,8 +419,6 @@ def test_invalid_dtype_backend():
 
 def test_string_inference(tmp_path):
     # GH#54431
-    import pyarrow as pa
-
     path = tmp_path / "test_string_inference.p"
     df = pd.DataFrame(data={"a": ["x", "y"]})
     df.to_orc(path)

From 8072a860e9577235bb807a937e42f9ed0bb8931e Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 9 Aug 2023 21:44:24 +0200
Subject: [PATCH 07/11] Adjust csv

---
 pandas/tests/io/parser/dtypes/test_dtypes_basic.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
index ed225c90a4e02..1c0f0939029ff 100644
--- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
+++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -547,13 +547,14 @@ def test_string_inference(all_parsers):
 
     data = """a,b
 x,1
-y,2"""
+y,2
+,3"""
     parser = all_parsers
     with pd.option_context("future.infer_string", True):
         result = parser.read_csv(StringIO(data))
 
     expected = DataFrame(
-        {"a": pd.Series(["x", "y"], dtype=dtype), "b": [1, 2]},
+        {"a": pd.Series(["x", "y", None], dtype=dtype), "b": [1, 2, 3]},
         columns=pd.Index(["a", "b"], dtype=dtype),
     )
     tm.assert_frame_equal(result, expected)

From bed3124f0794b6cd42a23e2d716cfdd7ef0cc158 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 9 Aug 2023 21:48:06 +0200
Subject: [PATCH 08/11] Use using_pyarrow_string_dtype helper and remove debug print

---
 pandas/_libs/lib.pyx       | 6 +++---
 pandas/core/dtypes/cast.py | 5 ++---
 pandas/io/orc.py           | 1 -
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 924cf360a35cc..55819ebd1f15e 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -38,6 +38,8 @@ from cython cimport (
     floating,
 )
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas._libs.missing import check_na_tuples_nonequal
 
 import_datetime()
@@ -2680,9 +2682,7 @@ def maybe_convert_objects(ndarray[object] objects,
 
     elif seen.str_:
         if is_string_array(objects, skipna=True):
-            from pandas._config import get_option
-            opt = get_option("future.infer_string")
-            if opt is True:
+            if using_pyarrow_string_dtype():
                 import pyarrow as pa
 
                 from pandas.core.dtypes.dtypes import ArrowDtype
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 9d2530ddc4e12..9f7c0b3e36032 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -18,7 +18,7 @@
 
 import numpy as np
 
-from pandas._config import get_option
+from pandas._config import using_pyarrow_string_dtype
 
 from pandas._libs import lib
 from pandas._libs.missing import (
@@ -798,8 +798,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
         # coming out as np.str_!
 
         dtype = _dtype_obj
-        opt = get_option("future.infer_string")
-        if opt is True:
+        if using_pyarrow_string_dtype():
             import pyarrow as pa
 
             pa_dtype = pa.string()
diff --git a/pandas/io/orc.py b/pandas/io/orc.py
index d612f2eaadd02..774f9d797b011 100644
--- a/pandas/io/orc.py
+++ b/pandas/io/orc.py
@@ -135,7 +135,6 @@ def read_orc(
             df = pa_table.to_pandas(types_mapper=mapping.get)
         return df
     else:
-        print("Ts")
         if using_pyarrow_string_dtype():
             types_mapper = arrow_string_types_mapper()
         else:

From efb6f4a2bcb65ce2d40fa892b5540bba1aae67ec Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 9 Aug 2023 22:10:43 +0200
Subject: [PATCH 09/11] Add convert_string flag; cast only string arrays in HDF readers

---
 pandas/_libs/lib.pyi                  |  3 +++
 pandas/_libs/lib.pyx                  |  5 +++--
 pandas/core/dtypes/cast.py            |  1 +
 pandas/io/pytables.py                 |  7 ++++---
 pandas/tests/io/pytables/test_read.py | 12 ++++++++++++
 5 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index 7e92032a73325..ec0095a7087c8 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -84,6 +84,7 @@ def maybe_convert_objects(
     convert_non_numeric: Literal[False] = ...,
     convert_to_nullable_dtype: Literal[False] = ...,
     dtype_if_all_nat: DtypeObj | None = ...,
+    convert_string: Literal[False] = ...,
 ) -> npt.NDArray[np.object_ | np.number]: ...
 @overload
 def maybe_convert_objects(
@@ -95,6 +96,7 @@ def maybe_convert_objects(
     convert_non_numeric: bool = ...,
     convert_to_nullable_dtype: Literal[True] = ...,
     dtype_if_all_nat: DtypeObj | None = ...,
+    convert_string: bool = ...,
 ) -> ArrayLike: ...
 @overload
 def maybe_convert_objects(
@@ -106,6 +108,7 @@ def maybe_convert_objects(
     convert_non_numeric: bool = ...,
     convert_to_nullable_dtype: bool = ...,
     dtype_if_all_nat: DtypeObj | None = ...,
+    convert_string: bool = ...,
 ) -> ArrayLike: ...
 @overload
 def maybe_convert_numeric(
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 55819ebd1f15e..9a044a23a8cbc 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2443,7 +2443,8 @@ def maybe_convert_objects(ndarray[object] objects,
                           bint convert_numeric=True,  # NB: different default!
                           bint convert_to_nullable_dtype=False,
                           bint convert_non_numeric=False,
-                          object dtype_if_all_nat=None) -> "ArrayLike":
+                          object dtype_if_all_nat=None,
+                          bint convert_string=True) -> "ArrayLike":
     """
     Type inference function-- convert object array to proper dtype
 
@@ -2681,7 +2682,7 @@ def maybe_convert_objects(ndarray[object] objects,
         seen.object_ = True
 
     elif seen.str_:
-        if is_string_array(objects, skipna=True):
+        if convert_string and is_string_array(objects, skipna=True):
             if using_pyarrow_string_dtype():
                 import pyarrow as pa
 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 9f7c0b3e36032..db1c948d607dd 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1185,6 +1185,7 @@ def maybe_infer_to_datetimelike(
         convert_numeric=False,
         convert_non_numeric=True,
         dtype_if_all_nat=np.dtype("M8[ns]"),
+        convert_string=False,
     )
 
 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index afbba5dc74c28..6b2734140de0e 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -37,6 +37,7 @@
     lib,
     writers as libwriters,
 )
+from pandas._libs.lib import is_string_array
 from pandas._libs.tslibs import timezones
 from pandas.compat._optional import import_optional_dependency
 from pandas.compat.pickle_compat import patch_pickle
@@ -3222,7 +3223,7 @@ def read(
         index = self.read_index("index", start=start, stop=stop)
         values = self.read_array("values", start=start, stop=stop)
         result = Series(values, index=index, name=self.name, copy=False)
-        if result.dtype.kind == "O" and using_pyarrow_string_dtype():
+        if using_pyarrow_string_dtype() and is_string_array(result, skipna=True):
             import pyarrow as pa
 
             result = result.astype(pd.ArrowDtype(pa.string()))
@@ -3294,7 +3295,7 @@ def read(
 
             columns = items[items.get_indexer(blk_items)]
             df = DataFrame(values.T, columns=columns, index=axes[1], copy=False)
-            if values.dtype.kind == "O" and using_pyarrow_string_dtype():
+            if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
                 import pyarrow as pa
 
                 df = df.astype(pd.ArrowDtype(pa.string()))
@@ -4680,7 +4681,7 @@ def read(
                 # Categorical
                 df = DataFrame._from_arrays([values], columns=cols_, index=index_)
             assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
-            if values.dtype.kind == "O" and using_pyarrow_string_dtype():
+            if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
                 import pyarrow as pa
 
                 df = df.astype(pd.ArrowDtype(pa.string()))
diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py
index 89b234b24522c..eb9deae55c3a9 100644
--- a/pandas/tests/io/pytables/test_read.py
+++ b/pandas/tests/io/pytables/test_read.py
@@ -388,3 +388,15 @@ def test_read_py2_hdf_file_in_py3(datapath):
     ) as store:
         result = store["p"]
         tm.assert_frame_equal(result, expected)
+
+
+def test_read_infer_string(tmp_path, setup_path):
+    # GH#54431
+    pa = pytest.importorskip("pyarrow")
+    df = DataFrame({"a": ["a", "b", None]})
+    path = tmp_path / setup_path
+    df.to_hdf(path, key="data", format="table")
+    with pd.option_context("future.infer_string", True):
+        result = read_hdf(path, key="data", mode="r")
+    expected = DataFrame({"a": ["a", "b", None]}, dtype=pd.ArrowDtype(pa.string()))
+    tm.assert_frame_equal(result, expected)

From 0ac28a18179a0d2987fd20d7076e086c376e746b Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 9 Aug 2023 22:53:18 +0200
Subject: [PATCH 10/11] Revert convert_string flag; relax HDF dtype assert under infer_string

---
 pandas/_libs/lib.pyi                  | 3 ---
 pandas/_libs/lib.pyx                  | 5 ++---
 pandas/core/dtypes/cast.py            | 1 -
 pandas/io/pytables.py                 | 3 ++-
 pandas/tests/io/pytables/test_read.py | 6 +++++-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index ec0095a7087c8..7e92032a73325 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -84,7 +84,6 @@ def maybe_convert_objects(
     convert_non_numeric: Literal[False] = ...,
     convert_to_nullable_dtype: Literal[False] = ...,
     dtype_if_all_nat: DtypeObj | None = ...,
-    convert_string: Literal[False] = ...,
 ) -> npt.NDArray[np.object_ | np.number]: ...
 @overload
 def maybe_convert_objects(
@@ -96,7 +95,6 @@ def maybe_convert_objects(
     convert_non_numeric: bool = ...,
     convert_to_nullable_dtype: Literal[True] = ...,
     dtype_if_all_nat: DtypeObj | None = ...,
-    convert_string: bool = ...,
 ) -> ArrayLike: ...
 @overload
 def maybe_convert_objects(
@@ -108,7 +106,6 @@ def maybe_convert_objects(
     convert_non_numeric: bool = ...,
     convert_to_nullable_dtype: bool = ...,
     dtype_if_all_nat: DtypeObj | None = ...,
-    convert_string: bool = ...,
 ) -> ArrayLike: ...
 @overload
 def maybe_convert_numeric(
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 9a044a23a8cbc..55819ebd1f15e 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2443,8 +2443,7 @@ def maybe_convert_objects(ndarray[object] objects,
                           bint convert_numeric=True,  # NB: different default!
                           bint convert_to_nullable_dtype=False,
                           bint convert_non_numeric=False,
-                          object dtype_if_all_nat=None,
-                          bint convert_string=True) -> "ArrayLike":
+                          object dtype_if_all_nat=None) -> "ArrayLike":
     """
     Type inference function-- convert object array to proper dtype
 
@@ -2682,7 +2681,7 @@ def maybe_convert_objects(ndarray[object] objects,
         seen.object_ = True
 
     elif seen.str_:
-        if convert_string and is_string_array(objects, skipna=True):
+        if is_string_array(objects, skipna=True):
             if using_pyarrow_string_dtype():
                 import pyarrow as pa
 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index db1c948d607dd..9f7c0b3e36032 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1185,7 +1185,6 @@ def maybe_infer_to_datetimelike(
         convert_numeric=False,
         convert_non_numeric=True,
         dtype_if_all_nat=np.dtype("M8[ns]"),
-        convert_string=False,
     )
 
 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 6b2734140de0e..3c27b186dffe0 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -4680,7 +4680,8 @@ def read(
             else:
                 # Categorical
                 df = DataFrame._from_arrays([values], columns=cols_, index=index_)
-            assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
+            if not using_pyarrow_string_dtype():
+                assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
             if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
                 import pyarrow as pa
 
diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py
index eb9deae55c3a9..425828cb881a7 100644
--- a/pandas/tests/io/pytables/test_read.py
+++ b/pandas/tests/io/pytables/test_read.py
@@ -398,5 +398,9 @@ def test_read_infer_string(tmp_path, setup_path):
     df.to_hdf(path, key="data", format="table")
     with pd.option_context("future.infer_string", True):
         result = read_hdf(path, key="data", mode="r")
-    expected = DataFrame({"a": ["a", "b", None]}, dtype=pd.ArrowDtype(pa.string()))
+    expected = DataFrame(
+        {"a": ["a", "b", None]},
+        dtype=pd.ArrowDtype(pa.string()),
+        columns=Index(["a"], dtype=pd.ArrowDtype(pa.string())),
+    )
     tm.assert_frame_equal(result, expected)

From ff38a2908c429c7aca767857776eedd2e9daf42d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Thu, 10 Aug 2023 09:47:29 +0200
Subject: [PATCH 11/11] Fix mypy

---
 pandas/io/pytables.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 3c27b186dffe0..f26411f65d91f 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -3223,7 +3223,7 @@ def read(
         index = self.read_index("index", start=start, stop=stop)
         values = self.read_array("values", start=start, stop=stop)
         result = Series(values, index=index, name=self.name, copy=False)
-        if using_pyarrow_string_dtype() and is_string_array(result, skipna=True):
+        if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
             import pyarrow as pa
 
             result = result.astype(pd.ArrowDtype(pa.string()))
@@ -4680,9 +4680,12 @@ def read(
             else:
                 # Categorical
                 df = DataFrame._from_arrays([values], columns=cols_, index=index_)
-            if not using_pyarrow_string_dtype():
+            if not (using_pyarrow_string_dtype() and values.dtype.kind == "O"):
                 assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
-            if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
+            if using_pyarrow_string_dtype() and is_string_array(
+                values,  # type: ignore[arg-type]
+                skipna=True,
+            ):
                 import pyarrow as pa
 
                 df = df.astype(pd.ArrowDtype(pa.string()))