pandas-dev · phofl · Aug 29, 2023 · Aug 28, 2023 · Aug 28, 2023 · Aug 28, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -39,11 +39,15 @@ We are collecting feedback on this decision `here <https://github.com/pandas-dev
 Avoid NumPy object dtype for strings by default
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Previously, all strings were stored in columns with NumPy object dtype.
+Previously, all strings were stored in columns with NumPy object dtype by default.
 This release introduces an option ``future.infer_string`` that infers all
 strings as PyArrow backed strings with dtype ``"string[pyarrow_numpy]"`` instead.
 This is a new string dtype implementation that follows NumPy semantics in comparison
 operations and will return ``np.nan`` as the missing value indicator.
+When the option is enabled, the dtype ``"string"`` is also interpreted as a
+:class:`StringDtype` with storage set to ``"pyarrow_numpy"``; the value of the
+``mode.string_storage`` option is ignored in that case.
+
 This option only works if PyArrow is installed. PyArrow backed strings have a
 significantly reduced memory footprint and provide a big performance improvement
 compared to NumPy object (:issue:`54430`).

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -115,7 +115,11 @@ def na_value(self) -> libmissing.NAType | float:  # type: ignore[override]
 
     def __init__(self, storage=None) -> None:
         if storage is None:
-            storage = get_option("mode.string_storage")
+            infer_string = get_option("future.infer_string")
+            if infer_string:
+                storage = "pyarrow_numpy"
+            else:
+                storage = get_option("mode.string_storage")
         if storage not in {"python", "pyarrow", "pyarrow_numpy"}:
             raise ValueError(
                 f"Storage must be 'python' or 'pyarrow'. Got {storage} instead."

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -493,7 +493,8 @@ def use_inf_as_na_cb(key) -> None:
 
 string_storage_doc = """
 : string
-    The default storage for StringDtype.
+    The default storage for StringDtype. This option is ignored if
+    ``future.infer_string`` is set to True.
 """
 
 with cf.config_prefix("mode"):

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -2115,6 +2115,14 @@ def test_series_string_inference_array_string_dtype(self):
             ser = Series(np.array(["a", "b"]))
         tm.assert_series_equal(ser, expected)
 
+    def test_series_string_inference_storage_definition(self):
+        # GH#54793: dtype="string" uses pyarrow_numpy storage under infer_string
+        pytest.importorskip("pyarrow")
+        expected = Series(["a", "b"], dtype="string[pyarrow_numpy]")
+        with pd.option_context("future.infer_string", True):
+            result = Series(["a", "b"], dtype="string")
+        tm.assert_series_equal(result, expected)
+
 
 class TestSeriesConstructorIndexCoercion:
     def test_series_constructor_datetimelike_index_coercion(self):