From cc827bbd527e1faf54e57f860f37e17d40c1cd3d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 28 Aug 2023 12:32:09 +0200
Subject: [PATCH 1/3] Infer string storage based on infer_string option

---
 pandas/core/arrays/string_.py            | 6 +++++-
 pandas/tests/series/test_constructors.py | 7 +++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 1eaae0807fee1..72ba95e5fa258 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -115,7 +115,11 @@ def na_value(self) -> libmissing.NAType | float: # type: ignore[override]
 
     def __init__(self, storage=None) -> None:
         if storage is None:
-            storage = get_option("mode.string_storage")
+            infer_string = get_option("future.infer_string")
+            if infer_string:
+                storage = "pyarrow_numpy"
+            else:
+                storage = get_option("mode.string_storage")
         if storage not in {"python", "pyarrow", "pyarrow_numpy"}:
             raise ValueError(
                 f"Storage must be 'python' or 'pyarrow'. Got {storage} instead."
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index ef734e9664844..b30758a61f880 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -2115,6 +2115,13 @@ def test_series_string_inference_array_string_dtype(self):
             ser = Series(np.array(["a", "b"]))
         tm.assert_series_equal(ser, expected)
 
+    def test_series_string_inference_storage_definition(self):
+        # GH#54793
+        expected = Series(["a", "b"], dtype="string[pyarrow_numpy]")
+        with pd.option_context("future.infer_string", True):
+            result = Series(["a", "b"], dtype="string")
+        tm.assert_series_equal(result, expected)
+
 
 class TestSeriesConstructorIndexCoercion:
     def test_series_constructor_datetimelike_index_coercion(self):

From da7d28e21d36b45c1674edf719d0783bf1b1f06d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 28 Aug 2023 13:22:52 +0200
Subject: [PATCH 2/3] skip for no arrow

---
 pandas/tests/series/test_constructors.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index b30758a61f880..2c3fdf627788a 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -2117,6 +2117,7 @@ def test_series_string_inference_array_string_dtype(self):
 
     def test_series_string_inference_storage_definition(self):
         # GH#54793
+        pytest.importorskip("pyarrow")
         expected = Series(["a", "b"], dtype="string[pyarrow_numpy]")
         with pd.option_context("future.infer_string", True):
             result = Series(["a", "b"], dtype="string")

From 4ffa59fc2b35c20e0f11bd0c0e40403c7b810329 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 28 Aug 2023 14:29:15 +0200
Subject: [PATCH 3/3] Update docs

---
 doc/source/whatsnew/v2.1.0.rst | 6 +++++-
 pandas/core/config_init.py     | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git 
a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 19a8500928ab7..7e5be1fdde71b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -39,11 +39,15 @@ We are collecting feedback on this decision `here None: string_storage_doc = """ : string - The default storage for StringDtype. + The default storage for StringDtype. This option is ignored if + ``future.infer_string`` is set to True. """ with cf.config_prefix("mode"):