Skip to content

Commit 8ab673c

Browse files
String dtype: honor mode.string_storage option (and change default to None) (#59488)
* String dtype: honor mode.string_storage option (and change default to None)
* fix test + explicitly test default
* use 'auto' instead of None
1 parent 96d732e commit 8ab673c

File tree

4 files changed

+24
-18
lines changed

4 files changed

+24
-18
lines changed

pandas/core/arrays/string_.py

+8-4
Original file line number | Diff line number | Diff line change
@@ -140,12 +140,16 @@ def __init__(
140140
# infer defaults
141141
if storage is None:
142142
if na_value is not libmissing.NA:
143-
if HAS_PYARROW:
144-
storage = "pyarrow"
145-
else:
146-
storage = "python"
143+
storage = get_option("mode.string_storage")
144+
if storage == "auto":
145+
if HAS_PYARROW:
146+
storage = "pyarrow"
147+
else:
148+
storage = "python"
147149
else:
148150
storage = get_option("mode.string_storage")
151+
if storage == "auto":
152+
storage = "python"
149153

150154
if storage == "pyarrow_numpy":
151155
# TODO raise a deprecation warning

pandas/core/config_init.py

+3-4
Original file line number | Diff line number | Diff line change
@@ -452,13 +452,12 @@ def is_terminal() -> bool:
452452

453453
string_storage_doc = """
454454
: string
455-
The default storage for StringDtype. This option is ignored if
456-
``future.infer_string`` is set to True.
455+
The default storage for StringDtype.
457456
"""
458457

459458

460459
def is_valid_string_storage(value: Any) -> None:
461-
legal_values = ["python", "pyarrow"]
460+
legal_values = ["auto", "python", "pyarrow"]
462461
if value not in legal_values:
463462
msg = "Value must be one of python|pyarrow"
464463
if value == "pyarrow_numpy":
@@ -473,7 +472,7 @@ def is_valid_string_storage(value: Any) -> None:
473472
with cf.config_prefix("mode"):
474473
cf.register_option(
475474
"string_storage",
476-
"python",
475+
"auto",
477476
string_storage_doc,
478477
# validator=is_one_of_factory(["python", "pyarrow"]),
479478
validator=is_valid_string_storage,

pandas/tests/arrays/string_/test_string_arrow.py

+4-6
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas.compat import HAS_PYARROW
87
import pandas.util._test_decorators as td
98

109
import pandas as pd
@@ -27,11 +26,10 @@ def test_eq_all_na():
2726
tm.assert_extension_array_equal(result, expected)
2827

2928

30-
def test_config(string_storage, request, using_infer_string):
31-
if using_infer_string and string_storage == "python" and HAS_PYARROW:
32-
# string storage with na_value=NaN always uses pyarrow if available
33-
# -> does not yet honor the option
34-
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
29+
def test_config(string_storage, using_infer_string):
30+
# with the default string_storage setting
31+
# always "python" at the moment
32+
assert StringDtype().storage == "python"
3533

3634
with pd.option_context("string_storage", string_storage):
3735
assert StringDtype().storage == string_storage

pandas/tests/dtypes/test_common.py

+9-4
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import numpy as np
44
import pytest
55

6+
from pandas.compat import HAS_PYARROW
67
import pandas.util._test_decorators as td
78

89
from pandas.core.dtypes.astype import astype_array
@@ -802,13 +803,17 @@ def test_pandas_dtype_ea_not_instance():
802803

803804

804805
def test_pandas_dtype_string_dtypes(string_storage):
805-
# TODO(infer_string) remove skip if "python" is supported
806-
pytest.importorskip("pyarrow")
806+
with pd.option_context("future.infer_string", True):
807+
# with the default string_storage setting
808+
result = pandas_dtype("str")
809+
assert result == pd.StringDtype(
810+
"pyarrow" if HAS_PYARROW else "python", na_value=np.nan
811+
)
812+
807813
with pd.option_context("future.infer_string", True):
808814
with pd.option_context("string_storage", string_storage):
809815
result = pandas_dtype("str")
810-
# TODO(infer_string) hardcoded to pyarrow until python is supported
811-
assert result == pd.StringDtype("pyarrow", na_value=np.nan)
816+
assert result == pd.StringDtype(string_storage, na_value=np.nan)
812817

813818
with pd.option_context("future.infer_string", False):
814819
with pd.option_context("string_storage", string_storage):

0 commit comments

Comments
 (0)