From e84b8e90a2b21203e3be5b2c2c41b2b94a5603e9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 13 Jan 2025 10:48:22 +0100 Subject: [PATCH 1/2] String dtype: disallow specifying the 'str' dtype with storage in [..] in string alias (#60661) (cherry picked from commit 7415aca37159a99f8f99d93a1908070ddf36178c) --- pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/dtypes/test_common.py | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e7efb8598ec61..542bc85110cad 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2242,7 +2242,7 @@ def construct_from_string(cls, string: str) -> ArrowDtype: ) if not string.endswith("[pyarrow]"): raise TypeError(f"'{string}' must end with '[pyarrow]'") - if string == "string[pyarrow]": + if string in ("string[pyarrow]", "str[pyarrow]"): # Ensure Registry.find skips ArrowDtype to use StringDtype instead raise TypeError("string[pyarrow] should be constructed by StringDtype") diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index f7442cf5d6d3c..272acfd7b83bf 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -835,3 +835,30 @@ def test_pandas_dtype_string_dtypes(string_storage): with pd.option_context("string_storage", string_storage): result = pandas_dtype("string") assert result == pd.StringDtype(string_storage, na_value=pd.NA) + + +def test_pandas_dtype_string_dtype_alias_with_storage(): + with pytest.raises(TypeError, match="not understood"): + pandas_dtype("str[python]") + + with pytest.raises(TypeError, match="not understood"): + pandas_dtype("str[pyarrow]") + + result = pandas_dtype("string[python]") + assert result == pd.StringDtype("python", na_value=pd.NA) + + if HAS_PYARROW: + result = pandas_dtype("string[pyarrow]") + assert result == pd.StringDtype("pyarrow", na_value=pd.NA) + else: + with pytest.raises( + ImportError, match="required for PyArrow backed StringArray" + ): + pandas_dtype("string[pyarrow]") + + +@td.skip_if_installed("pyarrow") +def test_construct_from_string_without_pyarrow_installed(): + # GH 57928 + with pytest.raises(ImportError, match="pyarrow>=10.0.1 is required"): + pd.Series([-1.5, 0.2, None], dtype="float32[pyarrow]") From 3681349c50640e65e7881a1f079280384b7d7545 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 14 Jan 2025 09:06:35 +0100 Subject: [PATCH 2/2] remove accidentally added test --- pandas/tests/dtypes/test_common.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 272acfd7b83bf..ceebfb1920594 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -855,10 +855,3 @@ def test_pandas_dtype_string_dtype_alias_with_storage(): ImportError, match="required for PyArrow backed StringArray" ): pandas_dtype("string[pyarrow]") - - -@td.skip_if_installed("pyarrow") -def test_construct_from_string_without_pyarrow_installed(): - # GH 57928 - with pytest.raises(ImportError, match="pyarrow>=10.0.1 is required"): - pd.Series([-1.5, 0.2, None], dtype="float32[pyarrow]")