From a096f895f0bd1698166eaf4015158db5e9a6cd6a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 31 Oct 2024 09:19:02 +0100 Subject: [PATCH 1/2] String dtype: deprecate the pyarrow_numpy storage option --- doc/source/whatsnew/v2.3.0.rst | 2 +- pandas/core/arrays/string_.py | 15 +++++++++++++-- pandas/tests/arrays/string_/test_string.py | 6 ++++++ pandas/tests/extension/test_string.py | 4 ++-- 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index 01c2ed3821d7a..c5a76a243cb2e 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -54,7 +54,7 @@ notable_bug_fix1 Deprecations ~~~~~~~~~~~~ - Deprecated allowing non-``bool`` values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` for dtypes that do not already disallow these (:issue:`59615`) -- +- Deprecated the ``"pyarrow_numpy"`` storage option for :class:`StringDtype` (:issue:`60152`) .. --------------------------------------------------------------------------- .. _whatsnew_230.performance: diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index f20c4c8625475..d7e6fbf4fcbcd 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -7,6 +7,7 @@ Literal, cast, ) +import warnings import numpy as np @@ -27,6 +28,7 @@ ) from pandas.compat.numpy import function as nv from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.base import ( ExtensionDtype, @@ -154,7 +156,16 @@ def __init__( storage = "python" if storage == "pyarrow_numpy": - # TODO raise a deprecation warning + warnings.warn( + "The 'pyarrow_numpy' storage option name is deprecated and will be " + 'removed in pandas 3.0. 
Use \'pd.StringDtype(storage="pyarrow", ' + "na_value=np.nan)' to construct the same dtype.\nOr enable the " + "'pd.options.future.infer_string = True' option globally and use " + 'the "str" alias as a shorthand notation to specify a dtype ' + '(instead of "string[pyarrow_numpy]").', + FutureWarning, + stacklevel=find_stack_level(), + ) storage = "pyarrow" na_value = np.nan @@ -254,7 +265,7 @@ def construct_from_string(cls, string) -> Self: elif string == "string[pyarrow]": return cls(storage="pyarrow") elif string == "string[pyarrow_numpy]": - # TODO deprecate + # this is deprecated in the dtype __init__, remove this in pandas 3.0 return cls(storage="pyarrow_numpy") else: raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 33708be497f31..ab676087c49e1 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -42,6 +42,12 @@ def cls(dtype): return dtype.construct_array_type() +def test_dtype_constructor(): + with tm.assert_produces_warning(FutureWarning): + dtype = pd.StringDtype("pyarrow_numpy") + assert dtype == pd.StringDtype("pyarrow", na_value=np.nan) + + def test_dtype_equality(): pytest.importorskip("pyarrow") diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 509ae653e4793..d80205d2eb399 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -105,8 +105,8 @@ def test_eq_with_str(self, dtype): # only the NA-variant supports parametrized string alias assert dtype == f"string[{dtype.storage}]" elif dtype.storage == "pyarrow": - # TODO(infer_string) deprecate this - assert dtype == "string[pyarrow_numpy]" + with tm.assert_produces_warning(FutureWarning): + assert dtype == "string[pyarrow_numpy]" def test_is_not_string_type(self, dtype): # Different from BaseDtypeTests.test_is_not_string_type From 
86dc02a7dab3178ae4607b78f9d251b14a9cdda1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 31 Oct 2024 11:09:11 +0100 Subject: [PATCH 2/2] add pyarrow skip --- pandas/tests/arrays/string_/test_string.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index ab676087c49e1..8de5407a187c9 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -43,6 +43,8 @@ def cls(dtype): def test_dtype_constructor(): + pytest.importorskip("pyarrow") + with tm.assert_produces_warning(FutureWarning): dtype = pd.StringDtype("pyarrow_numpy") assert dtype == pd.StringDtype("pyarrow", na_value=np.nan)