Skip to content

Commit 9a261ae

Browse files
String dtype: deprecate the pyarrow_numpy storage option (pandas-dev#60152)
* String dtype: deprecate the pyarrow_numpy storage option * add pyarrow skip (cherry picked from commit 1908f2e)
1 parent aedb17a commit 9a261ae

File tree

4 files changed

+24
-4
lines changed

4 files changed

+24
-4
lines changed

doc/source/whatsnew/v2.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ notable_bug_fix1
5454
Deprecations
5555
~~~~~~~~~~~~
5656
- Deprecated allowing non-``bool`` values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` for dtypes that do not already disallow these (:issue:`59615`)
57+
- Deprecated the ``"pyarrow_numpy"`` storage option for :class:`StringDtype` (:issue:`60152`)
5758
- The deprecation of setting the argument ``include_groups`` to ``True`` in :meth:`DataFrameGroupBy.apply` has been promoted from a ``DeprecationWarning`` to ``FutureWarning``; only ``False`` will be allowed (:issue:`7155`)
5859

5960
.. ---------------------------------------------------------------------------

pandas/core/arrays/string_.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
Literal,
88
cast,
99
)
10+
import warnings
1011

1112
import numpy as np
1213

@@ -27,6 +28,7 @@
2728
)
2829
from pandas.compat.numpy import function as nv
2930
from pandas.util._decorators import doc
31+
from pandas.util._exceptions import find_stack_level
3032

3133
from pandas.core.dtypes.base import (
3234
ExtensionDtype,
@@ -150,7 +152,16 @@ def __init__(
150152
storage = "python"
151153

152154
if storage == "pyarrow_numpy":
153-
# TODO raise a deprecation warning
155+
warnings.warn(
156+
"The 'pyarrow_numpy' storage option name is deprecated and will be "
157+
'removed in pandas 3.0. Use \'pd.StringDtype(storage="pyarrow", '
158+
"na_value=np.nan)' to construct the same dtype.\nOr enable the "
159+
"'pd.options.future.infer_string = True' option globally and use "
160+
'the "str" alias as a shorthand notation to specify a dtype '
161+
'(instead of "string[pyarrow_numpy]").',
162+
FutureWarning,
163+
stacklevel=find_stack_level(),
164+
)
154165
storage = "pyarrow"
155166
na_value = np.nan
156167

@@ -250,7 +261,7 @@ def construct_from_string(cls, string) -> Self:
250261
elif string == "string[pyarrow]":
251262
return cls(storage="pyarrow")
252263
elif string == "string[pyarrow_numpy]":
253-
# TODO deprecate
264+
# this is deprecated in the dtype __init__, remove this in pandas 3.0
254265
return cls(storage="pyarrow_numpy")
255266
else:
256267
raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")

pandas/tests/arrays/string_/test_string.py

+8
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,14 @@ def cls(dtype):
4141
return dtype.construct_array_type()
4242

4343

44+
def test_dtype_constructor():
45+
pytest.importorskip("pyarrow")
46+
47+
with tm.assert_produces_warning(FutureWarning):
48+
dtype = pd.StringDtype("pyarrow_numpy")
49+
assert dtype == pd.StringDtype("pyarrow", na_value=np.nan)
50+
51+
4452
def test_dtype_equality():
4553
pytest.importorskip("pyarrow")
4654

pandas/tests/extension/test_string.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ def test_eq_with_str(self, dtype):
104104
# only the NA-variant supports parametrized string alias
105105
assert dtype == f"string[{dtype.storage}]"
106106
elif dtype.storage == "pyarrow":
107-
# TODO(infer_string) deprecate this
108-
assert dtype == "string[pyarrow_numpy]"
107+
with tm.assert_produces_warning(FutureWarning):
108+
assert dtype == "string[pyarrow_numpy]"
109109

110110
def test_is_not_string_type(self, dtype):
111111
# Different from BaseDtypeTests.test_is_not_string_type

0 commit comments

Comments
 (0)