Skip to content

Commit 5232bee

Browse files
authored
DOC/TST: Document numpy 2.0 support and add tests for string array (#58202)
1 parent 4dc3f79 commit 5232bee

File tree

3 files changed

+53
-0
lines changed

3 files changed

+53
-0
lines changed

doc/source/whatsnew/v2.2.2.rst

+15
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,21 @@ including other versions of pandas.
99
{{ header }}
1010

1111
.. ---------------------------------------------------------------------------
12+
13+
.. _whatsnew_222.np2_compat:
14+
15+
Pandas 2.2.2 is now compatible with numpy 2.0
16+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
17+
18+
Pandas 2.2.2 is the first version of pandas that is generally compatible with the upcoming
19+
numpy 2.0 release, and wheels for pandas 2.2.2 will work with both numpy 1.x and 2.x.
20+
21+
One major caveat is that arrays created with numpy 2.0's new ``StringDType`` will convert
22+
to ``object`` dtyped arrays upon :class:`Series`/:class:`DataFrame` creation.
23+
Full support for numpy 2.0's ``StringDType`` is expected to land in pandas 3.0.
24+
25+
As usual, please report any bugs discovered to our `issue tracker <https://github.com/pandas-dev/pandas/issues/new/choose>`_.
26+
1227
.. _whatsnew_222.regressions:
1328

1429
Fixed regressions

pandas/tests/frame/test_constructors.py

+19
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from pandas._config import using_pyarrow_string_dtype
2525

2626
from pandas._libs import lib
27+
from pandas.compat.numpy import np_version_gt2
2728
from pandas.errors import IntCastingNaNError
2829

2930
from pandas.core.dtypes.common import is_integer_dtype
@@ -3052,6 +3053,24 @@ def test_from_dict_with_columns_na_scalar(self):
30523053
expected = DataFrame({"a": Series([pd.NaT, pd.NaT])})
30533054
tm.assert_frame_equal(result, expected)
30543055

3056+
# TODO: make this not cast to object in pandas 3.0
3057+
@pytest.mark.skipif(
3058+
not np_version_gt2, reason="StringDType only available in numpy 2 and above"
3059+
)
3060+
@pytest.mark.parametrize(
3061+
"data",
3062+
[
3063+
{"a": ["a", "b", "c"], "b": [1.0, 2.0, 3.0], "c": ["d", "e", "f"]},
3064+
],
3065+
)
3066+
def test_np_string_array_object_cast(self, data):
3067+
from numpy.dtypes import StringDType
3068+
3069+
data["a"] = np.array(data["a"], dtype=StringDType())
3070+
res = DataFrame(data)
3071+
assert res["a"].dtype == np.object_
3072+
assert (res["a"] == data["a"]).all()
3073+
30553074

30563075
def get1(obj): # TODO: make a helper in tm?
30573076
if isinstance(obj, Series):

pandas/tests/series/test_constructors.py

+19
Original file line numberDiff line numberDiff line change
@@ -2176,6 +2176,25 @@ def test_series_constructor_infer_multiindex(self, container, data):
21762176
multi = Series(data, index=indexes)
21772177
assert isinstance(multi.index, MultiIndex)
21782178

2179+
# TODO: make this not cast to object in pandas 3.0
2180+
@pytest.mark.skipif(
2181+
not np_version_gt2, reason="StringDType only available in numpy 2 and above"
2182+
)
2183+
@pytest.mark.parametrize(
2184+
"data",
2185+
[
2186+
["a", "b", "c"],
2187+
["a", "b", np.nan],
2188+
],
2189+
)
2190+
def test_np_string_array_object_cast(self, data):
2191+
from numpy.dtypes import StringDType
2192+
2193+
arr = np.array(data, dtype=StringDType())
2194+
res = Series(arr)
2195+
assert res.dtype == np.object_
2196+
assert (res == data).all()
2197+
21792198

21802199
class TestSeriesConstructorInternals:
21812200
def test_constructor_no_pandas_array(self):

0 commit comments

Comments
 (0)