diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 9eb5bbc8f07d5..3abe96e5241f1 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -326,6 +326,7 @@ Numeric Conversion ^^^^^^^^^^ +- Bug in :func:`astype` when called with ``str`` on unpickled array - the array might change in-place (:issue:`54654`) - Bug in :meth:`Series.convert_dtypes` not converting all NA column to ``null[pyarrow]`` (:issue:`55346`) - diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 38b34b4bb853c..bd6534494d973 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -792,7 +792,8 @@ cpdef ndarray[object] ensure_string_array( result = np.asarray(arr, dtype="object") - if copy and result is arr: + if copy and (result is arr or np.shares_memory(arr, result)): + # GH#54654 result = result.copy() elif not copy and result is arr: already_copied = False diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index 3d5556bdd2823..d462ce3d3187d 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -1,3 +1,5 @@ +import pickle + import numpy as np import pytest @@ -130,6 +132,15 @@ def test_astype_string_and_object_update_original( tm.assert_frame_equal(df2, df_orig) +def test_astype_string_copy_on_pickle_roundrip(): + # https://github.com/pandas-dev/pandas/issues/54654 + # ensure_string_array may alter array inplace + base = Series(np.array([(1, 2), None, 1], dtype="object")) + base_copy = pickle.loads(pickle.dumps(base)) + base_copy.astype(str) + tm.assert_series_equal(base, base_copy) + + def test_astype_dict_dtypes(using_copy_on_write): df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")}