Skip to content

Commit a39f783

Browse files
BUG: Fix astype str issue 54654 (#54687)
* Fix issue #54654: after a pickle roundtrip, astype(str) might change the original array even when copy is True * changelog * Update v2.2.0.rst rephrase * rephrase * Update lib.pyx add gh comment * Update v2.2.0.rst * Update lib.pyx fix CR --------- Co-authored-by: Itay Azolay <[email protected]>
1 parent bd21f6b commit a39f783

File tree

3 files changed

+14
-1
lines changed

3 files changed

+14
-1
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ Numeric
349349

350350
Conversion
351351
^^^^^^^^^^
352+
- Bug in :func:`astype` when called with ``str`` on an unpickled array, where the array might be modified in place (:issue:`54654`)
352353
- Bug in :meth:`Series.convert_dtypes` not converting all NA column to ``null[pyarrow]`` (:issue:`55346`)
353354
-
354355

pandas/_libs/lib.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -792,7 +792,8 @@ cpdef ndarray[object] ensure_string_array(
792792

793793
result = np.asarray(arr, dtype="object")
794794

795-
if copy and result is arr:
795+
if copy and (result is arr or np.shares_memory(arr, result)):
796+
# GH#54654
796797
result = result.copy()
797798
elif not copy and result is arr:
798799
already_copied = False

pandas/tests/copy_view/test_astype.py

+11
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import pickle
2+
13
import numpy as np
24
import pytest
35

@@ -130,6 +132,15 @@ def test_astype_string_and_object_update_original(
130132
tm.assert_frame_equal(df2, df_orig)
131133

132134

135+
def test_astype_string_copy_on_pickle_roundrip():
136+
# https://github.com/pandas-dev/pandas/issues/54654
137+
# ensure_string_array may alter the array in place
138+
base = Series(np.array([(1, 2), None, 1], dtype="object"))
139+
base_copy = pickle.loads(pickle.dumps(base))
140+
base_copy.astype(str)
141+
tm.assert_series_equal(base, base_copy)
142+
143+
133144
def test_astype_dict_dtypes(using_copy_on_write):
134145
df = DataFrame(
135146
{"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")}

0 commit comments

Comments
 (0)