Skip to content

Commit ddceb8e

Browse files
BUG: prevent StringArray.__setitem__ from mutating the value being set as a side effect (#51299)
* BUG: prevent StringArray.__setitem__ from mutating the value being set as a side effect * add whatsnew
1 parent 68305c1 commit ddceb8e

File tree

3 files changed

+18
-1
lines changed

3 files changed

+18
-1
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1229,6 +1229,7 @@ Strings
12291229
^^^^^^^
12301230
- Bug in :func:`pandas.api.types.is_string_dtype` that would not return ``True`` for :class:`StringDtype` or :class:`ArrowDtype` with ``pyarrow.string()`` (:issue:`15585`)
12311231
- Bug in converting string dtypes to "datetime64[ns]" or "timedelta64[ns]" incorrectly raising ``TypeError`` (:issue:`36153`)
1232+
- Bug in setting values in a string-dtype column with an array, where the array being set was mutated as a side effect when it contained missing values (:issue:`51299`)
12321233
-
12331234

12341235
Interval

pandas/core/arrays/string_.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,10 @@ def __setitem__(self, key, value):
423423
if len(value) and not lib.is_string_array(value, skipna=True):
424424
raise TypeError("Must provide strings.")
425425

426-
value[isna(value)] = libmissing.NA
426+
mask = isna(value)
427+
if mask.any():
428+
value = value.copy()
429+
value[isna(value)] = libmissing.NA
427430

428431
super().__setitem__(key, value)
429432

pandas/tests/arrays/string_/test_string.py

+13
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,19 @@ def test_setitem_with_scalar_string(dtype):
7272
tm.assert_extension_array_equal(arr, expected)
7373

7474

75+
def test_setitem_with_array_with_missing(dtype):
76+
# ensure that when setting with an array of values, we don't mutate the
77+
# array `value` in __setitem__(self, key, value)
78+
arr = pd.array(["a", "b", "c"], dtype=dtype)
79+
value = np.array(["A", None])
80+
value_orig = value.copy()
81+
arr[[0, 1]] = value
82+
83+
expected = pd.array(["A", pd.NA, "c"], dtype=dtype)
84+
tm.assert_extension_array_equal(arr, expected)
85+
tm.assert_numpy_array_equal(value, value_orig)
86+
87+
7588
def test_astype_roundtrip(dtype):
7689
ser = pd.Series(pd.date_range("2000", periods=12))
7790
ser[0] = None

0 commit comments

Comments
 (0)