From 47f6ad7c4fc366736bc8dd9027d4ce5f5beaeb78 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Feb 2023 16:40:37 +0100 Subject: [PATCH 1/2] BUG: avoid StringArray.__setitem__ mutating the value being set as a side effect --- pandas/core/arrays/string_.py | 5 ++++- pandas/tests/arrays/string_/test_string.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 478a91b54fc48..bef1ae0c04c4e 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -423,7 +423,10 @@ def __setitem__(self, key, value): if len(value) and not lib.is_string_array(value, skipna=True): raise TypeError("Must provide strings.") - value[isna(value)] = libmissing.NA + mask = isna(value) + if mask.any(): + value = value.copy() + value[isna(value)] = libmissing.NA super().__setitem__(key, value) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 20bd37e64a4e1..7e17efe4e7380 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -72,6 +72,19 @@ def test_setitem_with_scalar_string(dtype): tm.assert_extension_array_equal(arr, expected) +def test_setitem_with_array_with_missing(dtype): + # ensure that when setting with an array of values, we don't mutate the + # array `value` in __setitem__(self, key, value) + arr = pd.array(["a", "b", "c"], dtype=dtype) + value = np.array(["A", None]) + value_orig = value.copy() + arr[[0, 1]] = value + + expected = pd.array(["A", pd.NA, "c"], dtype=dtype) + tm.assert_extension_array_equal(arr, expected) + tm.assert_numpy_array_equal(value, value_orig) + + def test_astype_roundtrip(dtype): ser = pd.Series(pd.date_range("2000", periods=12)) ser[0] = None From 1c75d31d4eabd77fb79aaab8b4ba4faff8affa44 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Feb 2023 22:49:01 +0100 Subject: [PATCH 2/2] add whatsnew --- 
doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 43a34c8e18b2d..410690de1a1ec 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1229,6 +1229,7 @@ Strings ^^^^^^^ - Bug in :func:`pandas.api.types.is_string_dtype` that would not return ``True`` for :class:`StringDtype` or :class:`ArrowDtype` with ``pyarrow.string()`` (:issue:`15585`) - Bug in converting string dtypes to "datetime64[ns]" or "timedelta64[ns]" incorrectly raising ``TypeError`` (:issue:`36153`) +- Bug in setting values in a string-dtype column with an array, mutating the array as a side effect when it contains missing values (:issue:`51299`) - Interval