diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 278971ef88a0f..5cb5e91f74e3f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -465,6 +465,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ +- Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) Styler diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0240433cdb683..96e77e4b920d6 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1880,7 +1880,8 @@ def __setitem__(self, key, value) -> None: raise ValueError("Length of indexer and values mismatch") if len(indices) == 0: return - argsort = np.argsort(indices) + # GH#58530 wrong item assignment by repeated key + _, argsort = np.unique(indices, return_index=True) indices = indices[argsort] value = value.take(argsort) mask = np.zeros(len(self), dtype=np.bool_) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 3fb2fc09eaa79..a455b21b9932a 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -202,6 +202,22 @@ def test_setitem_integer_array(self, data, idx, box_in_series): arr[idx] = arr[0] tm.assert_equal(arr, expected) + @pytest.mark.parametrize( + "idx", + [[0, 0, 1], pd.array([0, 0, 1], dtype="Int64"), np.array([0, 0, 1])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_setitem_integer_array_with_repeats(self, data, idx, box_in_series): + arr = data[:5].copy() + expected = data.take([2, 3, 2, 3, 4]) + + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + + arr[idx] = [arr[2], arr[2], arr[3]] + tm.assert_equal(arr, expected) + @pytest.mark.parametrize( "idx, box_in_series", [