Skip to content

Commit dd4b872

Browse files
hombit, mroeschke, and pre-commit-ci[bot]
authored
BUG: Fix arrow extension array assignment by repeated key (#58616)
* Failing test_setitem_integer_array_with_repeats
* Arrow ext: fix assignment by repeated key
* Update whatsnew/v3.0.0.rst
* Update doc/source/whatsnew/v3.0.0.rst

Co-authored-by: Matthew Roeschke <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: Matthew Roeschke <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent a22073c commit dd4b872

File tree

3 files changed

+19
-1
lines changed

3 files changed

+19
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,7 @@ Sparse
466466

467467
ExtensionArray
468468
^^^^^^^^^^^^^^
469+
- Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`)
469470
- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)
470471

471472
Styler

pandas/core/arrays/arrow/array.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1880,7 +1880,8 @@ def __setitem__(self, key, value) -> None:
18801880
raise ValueError("Length of indexer and values mismatch")
18811881
if len(indices) == 0:
18821882
return
1883-
argsort = np.argsort(indices)
1883+
# GH#58530 wrong item assignment by repeated key
1884+
_, argsort = np.unique(indices, return_index=True)
18841885
indices = indices[argsort]
18851886
value = value.take(argsort)
18861887
mask = np.zeros(len(self), dtype=np.bool_)

pandas/tests/extension/base/setitem.py

+16
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,22 @@ def test_setitem_integer_array(self, data, idx, box_in_series):
202202
arr[idx] = arr[0]
203203
tm.assert_equal(arr, expected)
204204

205+
@pytest.mark.parametrize(
206+
"idx",
207+
[[0, 0, 1], pd.array([0, 0, 1], dtype="Int64"), np.array([0, 0, 1])],
208+
ids=["list", "integer-array", "numpy-array"],
209+
)
210+
def test_setitem_integer_array_with_repeats(self, data, idx, box_in_series):
211+
arr = data[:5].copy()
212+
expected = data.take([2, 3, 2, 3, 4])
213+
214+
if box_in_series:
215+
arr = pd.Series(arr)
216+
expected = pd.Series(expected)
217+
218+
arr[idx] = [arr[2], arr[2], arr[3]]
219+
tm.assert_equal(arr, expected)
220+
205221
@pytest.mark.parametrize(
206222
"idx, box_in_series",
207223
[

0 commit comments

Comments
 (0)