Skip to content

Commit 10000db

Browse files
authored
BUG: Arrow setitem segfaults when len > 145 000 (pandas-dev#52075)
* BUG: Arrow setitem segfaults when len > 145 000
* Add gh ref
* Address review
* Restrict to bool type
1 parent a7def3e commit 10000db

File tree

2 files changed

+13
-0
lines changed

2 files changed

+13
-0
lines changed

pandas/core/arrays/arrow/array.py

+4
Original file line numberDiff line numberDiff line change
@@ -1634,6 +1634,10 @@ def _replace_with_mask(
16341634
indices = pa.array(indices, type=pa.int64())
16351635
replacements = replacements.take(indices)
16361636
return cls._if_else(mask, replacements, values)
1637+
if isinstance(values, pa.ChunkedArray) and pa.types.is_boolean(values.type):
1638+
# GH#52059 replace_with_mask segfaults for chunked array
1639+
# https://github.com/apache/arrow/issues/34634
1640+
values = values.combine_chunks()
16371641
try:
16381642
return pc.replace_with_mask(values, mask, replacements)
16391643
except pa.ArrowNotImplementedError:

pandas/tests/extension/test_arrow.py

+9
Original file line numberDiff line numberDiff line change
@@ -2377,3 +2377,12 @@ def test_pickle_old_arrowextensionarray():
23772377
tm.assert_extension_array_equal(result, expected)
23782378
assert result._pa_array == pa.chunked_array(data)
23792379
assert not hasattr(result, "_data")
2380+
2381+
2382+
def test_setitem_boolean_replace_with_mask_segfault():
    """Regression test for GH#52059: boolean setitem on a chunked bool array.

    Builds an all-True boolean ``ArrowExtensionArray`` of 145_000 elements
    backed by a chunked array, assigns ``False`` through an all-False mask
    (so no element is selected), and checks that the underlying pyarrow
    data compares equal to a copy taken before the assignment.
    """
    length = 145_000
    all_true = np.ones((length,), dtype=np.bool_)
    select_nothing = np.zeros((length,), dtype=np.bool_)

    result = ArrowExtensionArray(pa.chunked_array([all_true]))
    untouched = result.copy()

    # The mask selects nothing, so the assignment must leave the data as-is.
    result[select_nothing] = False

    assert result._pa_array == untouched._pa_array

0 commit comments

Comments
 (0)