Skip to content

Commit 9fc32de

Browse files
mroeschke and phofl authored
Backport PR pandas-dev#52075 on branch 2.0.x (BUG: Arrow setitem segfaults when len > 145 000) (pandas-dev#52259)
* BUG: Arrow setitem segfaults when len > 145 000 (pandas-dev#52075)
* BUG: Arrow setitem segfaults when len > 145 000
* Add gh ref
* Address review
* Restrict to bool type

(cherry picked from commit 10000db)

* _data

---------

Co-authored-by: Patrick Hoefler <[email protected]>
1 parent ab9885e commit 9fc32de

File tree

2 files changed

+13
-0
lines changed

2 files changed

+13
-0
lines changed

pandas/core/arrays/arrow/array.py

+4
Original file line numberDiff line numberDiff line change
@@ -1618,6 +1618,10 @@ def _replace_with_mask(
16181618
indices = pa.array(indices, type=pa.int64())
16191619
replacements = replacements.take(indices)
16201620
return cls._if_else(mask, replacements, values)
1621+
if isinstance(values, pa.ChunkedArray) and pa.types.is_boolean(values.type):
1622+
# GH#52059 replace_with_mask segfaults for chunked array
1623+
# https://github.com/apache/arrow/issues/34634
1624+
values = values.combine_chunks()
16211625
try:
16221626
return pc.replace_with_mask(values, mask, replacements)
16231627
except pa.ArrowNotImplementedError:

pandas/tests/extension/test_arrow.py

+9
Original file line numberDiff line numberDiff line change
@@ -2333,3 +2333,12 @@ def test_series_from_string_array(dtype):
23332333
ser = pd.Series(arr, dtype=dtype)
23342334
expected = pd.Series(ArrowExtensionArray(arr), dtype=dtype)
23352335
tm.assert_series_equal(ser, expected)
2336+
2337+
2338+
def test_setitem_boolean_replace_with_mask_segfault():
    """Regression test for GH#52059.

    Builds a boolean ``ArrowExtensionArray`` backed by a chunked array of
    145,000 ``True`` values, assigns ``False`` through an all-``False``
    boolean mask (so no element is selected), and checks that the
    underlying data still equals an untouched copy.
    """
    # GH#52059
    length = 145_000
    all_true = np.ones((length,), dtype=np.bool_)
    arr = ArrowExtensionArray(pa.chunked_array([all_true]))
    expected = arr.copy()

    # The mask selects nothing, so the assignment must leave ``arr`` unchanged.
    nothing_selected = np.zeros((length,), dtype=np.bool_)
    arr[nothing_selected] = False

    assert arr._data == expected._data

0 commit comments

Comments
 (0)