Skip to content

Commit fd94972

Browse files
BUG: Slice Arrow buffer before passing it to numpy (pandas-dev#40896)
Fix mask buffer in test.
1 parent 2a8042c commit fd94972

File tree

2 files changed

+5
-6
lines changed

2 files changed

+5
-6
lines changed

pandas/core/arrays/_arrow_utils.py

-2
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,6 @@ def pyarrow_array_to_numpy_and_mask(arr, dtype):
33 33   # Since Arrow buffers might contain padding and the data might be offset,
34 34   # the buffer gets sliced here before handing it to numpy.
35 35   # See also https://github.com/pandas-dev/pandas/issues/40896
36    - # offset = arr.offset * arr.type.bit_width // 8
37    - # length = len(arr) * arr.type.bit_width // 8
38 36   offset = arr.offset * dtype.itemsize
39 37   length = len(arr) * dtype.itemsize
40 38   data_buf = buflist[1][offset : offset + length]

pandas/tests/arrays/test_arrow_utils.py

+5 -4
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ def test_pyarrow_array_to_numpy_and_mask():
25 25
26 26   data, mask = pyarrow_array_to_numpy_and_mask(pa_array, dtype)
27 27   tm.assert_numpy_array_equal(data, np_expected)
28    - assert (mask == mask_expected).all()
   28 + tm.assert_numpy_array_equal(mask, mask_expected)
29 29
30 30   mask_buffer = pa_array.buffers()[0]
31 31   data_buffer = pa_array.buffers()[1].to_pybytes()
@@ -41,18 +41,19 @@ def test_pyarrow_array_to_numpy_and_mask():
41 41   pa_array_trail.validate()
42 42   data, mask = pyarrow_array_to_numpy_and_mask(pa_array_trail, dtype)
43 43   tm.assert_numpy_array_equal(data, np_expected)
44    - assert (mask == mask_expected).all()
   44 + tm.assert_numpy_array_equal(mask, mask_expected)
45 45
46 46   # Add offset to the buffer.
47 47   offset = b"\x00" * (pa_array.type.bit_width // 8)
48 48   data_buffer_offset = pa.py_buffer(offset + data_buffer)
   49 + mask_buffer_offset = pa.py_buffer(b"\x0F")
49 50   pa_array_offset = pa.Array.from_buffers(
50 51   type=pa_array.type,
51 52   length=len(pa_array),
52    - buffers=[mask_buffer, data_buffer_offset],
   53 + buffers=[mask_buffer_offset, data_buffer_offset],
53 54   offset=pa_array.offset + 1,
54 55   )
55 56   pa_array_offset.validate()
56 57   data, mask = pyarrow_array_to_numpy_and_mask(pa_array_offset, dtype)
57 58   tm.assert_numpy_array_equal(data, np_expected)
58    - assert (mask == mask_expected).all()
   59 + tm.assert_numpy_array_equal(mask, mask_expected)

0 commit comments

Comments
 (0)