Skip to content

Commit ff85a80

Browse files
BUG: Slice Arrow buffer before passing it to numpy (pandas-dev#40896)
Enforce creation of Arrow bitmask buffer.
1 parent bd70705, commit ff85a80

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

pandas/tests/arrays/masked/test_arrow_compat.py

+10 -7
Original file line number | Diff line number | Diff line change
@@ -69,9 +69,12 @@ def np_dtype_to_arrays(any_real_dtype):
6969
np_dtype = np.dtype(any_real_dtype)
7070
pa_type = pa.from_numpy_dtype(np_dtype)
7171

72-
pa_array = pa.array([0, 1, 2], type=pa_type)
72+
# None ensures the creation of a bitmask buffer.
73+
pa_array = pa.array([0, 1, 2, None], type=pa_type)
74+
# Since masked Arrow buffer slots are not required to contain a specific
75+
# value, assert only the first three values of the created np.array
7376
np_expected = np.array([0, 1, 2], dtype=np_dtype)
74-
mask_expected = np.array([True, True, True])
77+
mask_expected = np.array([True, True, True, False])
7578
return np_dtype, pa_array, np_expected, mask_expected
7679

7780

@@ -89,7 +92,7 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays):
8992

9093
np_dtype, pa_array, np_expected, mask_expected = np_dtype_to_arrays
9194
data, mask = pyarrow_array_to_numpy_and_mask(pa_array, np_dtype)
92-
tm.assert_numpy_array_equal(data, np_expected)
95+
tm.assert_numpy_array_equal(data[:3], np_expected)
9396
tm.assert_numpy_array_equal(mask, mask_expected)
9497

9598
mask_buffer = pa_array.buffers()[0]
@@ -106,13 +109,13 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays):
106109
)
107110
pa_array_trail.validate()
108111
data, mask = pyarrow_array_to_numpy_and_mask(pa_array_trail, np_dtype)
109-
tm.assert_numpy_array_equal(data, np_expected)
112+
tm.assert_numpy_array_equal(data[:3], np_expected)
110113
tm.assert_numpy_array_equal(mask, mask_expected)
111114

112115
# Add offset to the buffer.
113116
offset = b"\x00" * (pa_array.type.bit_width // 8)
114117
data_buffer_offset = pa.py_buffer(offset + data_buffer_bytes)
115-
mask_buffer_offset = pa.py_buffer(b"\x0F")
118+
mask_buffer_offset = pa.py_buffer(b"\x0E")
116119
pa_array_offset = pa.Array.from_buffers(
117120
type=pa_array.type,
118121
length=len(pa_array),
@@ -121,7 +124,7 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays):
121124
)
122125
pa_array_offset.validate()
123126
data, mask = pyarrow_array_to_numpy_and_mask(pa_array_offset, np_dtype)
124-
tm.assert_numpy_array_equal(data, np_expected)
127+
tm.assert_numpy_array_equal(data[:3], np_expected)
125128
tm.assert_numpy_array_equal(mask, mask_expected)
126129

127130
# Empty array
@@ -136,5 +139,5 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays):
136139
)
137140
pa_array_offset.validate()
138141
data, mask = pyarrow_array_to_numpy_and_mask(pa_array_offset, np_dtype)
139-
tm.assert_numpy_array_equal(data, np_expected_empty)
142+
tm.assert_numpy_array_equal(data[:3], np_expected_empty)
140143
tm.assert_numpy_array_equal(mask, mask_expected_empty)

0 commit comments

Comments
 (0)