Skip to content

Commit efccff8

Browse files
authored
BUG: __from_arrow__ doesn't accept pyarrow null arrays for numeric masked types (#52223)
* BUG: __from_arrow__ doesn't accept pyarrow null arrays for numeric masked types
* simplify
* move whatsnew
* update whatsnew and add more tests
1 parent 8e19396 commit efccff8

File tree

5 files changed

+28
-2
lines changed

5 files changed

+28
-2
lines changed

doc/source/whatsnew/v2.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,9 @@ Sparse
316316

317317
ExtensionArray
318318
^^^^^^^^^^^^^^
319+
- Bug where the ``__from_arrow__`` method of masked ExtensionDtypes (e.g. :class:`Float64Dtype`, :class:`BooleanDtype`) would not accept pyarrow arrays of type ``pyarrow.null()`` (:issue:`52223`)
319320
- Bug in :meth:`Series.rank` returning wrong order for small values with ``Float64`` dtype (:issue:`52471`)
321+
-
320322

321323
Styler
322324
^^^^^^

pandas/core/arrays/arrow/_arrow_utils.py

+5
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ def pyarrow_array_to_numpy_and_mask(
4242
"""
4343
dtype = np.dtype(dtype)
4444

45+
if pyarrow.types.is_null(arr.type):
46+
# No initialization of data is needed since everything is null
47+
data = np.empty(len(arr), dtype=dtype)
48+
mask = np.zeros(len(arr), dtype=bool)
49+
return data, mask
4550
buflist = arr.buffers()
4651
# Since Arrow buffers might contain padding and the data might be offset,
4752
# the buffer gets sliced here before handing it to numpy.

pandas/core/arrays/boolean.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,22 @@ def __from_arrow__(
108108
"""
109109
import pyarrow
110110

111-
if array.type != pyarrow.bool_():
111+
if array.type != pyarrow.bool_() and not pyarrow.types.is_null(array.type):
112112
raise TypeError(f"Expected array of boolean type, got {array.type} instead")
113113

114114
if isinstance(array, pyarrow.Array):
115115
chunks = [array]
116+
length = len(array)
116117
else:
117118
# pyarrow.ChunkedArray
118119
chunks = array.chunks
120+
length = array.length()
121+
122+
if pyarrow.types.is_null(array.type):
123+
mask = np.ones(length, dtype=bool)
124+
# No need to init data, since all null
125+
data = np.empty(length, dtype=bool)
126+
return BooleanArray(data, mask)
119127

120128
results = []
121129
for arr in chunks:

pandas/core/arrays/numeric.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ def __from_arrow__(
7676
array_class = self.construct_array_type()
7777

7878
pyarrow_type = pyarrow.from_numpy_dtype(self.type)
79-
if not array.type.equals(pyarrow_type):
79+
if not array.type.equals(pyarrow_type) and not pyarrow.types.is_null(
80+
array.type
81+
):
8082
# test_from_arrow_type_error raise for string, but allow
8183
# through itemsize conversion GH#31896
8284
rt_dtype = pandas_dtype(array.type.to_pandas_dtype())

pandas/tests/arrays/masked/test_arrow_compat.py

+9
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,15 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays):
184184
tm.assert_numpy_array_equal(mask, mask_expected_empty)
185185

186186

187+
@pytest.mark.parametrize(
188+
"arr", [pa.nulls(10), pa.chunked_array([pa.nulls(4), pa.nulls(6)])]
189+
)
190+
def test_from_arrow_null(data, arr):
191+
res = data.dtype.__from_arrow__(arr)
192+
assert res.isna().all()
193+
assert len(res) == 10
194+
195+
187196
def test_from_arrow_type_error(data):
188197
# ensure that __from_arrow__ returns a TypeError when getting a wrong
189198
# array type

0 commit comments

Comments
 (0)