diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index a037e50593737..fd19c84f8ab23 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -316,7 +316,9 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ +- Bug where the ``__from_arrow__`` method of masked ExtensionDtypes (e.g. :class:`Float64Dtype`, :class:`BooleanDtype`) would not accept pyarrow arrays of type ``pyarrow.null()`` (:issue:`52223`) - Bug in :meth:`Series.rank` returning wrong order for small values with ``Float64`` dtype (:issue:`52471`) +- Styler ^^^^^^ diff --git a/pandas/core/arrays/arrow/_arrow_utils.py b/pandas/core/arrays/arrow/_arrow_utils.py index 6e6ef6a2c20a8..2a053fac2985c 100644 --- a/pandas/core/arrays/arrow/_arrow_utils.py +++ b/pandas/core/arrays/arrow/_arrow_utils.py @@ -42,6 +42,11 @@ def pyarrow_array_to_numpy_and_mask( """ dtype = np.dtype(dtype) + if pyarrow.types.is_null(arr.type): + # No initialization of data is needed since everything is null + data = np.empty(len(arr), dtype=dtype) + mask = np.zeros(len(arr), dtype=bool) + return data, mask buflist = arr.buffers() # Since Arrow buffers might contain padding and the data might be offset, # the buffer gets sliced here before handing it to numpy. 
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 54bd4220bc060..f6bc8a87a4c60 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -108,14 +108,22 @@ def __from_arrow__( """ import pyarrow - if array.type != pyarrow.bool_(): + if array.type != pyarrow.bool_() and not pyarrow.types.is_null(array.type): raise TypeError(f"Expected array of boolean type, got {array.type} instead") if isinstance(array, pyarrow.Array): chunks = [array] + length = len(array) else: # pyarrow.ChunkedArray chunks = array.chunks + length = array.length() + + if pyarrow.types.is_null(array.type): + mask = np.ones(length, dtype=bool) + # No need to init data, since all null + data = np.empty(length, dtype=bool) + return BooleanArray(data, mask) results = [] for arr in chunks: diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 8d629b88edd26..344946ad68d32 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -76,7 +76,9 @@ def __from_arrow__( array_class = self.construct_array_type() pyarrow_type = pyarrow.from_numpy_dtype(self.type) - if not array.type.equals(pyarrow_type): + if not array.type.equals(pyarrow_type) and not pyarrow.types.is_null( + array.type + ): # test_from_arrow_type_error raise for string, but allow # through itemsize conversion GH#31896 rt_dtype = pandas_dtype(array.type.to_pandas_dtype()) diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py index 6b0081321ef22..fc2094bd9f4a8 100644 --- a/pandas/tests/arrays/masked/test_arrow_compat.py +++ b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -184,6 +184,15 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays): tm.assert_numpy_array_equal(mask, mask_expected_empty) +@pytest.mark.parametrize( + "arr", [pa.nulls(10), pa.chunked_array([pa.nulls(4), pa.nulls(6)])] +) +def test_from_arrow_null(data, arr): + res = data.dtype.__from_arrow__(arr) + 
assert res.isna().all() + assert len(res) == 10 + + def test_from_arrow_type_error(data): # ensure that __from_arrow__ returns a TypeError when getting a wrong # array type