pandas-dev · jorisvandenbossche · Apr 21, 2021 · Apr 20, 2021 · Apr 20, 2021 · Apr 21, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -624,6 +624,7 @@ Performance improvements
 - Performance improvement in :class:`Styler` where render times are more than 50% reduced (:issue:`39972` :issue:`39952`)
 - Performance improvement in :meth:`core.window.ewm.ExponentialMovingWindow.mean` with ``times`` (:issue:`39784`)
 - Performance improvement in :meth:`.GroupBy.apply` when requiring the python fallback implementation (:issue:`40176`)
+- Performance improvement in the conversion of a pyarrow boolean array to a pandas nullable boolean array (:issue:`41051`)
 - Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`)
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -114,6 +114,9 @@ def __from_arrow__(
         """
         import pyarrow
 
+        if array.type != pyarrow.bool_():
+            raise TypeError(f"Expected array of boolean type, got {array.type} instead")
+
         if isinstance(array, pyarrow.Array):
             chunks = [array]
         else:
@@ -122,8 +125,19 @@ def __from_arrow__(
 
         results = []
         for arr in chunks:
-            # TODO should optimize this without going through object array
-            bool_arr = BooleanArray._from_sequence(np.array(arr))
+            buflist = arr.buffers()
+            data = pyarrow.BooleanArray.from_buffers(
+                arr.type, len(arr), [None, buflist[1]], offset=arr.offset
+            ).to_numpy(zero_copy_only=False)
+            if arr.null_count != 0:
+                mask = pyarrow.BooleanArray.from_buffers(
+                    arr.type, len(arr), [None, buflist[0]], offset=arr.offset
+                ).to_numpy(zero_copy_only=False)
+                mask = ~mask
+            else:
+                mask = np.zeros(len(arr), dtype=bool)
+
+            bool_arr = BooleanArray(data, mask)
             results.append(bool_arr)
 
         return BooleanArray._concat_same_type(results)

diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py
@@ -55,12 +55,39 @@ def test_arrow_from_arrow_uint():
 
 
 @td.skip_if_no("pyarrow", min_version="0.16.0")
-def test_arrow_sliced():
+def test_arrow_sliced(data):
     # https://github.com/pandas-dev/pandas/issues/38525
     import pyarrow as pa
 
-    df = pd.DataFrame({"a": pd.array([0, None, 2, 3, None], dtype="Int64")})
+    df = pd.DataFrame({"a": data})
     table = pa.table(df)
     result = table.slice(2, None).to_pandas()
     expected = df.iloc[2:].reset_index(drop=True)
     tm.assert_frame_equal(result, expected)
+
+    # no missing values
+    df2 = df.fillna(data[0])
+    table = pa.table(df2)
+    result = table.slice(2, None).to_pandas()
+    expected = df2.iloc[2:].reset_index(drop=True)
+    tm.assert_frame_equal(result, expected)
+
+
+@td.skip_if_no("pyarrow", min_version="0.16.0")
+def test_from_arrow_type_error(request, data):
+    # ensure that __from_arrow__ raises a TypeError when getting a wrong
+    # array type
+    import pyarrow as pa
+
+    if data.dtype != "boolean":
+        # TODO numeric dtypes cast any incoming array to the correct dtype
+        # instead of erroring
+        request.node.add_marker(
+            pytest.mark.xfail(reason="numeric dtypes don't error but cast")
+        )
+
+    arr = pa.array(data).cast("string")
+    with pytest.raises(TypeError, match=None):
+        # we don't test the exact error message, only the fact that it raises
+        # a TypeError is relevant
+        data.dtype.__from_arrow__(arr)