
BUG: interchange bitmasks not supported in interchange/from_dataframe.py #52824


Merged: 13 commits, Apr 25, 2023
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.1.rst
@@ -28,6 +28,7 @@ Bug fixes
- Bug in :attr:`Series.dt.days` that would overflow ``int32`` number of days (:issue:`52391`)
- Bug in :class:`arrays.DatetimeArray` constructor returning an incorrect unit when passed a non-nanosecond numpy datetime array (:issue:`52555`)
- Bug in :func:`Series.median` with :class:`ArrowDtype` returning an approximate median (:issue:`52679`)
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on categorical dtypes (:issue:`49889`)
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on large string dtypes (:issue:`52795`)
- Bug in :func:`pandas.testing.assert_series_equal` where ``check_dtype=False`` would still raise for datetime or timedelta types with different resolutions (:issue:`52449`)
74 changes: 14 additions & 60 deletions pandas/core/interchange/from_dataframe.py
@@ -6,6 +6,8 @@

import numpy as np

from pandas.compat._optional import import_optional_dependency

import pandas as pd
from pandas.core.interchange.dataframe_protocol import (
Buffer,
@@ -23,7 +25,7 @@
DtypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64},
DtypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64},
DtypeKind.FLOAT: {32: np.float32, 64: np.float64},
DtypeKind.BOOL: {8: bool},
DtypeKind.BOOL: {1: bool, 8: bool},
}
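For context (an aside, not part of the diff): a bit width of 1 is the Arrow-style bit-packed boolean layout, one value per bit with least-significant-bit-first ordering, while a bit width of 8 stores one byte per value; both now map to NumPy's bool. A minimal NumPy-only illustration of the two layouts:

import numpy as np

values = [True, False, True, True]

# bit_width == 8: one byte per boolean value
byte_packed = np.array(values, dtype=np.uint8)       # -> [1, 0, 1, 1]

# bit_width == 1: Arrow-style bit packing, least-significant bit first
bit_packed = np.packbits(values, bitorder="little")  # -> [13] == [0b00001101]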


@@ -406,74 +408,26 @@ def buffer_to_ndarray(
# and size in the buffer plus the dtype on the column. Use DLPack as NumPy supports
# it since https://github.com/numpy/numpy/pull/19083
ctypes_type = np.ctypeslib.as_ctypes_type(column_dtype)
data_pointer = ctypes.cast(
buffer.ptr + (offset * bit_width // 8), ctypes.POINTER(ctypes_type)
)

if bit_width == 1:
assert length is not None, "`length` must be specified for a bit-mask buffer."
arr = np.ctypeslib.as_array(data_pointer, shape=(buffer.bufsize,))
return bitmask_to_bool_ndarray(arr, length, first_byte_offset=offset % 8)
pa = import_optional_dependency("pyarrow")
arr = pa.BooleanArray.from_buffers(
pa.bool_(),
length,
[None, pa.foreign_buffer(buffer.ptr, length)],
offset=offset,
)
return np.asarray(arr)
else:
data_pointer = ctypes.cast(
buffer.ptr + (offset * bit_width // 8), ctypes.POINTER(ctypes_type)
)
return np.ctypeslib.as_array(
data_pointer, shape=(buffer.bufsize // (bit_width // 8),)
)
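For reference, here is a standalone sketch (my illustration, assuming pyarrow is installed; it is not code from this PR) of the conversion the new branch performs: wrap the raw bit-packed buffer in a pyarrow buffer, let BooleanArray.from_buffers interpret it, and convert the result to a boolean NumPy array.

import numpy as np
import pyarrow as pa

# Three values packed into one byte, least-significant bit first:
# 0b00000101 -> True, False, True
packed = np.array([0b00000101], dtype=np.uint8)
buf = pa.py_buffer(packed.tobytes())

# from_buffers takes [validity_buffer, data_buffer]; no validity buffer here.
arr = pa.BooleanArray.from_buffers(pa.bool_(), 3, [None, buf], offset=0)
print(np.asarray(arr))  # [ True False  True]

In the PR itself the data is not copied: pa.foreign_buffer(buffer.ptr, length) wraps the interchange buffer's memory directly, and offset= carries any element offset, for example from a sliced column.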


Member Author commented: looks like this entire function was "dead code" on arrival, https://app.codecov.io/gh/pandas-dev/pandas/blob/main/pandas/core/interchange/from_dataframe.py shows the whole thing as uncovered by tests

Member commented: Ah, so we actually did have a pure python version of this conversion .. (just not used)

def bitmask_to_bool_ndarray(
bitmask: np.ndarray, mask_length: int, first_byte_offset: int = 0
) -> np.ndarray:
"""
Convert bit-mask to a boolean NumPy array.

Parameters
----------
bitmask : np.ndarray[uint8]
NumPy array of uint8 dtype representing the bitmask.
mask_length : int
Number of elements in the mask to interpret.
first_byte_offset : int, default: 0
Number of elements to offset from the start of the first byte.

Returns
-------
np.ndarray[bool]
"""
bytes_to_skip = first_byte_offset // 8
bitmask = bitmask[bytes_to_skip:]
first_byte_offset %= 8

bool_mask = np.zeros(mask_length, dtype=bool)

# Processing the first byte separately as it has its own offset
val = bitmask[0]
mask_idx = 0
bits_in_first_byte = min(8 - first_byte_offset, mask_length)
for j in range(bits_in_first_byte):
if val & (1 << (j + first_byte_offset)):
bool_mask[mask_idx] = True
mask_idx += 1

# `mask_length // 8` describes how many full bytes to process
for i in range((mask_length - bits_in_first_byte) // 8):
# doing `+ 1` as we already processed the first byte
val = bitmask[i + 1]
for j in range(8):
if val & (1 << j):
bool_mask[mask_idx] = True
mask_idx += 1

if len(bitmask) > 1:
# Processing reminder of last byte
val = bitmask[-1]
for j in range(len(bool_mask) - mask_idx):
if val & (1 << j):
bool_mask[mask_idx] = True
mask_idx += 1

return bool_mask


def set_nulls(
data: np.ndarray | pd.Series,
col: Column,
15 changes: 15 additions & 0 deletions pandas/tests/interchange/test_impl.py
@@ -104,6 +104,21 @@ def test_large_string_pyarrow():
assert pa.Table.equals(pa.interchange.from_dataframe(result), table)


def test_bitmasks_pyarrow():
# GH 52795
pa = pytest.importorskip("pyarrow", "11.0.0")

arr = [3.3, None, 2.1]
table = pa.table({"arr": arr})
Member commented: Maybe also test with table.slice(1), to ensure a sliced object works as well (that might cover the usage of offset in the new code)

Member Author commented: good one, thanks! that one didn't really work to begin with, so I had to make some slight changes to get it to work

exchange_df = table.__dataframe__()
result = from_dataframe(exchange_df)
expected = pd.DataFrame({"arr": [3.3, float("nan"), 2.1]})
tm.assert_frame_equal(result, expected)

# check round-trip
assert pa.Table.equals(pa.interchange.from_dataframe(result), table)
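Following the reviewer's table.slice(1) suggestion above, a rough sketch of what such a sliced-table test could look like (hypothetical test name and expected values, not necessarily the exact test that was merged):

def test_bitmasks_pyarrow_sliced():
    # Hypothetical sketch: slicing gives the interchange buffers a non-zero
    # offset, which exercises the `offset=` argument used in the new code.
    pa = pytest.importorskip("pyarrow", "11.0.0")

    table = pa.table({"arr": [3.3, None, 2.1]}).slice(1)
    result = from_dataframe(table.__dataframe__())
    expected = pd.DataFrame({"arr": [float("nan"), 2.1]})
    tm.assert_frame_equal(result, expected)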


@pytest.mark.parametrize(
"data", [int_data, uint_data, float_data, bool_data, datetime_data]
)