Skip to content

Backport PR #54685 on branch 2.1.x (ENH: support integer bitwise ops in ArrowExtensionArray) #54691

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ Other enhancements
- :meth:`Series.cummax`, :meth:`Series.cummin` and :meth:`Series.cumprod` are now supported for pyarrow dtypes with pyarrow version 13.0 and above (:issue:`52085`)
- Added support for the DataFrame Consortium Standard (:issue:`54383`)
- Performance improvement in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` (:issue:`51722`)
- PyArrow-backed integer dtypes now support bitwise operations (:issue:`54495`)

.. ---------------------------------------------------------------------------
.. _whatsnew_210.api_breaking:
Expand Down
22 changes: 20 additions & 2 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@
"rxor": lambda x, y: pc.xor(y, x),
}

# Dispatch table mapping operator names to pyarrow.compute bitwise kernels.
# Used by ``_logical_method`` when the backing array has an integer type.
# The "r"-prefixed entries are the reflected variants: they swap their two
# arguments before calling the same kernel.
ARROW_BIT_WISE_FUNCS = {
    "and_": pc.bit_wise_and,
    "rand_": lambda x, y: pc.bit_wise_and(y, x),
    "or_": pc.bit_wise_or,
    "ror_": lambda x, y: pc.bit_wise_or(y, x),
    "xor": pc.bit_wise_xor,
    "rxor": lambda x, y: pc.bit_wise_xor(y, x),
}

def cast_for_truediv(
arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar
) -> pa.ChunkedArray:
Expand Down Expand Up @@ -582,7 +591,11 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
return self.to_numpy(dtype=dtype)

def __invert__(self) -> Self:
    """
    Implement ``~self``.

    Integer-typed arrays are inverted with ``pc.bit_wise_not``; arrays of
    any other type are passed to ``pc.invert``.

    Returns
    -------
    Self
        A new array of the same class wrapping the pyarrow result.
    """
    # This is a bit wise op for integer types
    if pa.types.is_integer(self._pa_array.type):
        return type(self)(pc.bit_wise_not(self._pa_array))
    else:
        return type(self)(pc.invert(self._pa_array))

def __neg__(self) -> Self:
    """Implement ``-self`` by applying ``pc.negate_checked`` to the backing array."""
    negated = pc.negate_checked(self._pa_array)
    return type(self)(negated)
Expand Down Expand Up @@ -657,7 +670,12 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
return type(self)(result)

def _logical_method(self, other, op):
    """
    Evaluate ``op`` between this array and ``other``.

    The operation is delegated to ``_evaluate_op_method`` with the
    dispatch table selected from the type of the backing pyarrow array:
    ``ARROW_BIT_WISE_FUNCS`` for integer types, ``ARROW_LOGICAL_FUNCS``
    for everything else.
    """
    # For integer types `^`, `|`, `&` are bitwise operators and return
    # integer types. Otherwise these are boolean ops.
    if pa.types.is_integer(self._pa_array.type):
        arrow_funcs = ARROW_BIT_WISE_FUNCS
    else:
        arrow_funcs = ARROW_LOGICAL_FUNCS
    return self._evaluate_op_method(other, op, arrow_funcs)

def _arith_method(self, other, op):
    """Evaluate ``op`` against ``other`` using the ``ARROW_ARITHMETIC_FUNCS`` table."""
    outcome = self._evaluate_op_method(other, op, ARROW_ARITHMETIC_FUNCS)
    return outcome
Expand Down
27 changes: 26 additions & 1 deletion pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ def test_EA_types(self, engine, data, dtype_backend, request):
class TestBaseUnaryOps(base.BaseUnaryOpsTests):
def test_invert(self, data, request):
pa_dtype = data.dtype.pyarrow_dtype
if not pa.types.is_boolean(pa_dtype):
if not (pa.types.is_boolean(pa_dtype) or pa.types.is_integer(pa_dtype)):
request.node.add_marker(
pytest.mark.xfail(
raises=pa.ArrowNotImplementedError,
Expand Down Expand Up @@ -1339,6 +1339,31 @@ def test_logical_masked_numpy(self, op, exp):
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES)
def test_bitwise(pa_type):
    """
    Check ``|``, ``&``, ``^`` and ``~`` on Series with an integer ArrowDtype.

    A missing value in either operand is expected to yield a missing
    value at that position in the result.
    """
    # GH 54495
    dtype = ArrowDtype(pa_type)
    left = pd.Series([1, None, 3, 4], dtype=dtype)
    right = pd.Series([None, 3, 5, 4], dtype=dtype)

    def check_binary(result, non_null_tail):
        # The first two positions have a null on one side, so only the
        # last two positions carry computed values.
        expected = pd.Series([None, None, *non_null_tail], dtype=dtype)
        tm.assert_series_equal(result, expected)

    check_binary(left | right, (3 | 5, 4 | 4))
    check_binary(left & right, (3 & 5, 4 & 4))
    check_binary(left ^ right, (3 ^ 5, 4 ^ 4))

    # Build the expected inversion with numpy on a null-free copy, then
    # restore the nulls at their original positions.
    result = ~left
    inverted_values = ~(left.fillna(0).to_numpy())
    expected = pd.Series(inverted_values, dtype=dtype).mask(left.isnull())
    tm.assert_series_equal(result, expected)


def test_arrowdtype_construct_from_string_type_with_unsupported_parameters():
with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]")
Expand Down