Skip to content

Commit 8c8ddc7

Browse files
Backport PR #54685 on branch 2.1.x (ENH: support integer bitwise ops in ArrowExtensionArray) (#54691)
Backport PR #54685: ENH: support integer bitwise ops in ArrowExtensionArray

Co-authored-by: Luke Manley <[email protected]>
1 parent 8b03024 commit 8c8ddc7

File tree

3 files changed

+47
-3
lines changed

3 files changed

+47
-3
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ Other enhancements
268268
- :meth:`Series.cummax`, :meth:`Series.cummin` and :meth:`Series.cumprod` are now supported for pyarrow dtypes with pyarrow version 13.0 and above (:issue:`52085`)
269269
- Added support for the DataFrame Consortium Standard (:issue:`54383`)
270270
- Performance improvement in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` (:issue:`51722`)
271+
- PyArrow-backed integer dtypes now support bitwise operations (:issue:`54495`)
271272

272273
.. ---------------------------------------------------------------------------
273274
.. _whatsnew_210.api_breaking:

pandas/core/arrays/arrow/array.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,15 @@
8484
"rxor": lambda x, y: pc.xor(y, x),
8585
}
8686

87+
ARROW_BIT_WISE_FUNCS = {
88+
"and_": pc.bit_wise_and,
89+
"rand_": lambda x, y: pc.bit_wise_and(y, x),
90+
"or_": pc.bit_wise_or,
91+
"ror_": lambda x, y: pc.bit_wise_or(y, x),
92+
"xor": pc.bit_wise_xor,
93+
"rxor": lambda x, y: pc.bit_wise_xor(y, x),
94+
}
95+
8796
def cast_for_truediv(
8897
arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar
8998
) -> pa.ChunkedArray:
@@ -582,7 +591,11 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
582591
return self.to_numpy(dtype=dtype)
583592

584593
def __invert__(self) -> Self:
585-
return type(self)(pc.invert(self._pa_array))
594+
# This is a bit wise op for integer types
595+
if pa.types.is_integer(self._pa_array.type):
596+
return type(self)(pc.bit_wise_not(self._pa_array))
597+
else:
598+
return type(self)(pc.invert(self._pa_array))
586599

587600
def __neg__(self) -> Self:
588601
return type(self)(pc.negate_checked(self._pa_array))
@@ -657,7 +670,12 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
657670
return type(self)(result)
658671

659672
def _logical_method(self, other, op):
660-
return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS)
673+
# For integer types `^`, `|`, `&` are bitwise operators and return
674+
# integer types. Otherwise these are boolean ops.
675+
if pa.types.is_integer(self._pa_array.type):
676+
return self._evaluate_op_method(other, op, ARROW_BIT_WISE_FUNCS)
677+
else:
678+
return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS)
661679

662680
def _arith_method(self, other, op):
663681
return self._evaluate_op_method(other, op, ARROW_ARITHMETIC_FUNCS)

pandas/tests/extension/test_arrow.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,7 @@ def test_EA_types(self, engine, data, dtype_backend, request):
753753
class TestBaseUnaryOps(base.BaseUnaryOpsTests):
754754
def test_invert(self, data, request):
755755
pa_dtype = data.dtype.pyarrow_dtype
756-
if not pa.types.is_boolean(pa_dtype):
756+
if not (pa.types.is_boolean(pa_dtype) or pa.types.is_integer(pa_dtype)):
757757
request.node.add_marker(
758758
pytest.mark.xfail(
759759
raises=pa.ArrowNotImplementedError,
@@ -1339,6 +1339,31 @@ def test_logical_masked_numpy(self, op, exp):
13391339
tm.assert_series_equal(result, expected)
13401340

13411341

1342+
@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES)
1343+
def test_bitwise(pa_type):
1344+
# GH 54495
1345+
dtype = ArrowDtype(pa_type)
1346+
left = pd.Series([1, None, 3, 4], dtype=dtype)
1347+
right = pd.Series([None, 3, 5, 4], dtype=dtype)
1348+
1349+
result = left | right
1350+
expected = pd.Series([None, None, 3 | 5, 4 | 4], dtype=dtype)
1351+
tm.assert_series_equal(result, expected)
1352+
1353+
result = left & right
1354+
expected = pd.Series([None, None, 3 & 5, 4 & 4], dtype=dtype)
1355+
tm.assert_series_equal(result, expected)
1356+
1357+
result = left ^ right
1358+
expected = pd.Series([None, None, 3 ^ 5, 4 ^ 4], dtype=dtype)
1359+
tm.assert_series_equal(result, expected)
1360+
1361+
result = ~left
1362+
expected = ~(left.fillna(0).to_numpy())
1363+
expected = pd.Series(expected, dtype=dtype).mask(left.isnull())
1364+
tm.assert_series_equal(result, expected)
1365+
1366+
13421367
def test_arrowdtype_construct_from_string_type_with_unsupported_parameters():
13431368
with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
13441369
ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]")

0 commit comments

Comments
 (0)