From 4475bfa7b61d00cc0245ef0418edb9c0bf09f984 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Tue, 22 Aug 2023 14:22:53 -0400 Subject: [PATCH] Backport PR #54685: ENH: support integer bitwise ops in ArrowExtensionArray --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/arrays/arrow/array.py | 22 ++++++++++++++++++++-- pandas/tests/extension/test_arrow.py | 27 ++++++++++++++++++++++++++- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 43a64a79e691b..9341237acfaa1 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -268,6 +268,7 @@ Other enhancements - :meth:`Series.cummax`, :meth:`Series.cummin` and :meth:`Series.cumprod` are now supported for pyarrow dtypes with pyarrow version 13.0 and above (:issue:`52085`) - Added support for the DataFrame Consortium Standard (:issue:`54383`) - Performance improvement in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` (:issue:`51722`) +- PyArrow-backed integer dtypes now support bitwise operations (:issue:`54495`) .. --------------------------------------------------------------------------- .. _whatsnew_210.api_breaking: diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 43320cf68cbec..48ff769f6c737 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -84,6 +84,15 @@ "rxor": lambda x, y: pc.xor(y, x), } + ARROW_BIT_WISE_FUNCS = { + "and_": pc.bit_wise_and, + "rand_": lambda x, y: pc.bit_wise_and(y, x), + "or_": pc.bit_wise_or, + "ror_": lambda x, y: pc.bit_wise_or(y, x), + "xor": pc.bit_wise_xor, + "rxor": lambda x, y: pc.bit_wise_xor(y, x), + } + def cast_for_truediv( arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar ) -> pa.ChunkedArray: @@ -582,7 +591,11 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: return self.to_numpy(dtype=dtype) def __invert__(self) -> Self: - return type(self)(pc.invert(self._pa_array)) + # This is a bit wise op for integer types + if pa.types.is_integer(self._pa_array.type): + return type(self)(pc.bit_wise_not(self._pa_array)) + else: + return type(self)(pc.invert(self._pa_array)) def __neg__(self) -> Self: return type(self)(pc.negate_checked(self._pa_array)) @@ -657,7 +670,12 @@ def _evaluate_op_method(self, other, op, arrow_funcs): return type(self)(result) def _logical_method(self, other, op): - return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS) + # For integer types `^`, `|`, `&` are bitwise operators and return + # integer types. Otherwise these are boolean ops. + if pa.types.is_integer(self._pa_array.type): + return self._evaluate_op_method(other, op, ARROW_BIT_WISE_FUNCS) + else: + return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS) def _arith_method(self, other, op): return self._evaluate_op_method(other, op, ARROW_ARITHMETIC_FUNCS) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index e748f320b3f09..a9b7a8c655032 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -753,7 +753,7 @@ def test_EA_types(self, engine, data, dtype_backend, request): class TestBaseUnaryOps(base.BaseUnaryOpsTests): def test_invert(self, data, request): pa_dtype = data.dtype.pyarrow_dtype - if not pa.types.is_boolean(pa_dtype): + if not (pa.types.is_boolean(pa_dtype) or pa.types.is_integer(pa_dtype)): request.node.add_marker( pytest.mark.xfail( raises=pa.ArrowNotImplementedError, @@ -1339,6 +1339,31 @@ def test_logical_masked_numpy(self, op, exp): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES) +def test_bitwise(pa_type): + # GH 54495 + dtype = ArrowDtype(pa_type) + left = pd.Series([1, None, 3, 4], dtype=dtype) + right = pd.Series([None, 3, 5, 4], dtype=dtype) + + result = left | right + expected = pd.Series([None, None, 3 | 5, 4 | 4], dtype=dtype) + tm.assert_series_equal(result, expected) + + result = left & right + expected = pd.Series([None, None, 3 & 5, 4 & 4], dtype=dtype) + tm.assert_series_equal(result, expected) + + result = left ^ right + expected = pd.Series([None, None, 3 ^ 5, 4 ^ 4], dtype=dtype) + tm.assert_series_equal(result, expected) + + result = ~left + expected = ~(left.fillna(0).to_numpy()) + expected = pd.Series(expected, dtype=dtype).mask(left.isnull()) + tm.assert_series_equal(result, expected) + + def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"): ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]")