From ff9876e20c078c00548413755db0a02d46bd1a69 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Tue, 28 Feb 2023 05:16:39 -0500 Subject: [PATCH 1/3] BUG: ArrowExtensionArray logical ops raising --- pandas/core/arrays/arrow/array.py | 15 ++- pandas/tests/extension/test_arrow.py | 144 +++++++++++++++++++++++++++ 2 files changed, 154 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index bb4bdae188fd2..8c5c1d9e11be4 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -81,10 +81,10 @@ } ARROW_LOGICAL_FUNCS = { - "and": pc.and_kleene, - "rand": lambda x, y: pc.and_kleene(y, x), - "or": pc.or_kleene, - "ror": lambda x, y: pc.or_kleene(y, x), + "and_": pc.and_kleene, + "rand_": lambda x, y: pc.and_kleene(y, x), + "or_": pc.or_kleene, + "ror_": lambda x, y: pc.or_kleene(y, x), "xor": pc.xor, "rxor": lambda x, y: pc.xor(y, x), } @@ -491,7 +491,12 @@ def _evaluate_op_method(self, other, op, arrow_funcs): elif isinstance(other, (np.ndarray, list)): result = pc_func(self._data, pa.array(other, from_pandas=True)) elif is_scalar(other): - result = pc_func(self._data, pa.scalar(other)) + if isna(other) and op.__name__ in ARROW_LOGICAL_FUNCS: + # pyarrow kleene ops require null to be typed + pa_scalar = pa.scalar(None, type=self._data.type) + else: + pa_scalar = pa.scalar(other) + result = pc_func(self._data, pa_scalar) else: raise NotImplementedError( f"{op.__name__} not implemented for {type(other)}" diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index f9af3a3063386..ed747bbd1be99 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1265,6 +1265,150 @@ def test_invalid_other_comp(self, data, comparison_op): comparison_op(data, object()) +class TestLogicalOps: + """Various Series and DataFrame logical ops methods.""" + + def test_kleene_or(self): + a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]") + b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]") + result = a | b + expected = pd.Series( + [True, True, True, True, False, None, True, None, None], + dtype="boolean[pyarrow]", + ) + tm.assert_series_equal(result, expected) + + result = b | a + tm.assert_series_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_series_equal( + a, + pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"), + ) + tm.assert_series_equal( + b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (None, [True, None, None]), + (pd.NA, [True, None, None]), + (True, [True, True, True]), + (np.bool_(True), [True, True, True]), + (False, [True, False, None]), + (np.bool_(False), [True, False, None]), + ], + ) + def test_kleene_or_scalar(self, other, expected): + a = pd.Series([True, False, None], dtype="boolean[pyarrow]") + result = a | other + expected = pd.Series(expected, dtype="boolean[pyarrow]") + tm.assert_series_equal(result, expected) + + result = other | a + tm.assert_series_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_series_equal( + a, pd.Series([True, False, None], dtype="boolean[pyarrow]") + ) + + def test_kleene_and(self): + a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]") + b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]") + result = a & b + expected = pd.Series( + [True, False, None, False, False, False, None, False, None], + dtype="boolean[pyarrow]", + ) + tm.assert_series_equal(result, expected) + + result = b & a + tm.assert_series_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_series_equal( + a, + pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"), + ) + tm.assert_series_equal( + b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (None, [None, False, None]), + (pd.NA, [None, False, None]), + (True, [True, False, None]), + (False, [False, False, False]), + (np.bool_(True), [True, False, None]), + (np.bool_(False), [False, False, False]), + ], + ) + def test_kleene_and_scalar(self, other, expected): + a = pd.Series([True, False, None], dtype="boolean[pyarrow]") + result = a & other + expected = pd.Series(expected, dtype="boolean[pyarrow]") + tm.assert_series_equal(result, expected) + + result = other & a + tm.assert_series_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_series_equal( + a, pd.Series([True, False, None], dtype="boolean[pyarrow]") + ) + + def test_kleene_xor(self): + a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]") + b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]") + result = a ^ b + expected = pd.Series( + [False, True, None, True, False, None, None, None, None], + dtype="boolean[pyarrow]", + ) + tm.assert_series_equal(result, expected) + + result = b ^ a + tm.assert_series_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_series_equal( + a, + pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"), + ) + tm.assert_series_equal( + b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (None, [None, None, None]), + (pd.NA, [None, None, None]), + (True, [False, True, None]), + (np.bool_(True), [False, True, None]), + (np.bool_(False), [True, False, None]), + ], + ) + def test_kleene_xor_scalar(self, other, expected): + a = pd.Series([True, False, None], dtype="boolean[pyarrow]") + result = a ^ other + expected = pd.Series(expected, dtype="boolean[pyarrow]") + tm.assert_series_equal(result, expected) + + result = other ^ a + tm.assert_series_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_series_equal( + a, pd.Series([True, False, None], dtype="boolean[pyarrow]") + ) + + def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"): ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]") From f4b739669ae05400b2b04ff8cb5c4fff40998ecc Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Tue, 28 Feb 2023 05:26:48 -0500 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 45b5c16415f9d..b265c5cf0431d 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -202,6 +202,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ - Bug in :meth:`Series.any` and :meth:`Series.all` returning ``NA`` for empty or all null pyarrow-backed data when ``skipna=True`` (:issue:`51624`) +- Bug in :class:`~arrays.ArrowExtensionArray` logical operations ``&`` and ``|`` raising ``KeyError`` (:issue:`51688`) - Styler From 0f93e76458e8366d5350923d804fe796b72834ba Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Mon, 6 Mar 2023 05:35:50 -0500 Subject: [PATCH 3/3] move to whatsnew to 2.0.0 --- doc/source/whatsnew/v2.0.0.rst | 1 + doc/source/whatsnew/v2.1.0.rst | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index e09f3e2753ee6..15e3d66ecc551 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1371,6 +1371,7 @@ ExtensionArray - Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`, :issue:`25880`) - Bug in setting non-string value into ``StringArray`` raising ``ValueError`` instead of ``TypeError`` (:issue:`49632`) - Bug in :meth:`DataFrame.reindex` not honoring the default ``copy=True`` keyword in case of columns with ExtensionDtype (and as a result also selecting multiple columns with getitem (``[]``) didn't correctly result in a copy) (:issue:`51197`) +- Bug in :class:`~arrays.ArrowExtensionArray` logical operations ``&`` and ``|`` raising ``KeyError`` (:issue:`51688`) Styler ^^^^^^ diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 1875d7d05889a..12d35288d1ee6 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -206,7 +206,6 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ - Bug in :meth:`Series.any` and :meth:`Series.all` returning ``NA`` for empty or all null pyarrow-backed data when ``skipna=True`` (:issue:`51624`) -- Bug in :class:`~arrays.ArrowExtensionArray` logical operations ``&`` and ``|`` raising ``KeyError`` (:issue:`51688`) - Styler