From f1b6ea27bf968f7cc1e10327d7a5f2ac5d9a016a Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 13 Aug 2024 13:50:56 -0700 Subject: [PATCH 1/3] BUG: ArrowEA comparisons with mismatched types --- doc/source/whatsnew/v2.2.2.rst | 1 + pandas/core/arrays/arrow/array.py | 8 ++++++- pandas/core/arrays/string_arrow.py | 6 +---- pandas/tests/series/test_logical_ops.py | 30 ++++++++++++++++++++----- 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 72a2f84c4aaee..163a968813e1f 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -42,6 +42,7 @@ Bug fixes - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`) - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) ``'string[pyarrow]'`` without missing values (:issue:`57761`) - :meth:`DataFrame.to_sql` was failing to find the right table when using the schema argument (:issue:`57539`) +- Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`??`) .. --------------------------------------------------------------------------- .. _whatsnew_222.other: diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index d07bfeda50e1d..e95fa441e18fb 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -709,7 +709,13 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray: if isinstance( other, (ArrowExtensionArray, np.ndarray, list, BaseMaskedArray) ) or isinstance(getattr(other, "dtype", None), CategoricalDtype): - result = pc_func(self._pa_array, self._box_pa(other)) + try: + result = pc_func(self._pa_array, self._box_pa(other)) + except pa.ArrowNotImplementedError: + # TODO: could this be wrong if other is object dtype? + # in which case we need to operate pointwise? + result = ops.invalid_comparison(self, other, op) + result = pa.array(result, type=pa.bool_()) elif is_scalar(other): try: result = pc_func(self._pa_array, self._box_pa(other)) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index f48aec19685d3..8e33179de0512 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -36,7 +36,6 @@ BaseStringArray, StringDtype, ) -from pandas.core.ops import invalid_comparison from pandas.core.strings.object_array import ObjectStringArrayMixin if not pa_version_under10p1: @@ -563,10 +562,7 @@ def _convert_int_dtype(self, result): return result def _cmp_method(self, other, op): - try: - result = super()._cmp_method(other, op) - except pa.ArrowNotImplementedError: - return invalid_comparison(self, other, op) + result = super()._cmp_method(other, op) if op == operator.ne: return result.to_numpy(np.bool_, na_value=True) else: diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index 262ec35b472ad..f54203ec34039 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -9,6 +9,7 @@ from pandas.compat import HAS_PYARROW from pandas import ( + ArrowDtype, DataFrame, Index, Series, @@ -523,18 +524,37 @@ def test_int_dtype_different_index_not_bool(self): result = ser1 ^ ser2 tm.assert_series_equal(result, expected) + # TODO: this belongs in comparison tests def test_pyarrow_numpy_string_invalid(self): # GH#56008 - pytest.importorskip("pyarrow") + pa = pytest.importorskip("pyarrow") ser = Series([False, True]) ser2 = Series(["a", "b"], dtype="string[pyarrow_numpy]") result = ser == ser2 - expected = Series(False, index=ser.index) - tm.assert_series_equal(result, expected) + expected_eq = Series(False, index=ser.index) + tm.assert_series_equal(result, expected_eq) result = ser != ser2 - expected = Series(True, index=ser.index) - tm.assert_series_equal(result, expected) + expected_ne = Series(True, index=ser.index) + tm.assert_series_equal(result, expected_ne) with pytest.raises(TypeError, match="Invalid comparison"): ser > ser2 + + ser3 = ser2.astype("string[pyarrow]") + result3_eq = ser3 == ser + tm.assert_series_equal(result3_eq, expected_eq.astype("bool[pyarrow]")) + result3_ne = ser3 != ser + tm.assert_series_equal(result3_ne, expected_ne.astype("bool[pyarrow]")) + + with pytest.raises(TypeError, match="Invalid comparison"): + ser > ser3 + + ser4 = ser2.astype(ArrowDtype(pa.string())) + result4_eq = ser4 == ser + tm.assert_series_equal(result4_eq, expected_eq.astype("bool[pyarrow]")) + result4_ne = ser4 != ser + tm.assert_series_equal(result4_ne, expected_ne.astype("bool[pyarrow]")) + + with pytest.raises(TypeError, match="Invalid comparison"): + ser > ser4 From 9d8920ec12ff1aa2a0b7ed5996c11f7aef391131 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 13 Aug 2024 13:53:18 -0700 Subject: [PATCH 2/3] move whatsnew --- doc/source/whatsnew/v2.2.2.rst | 1 - doc/source/whatsnew/v3.0.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 163a968813e1f..72a2f84c4aaee 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -42,7 +42,6 @@ Bug fixes - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`) - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) ``'string[pyarrow]'`` without missing values (:issue:`57761`) - :meth:`DataFrame.to_sql` was failing to find the right table when using the schema argument (:issue:`57539`) -- Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`??`) .. --------------------------------------------------------------------------- .. _whatsnew_222.other: diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f26c6506477d4..c7a63d51edf28 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -668,6 +668,7 @@ ExtensionArray - Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) - Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`) +- Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`??`) Styler ^^^^^^ From 9f3792993da53dfc501ac27c8e36d976a1625013 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 13 Aug 2024 13:54:24 -0700 Subject: [PATCH 3/3] GH ref --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/tests/series/test_logical_ops.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c7a63d51edf28..06f196eae388c 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -667,8 +667,8 @@ ExtensionArray ^^^^^^^^^^^^^^ - Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) +- Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`) - Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`) -- Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`??`) Styler ^^^^^^ diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index f54203ec34039..baed3ba936699 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -541,6 +541,7 @@ def test_pyarrow_numpy_string_invalid(self): with pytest.raises(TypeError, match="Invalid comparison"): ser > ser2 + # GH#59505 ser3 = ser2.astype("string[pyarrow]") result3_eq = ser3 == ser tm.assert_series_equal(result3_eq, expected_eq.astype("bool[pyarrow]"))