From 66e72954dc797741c7696d5df8e8fed1b4cb81d0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 26 Apr 2021 09:55:06 +0200 Subject: [PATCH 1/6] REF: move check for disallowed bool arithmetic ops out of numexpr-related expressions.py --- pandas/core/computation/expressions.py | 34 ++++++++++++++------------ pandas/core/ops/array_ops.py | 32 ++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 957a493925405..a8852ae06f578 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -114,6 +114,11 @@ def _evaluate_numexpr(op, op_str, a, b): # numexpr raises eg for array ** array with integers # (https://github.com/pydata/numexpr/issues/379) pass + except NotImplementedError: + if _bool_arith_fallback(op_str, a, b): + pass + else: + raise if is_reversed: # reverse order to original for fallback @@ -197,26 +202,24 @@ def _has_bool_dtype(x): return isinstance(x, (bool, np.bool_)) -def _bool_arith_check( - op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None -): - if unsupported is None: - unsupported = {"+": "|", "*": "&", "-": "^"} +_BOOL_OP_UNSUPPORTED = {"+": "|", "*": "&", "-": "^"} + +def _bool_arith_fallback(op_str, a, b): + """ + Check if we should fallback to the python `_evaluate_standard` in case + of an unsupported operation by numexpr, which is the case for some + boolean ops. + """ if _has_bool_dtype(a) and _has_bool_dtype(b): - if op_str in unsupported: + if op_str in _BOOL_OP_UNSUPPORTED: warnings.warn( f"evaluating in Python space because the {repr(op_str)} " - "operator is not supported by numexpr for " - f"the bool dtype, use {repr(unsupported[op_str])} instead" + "operator is not supported by numexpr for the bool dtype, " + f"use {repr(_BOOL_OP_UNSUPPORTED[op_str])} instead" ) - return False - - if op_str in not_allowed: - raise NotImplementedError( - f"operator {repr(op_str)} not implemented for bool dtypes" - ) - return True + return True + return False def evaluate(op, a, b, use_numexpr: bool = True): @@ -233,7 +236,6 @@ def evaluate(op, a, b, use_numexpr: bool = True): """ op_str = _op_str_mapping[op] if op_str is not None: - use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b) if use_numexpr: # error: "None" not callable return _evaluate(op, op_str, a, b) # type: ignore[misc] diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 387d8b463b8b4..10feb44d646bb 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -201,6 +201,8 @@ def arithmetic_op(left: ArrayLike, right: Any, op): # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390) right = _maybe_upcast_for_op(right, left.shape) + _bool_arith_check(op, left, right) + if should_extension_dispatch(left, right) or isinstance(right, Timedelta): # Timedelta is included because numexpr will fail on it, see GH#31457 res_values = op(left, right) @@ -473,3 +475,33 @@ def _maybe_upcast_for_op(obj, shape: Shape): return Timedelta(obj) return obj + + +def _has_bool_dtype(x): + try: + return x.dtype == bool + except AttributeError: + return isinstance(x, (bool, np.bool_)) + + +_BOOL_OP_NOT_ALLOWED = { + operator.truediv, + roperator.rtruediv, + operator.floordiv, + roperator.rfloordiv, + operator.pow, + roperator.rpow, +} + + +def _bool_arith_check(op, a, b): + """ + In contrast to numpy, pandas raises an error for certain operations + with booleans. + """ + if op in _BOOL_OP_NOT_ALLOWED: + if _has_bool_dtype(a) and _has_bool_dtype(b): + op_name = op.__name__.strip("_").lstrip("r") + raise NotImplementedError( + f"operator {op_name} not implemented for bool dtypes" + ) From 0a89f59f5b34583aef1d2f001f189e3304dd28eb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 26 Apr 2021 10:13:20 +0200 Subject: [PATCH 2/6] clean-up --- pandas/core/ops/array_ops.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 10feb44d646bb..05efec0baf4db 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -477,13 +477,6 @@ def _maybe_upcast_for_op(obj, shape: Shape): return obj -def _has_bool_dtype(x): - try: - return x.dtype == bool - except AttributeError: - return isinstance(x, (bool, np.bool_)) - - _BOOL_OP_NOT_ALLOWED = { operator.truediv, roperator.rtruediv, @@ -500,7 +493,9 @@ def _bool_arith_check(op, a, b): with booleans. """ if op in _BOOL_OP_NOT_ALLOWED: - if _has_bool_dtype(a) and _has_bool_dtype(b): + if is_bool_dtype(a.dtype) and ( + is_bool_dtype(b) or isinstance(b, (bool, np.bool_)) + ): op_name = op.__name__.strip("_").lstrip("r") raise NotImplementedError( f"operator {op_name} not implemented for bool dtypes" From f4062f1eadd7668903310f19a3d6caa2cf3ff654 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 26 Apr 2021 13:00:31 +0200 Subject: [PATCH 3/6] for now yet handle EAs --- pandas/core/ops/array_ops.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 05efec0baf4db..bc5636af41a56 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -26,6 +26,7 @@ ) from pandas.core.dtypes.common import ( ensure_object, + get_dtype, is_bool_dtype, is_integer_dtype, is_list_like, @@ -487,14 +488,28 @@ def _maybe_upcast_for_op(obj, shape: Shape): } +def _is_bool_dtype(arr_or_dtype): + # version of the common is_bool_dtype that only checks for numpy bool + # and not for boolean EA + if arr_or_dtype is None: + return False + try: + dtype = get_dtype(arr_or_dtype) + except (TypeError, ValueError): + return False + return isinstance(dtype, np.dtype) and dtype.kind == "b" + + def _bool_arith_check(op, a, b): """ In contrast to numpy, pandas raises an error for certain operations with booleans. """ if op in _BOOL_OP_NOT_ALLOWED: - if is_bool_dtype(a.dtype) and ( - is_bool_dtype(b) or isinstance(b, (bool, np.bool_)) + # TODO we should handle EAs consistently + # and replace `_is_bool_dtype` with `is_bool_dtype` + if _is_bool_dtype(a.dtype) and ( + _is_bool_dtype(b) or isinstance(b, (bool, np.bool_)) ): op_name = op.__name__.strip("_").lstrip("r") raise NotImplementedError( From bac4048e0bf4f71993033db363f4eb1ffbd83f3a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 26 Apr 2021 13:06:31 +0200 Subject: [PATCH 4/6] fix warning/error test --- pandas/core/ops/array_ops.py | 2 +- pandas/tests/frame/test_arithmetic.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index bc5636af41a56..aa7664ddacc8e 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -513,5 +513,5 @@ def _bool_arith_check(op, a, b): ): op_name = op.__name__.strip("_").lstrip("r") raise NotImplementedError( - f"operator {op_name} not implemented for bool dtypes" + f"operator '{op_name}' not implemented for bool dtypes" ) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index c6816fa6481f4..8a3d587273a7e 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -931,16 +931,16 @@ def test_binop_other(self, op, value, dtype): elif (op, dtype) in skip: if op in [operator.add, operator.mul]: - with tm.assert_produces_warning(UserWarning): - # "evaluating in Python space because ..." - op(s, e.value) + # TODO we should assert this or not depending on whether + # numexpr is used or not + # with tm.assert_produces_warning(UserWarning): + # # "evaluating in Python space because ..." + op(s, e.value) else: msg = "operator '.*' not implemented for .* dtypes" with pytest.raises(NotImplementedError, match=msg): - with tm.assert_produces_warning(UserWarning): - # "evaluating in Python space because ..." - op(s, e.value) + op(s, e.value) else: # FIXME: Since dispatching to Series, this test no longer From 6b53c4c76423e035bc91967631f504f0f3b46609 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 26 Apr 2021 13:09:17 +0200 Subject: [PATCH 5/6] fix msg in test_expressions --- pandas/tests/test_expressions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index e94cb23b359d0..6ac85f9d36fdc 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -242,7 +242,7 @@ def testit(): def test_bool_ops_raise_on_arithmetic(self, op_str, opname): df = DataFrame({"a": np.random.rand(10) > 0.5, "b": np.random.rand(10) > 0.5}) - msg = f"operator {repr(op_str)} not implemented for bool dtypes" + msg = f"operator '{opname}' not implemented for bool dtypes" f = getattr(operator, opname) err_msg = re.escape(msg) From 509d9f5fa89562601b3284de8daf85315bcaa63e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 26 Apr 2021 13:26:23 +0200 Subject: [PATCH 6/6] simplify logic for skipping it for EA for now --- pandas/core/ops/array_ops.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index aa7664ddacc8e..7af40eccaa19c 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -26,7 +26,6 @@ ) from pandas.core.dtypes.common import ( ensure_object, - get_dtype, is_bool_dtype, is_integer_dtype, is_list_like, @@ -202,12 +201,14 @@ def arithmetic_op(left: ArrayLike, right: Any, op): # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390) right = _maybe_upcast_for_op(right, left.shape) - _bool_arith_check(op, left, right) - if should_extension_dispatch(left, right) or isinstance(right, Timedelta): # Timedelta is included because numexpr will fail on it, see GH#31457 res_values = op(left, right) else: + # TODO we should handle EAs consistently and move this check before the if/else + # (https://github.com/pandas-dev/pandas/issues/41165) + _bool_arith_check(op, left, right) + res_values = _na_arithmetic_op(left, right, op) return res_values @@ -488,28 +489,14 @@ def _maybe_upcast_for_op(obj, shape: Shape): } -def _is_bool_dtype(arr_or_dtype): - # version of the common is_bool_dtype that only checks for numpy bool - # and not for boolean EA - if arr_or_dtype is None: - return False - try: - dtype = get_dtype(arr_or_dtype) - except (TypeError, ValueError): - return False - return isinstance(dtype, np.dtype) and dtype.kind == "b" - - def _bool_arith_check(op, a, b): """ In contrast to numpy, pandas raises an error for certain operations with booleans. """ if op in _BOOL_OP_NOT_ALLOWED: - # TODO we should handle EAs consistently - # and replace `_is_bool_dtype` with `is_bool_dtype` - if _is_bool_dtype(a.dtype) and ( - _is_bool_dtype(b) or isinstance(b, (bool, np.bool_)) + if is_bool_dtype(a.dtype) and ( + is_bool_dtype(b) or isinstance(b, (bool, np.bool_)) ): op_name = op.__name__.strip("_").lstrip("r") raise NotImplementedError(