Skip to content

REF: move check for disallowed bool arithmetic ops out of numexpr-related expressions.py #41161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

34 changes: 18 additions & 16 deletions pandas/core/computation/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ def _evaluate_numexpr(op, op_str, a, b):
# numexpr raises eg for array ** array with integers
# (https://github.com/pydata/numexpr/issues/379)
pass
except NotImplementedError:
if _bool_arith_fallback(op_str, a, b):
pass
else:
raise

if is_reversed:
# reverse order to original for fallback
Expand Down Expand Up @@ -197,26 +202,24 @@ def _has_bool_dtype(x):
return isinstance(x, (bool, np.bool_))


def _bool_arith_check(
op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None
):
if unsupported is None:
unsupported = {"+": "|", "*": "&", "-": "^"}
_BOOL_OP_UNSUPPORTED = {"+": "|", "*": "&", "-": "^"}


def _bool_arith_fallback(op_str, a, b):
"""
Check if we should fallback to the python `_evaluate_standard` in case
of an unsupported operation by numexpr, which is the case for some
boolean ops.
"""
if _has_bool_dtype(a) and _has_bool_dtype(b):
if op_str in unsupported:
if op_str in _BOOL_OP_UNSUPPORTED:
warnings.warn(
f"evaluating in Python space because the {repr(op_str)} "
"operator is not supported by numexpr for "
f"the bool dtype, use {repr(unsupported[op_str])} instead"
"operator is not supported by numexpr for the bool dtype, "
f"use {repr(_BOOL_OP_UNSUPPORTED[op_str])} instead"
)
return False

if op_str in not_allowed:
raise NotImplementedError(
f"operator {repr(op_str)} not implemented for bool dtypes"
)
return True
return True
return False


def evaluate(op, a, b, use_numexpr: bool = True):
Expand All @@ -233,7 +236,6 @@ def evaluate(op, a, b, use_numexpr: bool = True):
"""
op_str = _op_str_mapping[op]
if op_str is not None:
use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
if use_numexpr:
# error: "None" not callable
return _evaluate(op, op_str, a, b) # type: ignore[misc]
Expand Down
29 changes: 29 additions & 0 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,10 @@ def arithmetic_op(left: ArrayLike, right: Any, op):
# because numexpr will fail on it, see GH#31457
res_values = op(left, right)
else:
# TODO we should handle EAs consistently and move this check before the if/else
# (https://github.com/pandas-dev/pandas/issues/41165)
_bool_arith_check(op, left, right)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened an issue for this: #41165


res_values = _na_arithmetic_op(left, right, op)

return res_values
Expand Down Expand Up @@ -492,3 +496,28 @@ def _maybe_upcast_for_op(obj, shape: Shape):
return Timedelta(obj)

return obj


_BOOL_OP_NOT_ALLOWED = {
operator.truediv,
roperator.rtruediv,
operator.floordiv,
roperator.rfloordiv,
operator.pow,
roperator.rpow,
}


def _bool_arith_check(op, a, b):
"""
In contrast to numpy, pandas raises an error for certain operations
with booleans.
"""
if op in _BOOL_OP_NOT_ALLOWED:
if is_bool_dtype(a.dtype) and (
is_bool_dtype(b) or isinstance(b, (bool, np.bool_))
):
op_name = op.__name__.strip("_").lstrip("r")
raise NotImplementedError(
f"operator '{op_name}' not implemented for bool dtypes"
)
12 changes: 6 additions & 6 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,16 +941,16 @@ def test_binop_other(self, op, value, dtype):
elif (op, dtype) in skip:

if op in [operator.add, operator.mul]:
with tm.assert_produces_warning(UserWarning):
# "evaluating in Python space because ..."
op(s, e.value)
# TODO we should assert this or not depending on whether
# numexpr is used or not
# with tm.assert_produces_warning(UserWarning):
# # "evaluating in Python space because ..."
op(s, e.value)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fact that it is no longer warning is I think correct, since we are not using numexpr for such a small test case (so the warning about "not supported by numexpr" is not relevant for the user).
But, we should still test that we do raise the warning when numexpr is used. This can probably be done in a similar was as #40463

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will take care of this in #41178


else:
msg = "operator '.*' not implemented for .* dtypes"
with pytest.raises(NotImplementedError, match=msg):
with tm.assert_produces_warning(UserWarning):
# "evaluating in Python space because ..."
op(s, e.value)
op(s, e.value)
Copy link
Member Author

@jorisvandenbossche jorisvandenbossche Apr 26, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a wrong test, but because putting assert_produces_warning inside pytest.raises doesn't fail if it doesn't warn, this wasn't noticed (encountered this in #40325 (comment) as well).
The ops for which we raise a warning vs raise NotImplementedError are not overlapping.


else:
# FIXME: Since dispatching to Series, this test no longer
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def testit():
def test_bool_ops_raise_on_arithmetic(self, op_str, opname):
df = DataFrame({"a": np.random.rand(10) > 0.5, "b": np.random.rand(10) > 0.5})

msg = f"operator {repr(op_str)} not implemented for bool dtypes"
msg = f"operator '{opname}' not implemented for bool dtypes"
f = getattr(operator, opname)
err_msg = re.escape(msg)

Expand Down