Skip to content

Commit d7b5a10

Browse files
REF: move check for disallowed bool arithmetic ops out of numexpr-related expressions.py (#41161)
1 parent d6e2586 commit d7b5a10

File tree

4 files changed

+54
-23
lines changed

4 files changed

+54
-23
lines changed

pandas/core/computation/expressions.py

+18-16
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,11 @@ def _evaluate_numexpr(op, op_str, a, b):
114114
# numexpr raises e.g. for array ** array with integers
115115
# (https://github.com/pydata/numexpr/issues/379)
116116
pass
117+
except NotImplementedError:
118+
if _bool_arith_fallback(op_str, a, b):
119+
pass
120+
else:
121+
raise
117122

118123
if is_reversed:
119124
# reverse order to original for fallback
@@ -197,26 +202,24 @@ def _has_bool_dtype(x):
197202
return isinstance(x, (bool, np.bool_))
198203

199204

200-
def _bool_arith_check(
201-
op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None
202-
):
203-
if unsupported is None:
204-
unsupported = {"+": "|", "*": "&", "-": "^"}
205+
_BOOL_OP_UNSUPPORTED = {"+": "|", "*": "&", "-": "^"}
205206

207+
208+
def _bool_arith_fallback(op_str, a, b):
209+
"""
210+
Check if we should fall back to the Python `_evaluate_standard` in case
211+
of an unsupported operation by numexpr, which is the case for some
212+
boolean ops.
213+
"""
206214
if _has_bool_dtype(a) and _has_bool_dtype(b):
207-
if op_str in unsupported:
215+
if op_str in _BOOL_OP_UNSUPPORTED:
208216
warnings.warn(
209217
f"evaluating in Python space because the {repr(op_str)} "
210-
"operator is not supported by numexpr for "
211-
f"the bool dtype, use {repr(unsupported[op_str])} instead"
218+
"operator is not supported by numexpr for the bool dtype, "
219+
f"use {repr(_BOOL_OP_UNSUPPORTED[op_str])} instead"
212220
)
213-
return False
214-
215-
if op_str in not_allowed:
216-
raise NotImplementedError(
217-
f"operator {repr(op_str)} not implemented for bool dtypes"
218-
)
219-
return True
221+
return True
222+
return False
220223

221224

222225
def evaluate(op, a, b, use_numexpr: bool = True):
@@ -233,7 +236,6 @@ def evaluate(op, a, b, use_numexpr: bool = True):
233236
"""
234237
op_str = _op_str_mapping[op]
235238
if op_str is not None:
236-
use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
237239
if use_numexpr:
238240
# error: "None" not callable
239241
return _evaluate(op, op_str, a, b) # type: ignore[misc]

pandas/core/ops/array_ops.py

+29
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,10 @@ def arithmetic_op(left: ArrayLike, right: Any, op):
218218
# because numexpr will fail on it, see GH#31457
219219
res_values = op(left, right)
220220
else:
221+
# TODO: we should handle ExtensionArrays (EAs) consistently and move this check before the if/else
222+
# (https://github.com/pandas-dev/pandas/issues/41165)
223+
_bool_arith_check(op, left, right)
224+
221225
res_values = _na_arithmetic_op(left, right, op)
222226

223227
return res_values
@@ -492,3 +496,28 @@ def _maybe_upcast_for_op(obj, shape: Shape):
492496
return Timedelta(obj)
493497

494498
return obj
499+
500+
501+
_BOOL_OP_NOT_ALLOWED = {
502+
operator.truediv,
503+
roperator.rtruediv,
504+
operator.floordiv,
505+
roperator.rfloordiv,
506+
operator.pow,
507+
roperator.rpow,
508+
}
509+
510+
511+
def _bool_arith_check(op, a, b):
512+
"""
513+
In contrast to numpy, pandas raises an error for certain operations
514+
with booleans.
515+
"""
516+
if op in _BOOL_OP_NOT_ALLOWED:
517+
if is_bool_dtype(a.dtype) and (
518+
is_bool_dtype(b) or isinstance(b, (bool, np.bool_))
519+
):
520+
op_name = op.__name__.strip("_").lstrip("r")
521+
raise NotImplementedError(
522+
f"operator '{op_name}' not implemented for bool dtypes"
523+
)

pandas/tests/frame/test_arithmetic.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -941,16 +941,16 @@ def test_binop_other(self, op, value, dtype):
941941
elif (op, dtype) in skip:
942942

943943
if op in [operator.add, operator.mul]:
944-
with tm.assert_produces_warning(UserWarning):
945-
# "evaluating in Python space because ..."
946-
op(s, e.value)
944+
# TODO we should assert this or not depending on whether
945+
# numexpr is used or not
946+
# with tm.assert_produces_warning(UserWarning):
947+
# # "evaluating in Python space because ..."
948+
op(s, e.value)
947949

948950
else:
949951
msg = "operator '.*' not implemented for .* dtypes"
950952
with pytest.raises(NotImplementedError, match=msg):
951-
with tm.assert_produces_warning(UserWarning):
952-
# "evaluating in Python space because ..."
953-
op(s, e.value)
953+
op(s, e.value)
954954

955955
else:
956956
# FIXME: Since dispatching to Series, this test no longer

pandas/tests/test_expressions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def testit():
242242
def test_bool_ops_raise_on_arithmetic(self, op_str, opname):
243243
df = DataFrame({"a": np.random.rand(10) > 0.5, "b": np.random.rand(10) > 0.5})
244244

245-
msg = f"operator {repr(op_str)} not implemented for bool dtypes"
245+
msg = f"operator '{opname}' not implemented for bool dtypes"
246246
f = getattr(operator, opname)
247247
err_msg = re.escape(msg)
248248

0 commit comments

Comments
 (0)