-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG/TST: run and fix all arithmetic tests with+without numexpr #40463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a4dea4a
24d087e
6851089
d6f23ba
5122675
82a7247
340232a
e294267
0804e63
d97ab38
dcf38cf
036cf62
06d76e0
6ff3be0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,19 +2,21 @@ | |
Functions for arithmetic and comparison operations on NumPy arrays and | ||
ExtensionArrays. | ||
""" | ||
from datetime import timedelta | ||
import datetime | ||
from functools import partial | ||
import operator | ||
from typing import Any | ||
|
||
import numpy as np | ||
|
||
from pandas._libs import ( | ||
NaT, | ||
Timedelta, | ||
Timestamp, | ||
lib, | ||
ops as libops, | ||
) | ||
from pandas._libs.tslibs import BaseOffset | ||
from pandas._typing import ( | ||
ArrayLike, | ||
Shape, | ||
|
@@ -154,8 +156,14 @@ def _na_arithmetic_op(left, right, op, is_cmp: bool = False): | |
------ | ||
TypeError : invalid operation | ||
""" | ||
if isinstance(right, str): | ||
# can never use numexpr | ||
func = op | ||
else: | ||
func = partial(expressions.evaluate, op) | ||
|
||
try: | ||
result = expressions.evaluate(op, left, right) | ||
result = func(left, right) | ||
except TypeError: | ||
if is_object_dtype(left) or is_object_dtype(right) and not is_cmp: | ||
# For object dtype, fallback to a masked operation (only operating | ||
|
@@ -201,8 +209,13 @@ def arithmetic_op(left: ArrayLike, right: Any, op): | |
# casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390) | ||
right = _maybe_upcast_for_op(right, left.shape) | ||
|
||
if should_extension_dispatch(left, right) or isinstance(right, Timedelta): | ||
# Timedelta is included because numexpr will fail on it, see GH#31457 | ||
if ( | ||
should_extension_dispatch(left, right) | ||
or isinstance(right, (Timedelta, BaseOffset, Timestamp)) | ||
or right is NaT | ||
): | ||
# Timedelta/Timestamp and other custom scalars are included in the check | ||
# because numexpr will fail on it, see GH#31457 | ||
res_values = op(left, right) | ||
else: | ||
res_values = _na_arithmetic_op(left, right, op) | ||
|
@@ -246,7 +259,10 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: | |
"Lengths must match to compare", lvalues.shape, rvalues.shape | ||
) | ||
|
||
if should_extension_dispatch(lvalues, rvalues): | ||
if should_extension_dispatch(lvalues, rvalues) or ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is the same check as above, (L212) can you put the common parts in a function There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's slightly different: here I need an additional There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure i see that, that's why i said common parts (meaning the datetimes like + NaT) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, but note the brackets: currently the checks for the scalars is first combined with |
||
(isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT) | ||
and not is_object_dtype(lvalues.dtype) | ||
): | ||
# Call the method on lvalues | ||
res_values = op(lvalues, rvalues) | ||
|
||
|
@@ -261,7 +277,7 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: | |
# GH#36377 going through the numexpr path would incorrectly raise | ||
return invalid_comparison(lvalues, rvalues, op) | ||
|
||
elif is_object_dtype(lvalues.dtype): | ||
elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str): | ||
res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) | ||
|
||
else: | ||
|
@@ -438,11 +454,14 @@ def _maybe_upcast_for_op(obj, shape: Shape): | |
Be careful to call this *after* determining the `name` attribute to be | ||
attached to the result of the arithmetic operation. | ||
""" | ||
if type(obj) is timedelta: | ||
if type(obj) is datetime.timedelta: | ||
# GH#22390 cast up to Timedelta to rely on Timedelta | ||
# implementation; otherwise operation against numeric-dtype | ||
# raises TypeError | ||
return Timedelta(obj) | ||
elif type(obj) is datetime.datetime: | ||
# cast up to Timestamp to rely on Timestamp implementation, see Timedelta above | ||
return Timestamp(obj) | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
elif isinstance(obj, np.datetime64): | ||
# GH#28080 numpy casts integer-dtype to datetime64 when doing | ||
# array[int] + datetime64, which we do not allow | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,18 @@ | |
UInt64Index, | ||
) | ||
import pandas._testing as tm | ||
from pandas.core.computation import expressions as expr | ||
|
||
|
||
@pytest.fixture(
    autouse=True,
    scope="module",
    params=[0, 1000000],
    ids=["numexpr", "python"],
)
def switch_numexpr_min_elements(request):
    """
    Run the tests of this module once per ``expr._MIN_ELEMENTS`` setting.

    The fixture is parametrized over two values, labelled ``"numexpr"`` (0)
    and ``"python"`` (1000000). For each one it overrides
    ``expr._MIN_ELEMENTS`` with the parameter, yields that parameter to the
    test, and restores the previous value afterwards.

    Yields
    ------
    int
        The ``_MIN_ELEMENTS`` value in effect for the current run.
    """
    # NOTE(review): presumably 0 lets every operation reach the numexpr path
    # and 1000000 keeps these small test arrays on the plain-Python path;
    # confirm against pandas.core.computation.expressions.
    saved_min_elements = expr._MIN_ELEMENTS
    expr._MIN_ELEMENTS = request.param
    yield request.param
    # Teardown: put the module-level setting back.
    expr._MIN_ELEMENTS = saved_min_elements
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we get rid of some of the setup/teardown in test_expressions with this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Potentially something similar could be used there as well, yes. But this PR is focusing on the |
||
|
||
|
||
# ------------------------------------------------------------------ | ||
# Helper Functions | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
) | ||
import pandas._testing as tm | ||
from pandas.core import ops | ||
from pandas.core.computation import expressions as expr | ||
|
||
|
||
@pytest.fixture(params=[Index, Series, tm.to_array]) | ||
|
@@ -391,7 +392,7 @@ def test_div_negative_zero(self, zero, numeric_idx, op): | |
# ------------------------------------------------------------------ | ||
|
||
@pytest.mark.parametrize("dtype1", [np.int64, np.float64, np.uint64]) | ||
def test_ser_div_ser(self, dtype1, any_real_dtype): | ||
def test_ser_div_ser(self, switch_numexpr_min_elements, dtype1, any_real_dtype): | ||
# no longer do integer div for any ops, but deal with the 0's | ||
dtype2 = any_real_dtype | ||
|
||
|
@@ -405,6 +406,11 @@ def test_ser_div_ser(self, dtype1, any_real_dtype): | |
name=None, | ||
) | ||
expected.iloc[0:3] = np.inf | ||
if first.dtype == "int64" and second.dtype == "float32": | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is the reverse excluded as well? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reverse (float32 + int64) is not tested, as the (but yeah, the reverse order would also result in float32 instead of float64 when numexpr is used) |
||
# when using numexpr, the casting rules are slightly different | ||
# and int64/float32 combo results in float32 instead of float64 | ||
if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0: | ||
expected = expected.astype("float32") | ||
|
||
result = first / second | ||
tm.assert_series_equal(result, expected) | ||
|
@@ -890,7 +896,13 @@ def test_series_frame_radd_bug(self): | |
|
||
# really raise this time | ||
now = pd.Timestamp.now().to_pydatetime() | ||
msg = "unsupported operand type" | ||
msg = "|".join( | ||
[ | ||
"unsupported operand type", | ||
# wrong error message, see https://github.com/numpy/numpy/issues/18832 | ||
"Concatenation operation", | ||
] | ||
) | ||
with pytest.raises(TypeError, match=msg): | ||
now + ts | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Basically, with a string argument, numexpr will fail with a "wrong" error message. Alternatively, `_can_use_numexpr` in expressions.py could also be updated to check for this and avoid using the numexpr path (currently it only checks objects with dtypes, not scalars).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's make an effort to keep numexpr-specific logic in _can_use_numexpr/expressions
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jbrockmendel would you be OK with leaving the check here as is, short term? I have a follow-up PR that moves this check inside a `can_use_numexpr` function inside expressions.py (#41122), so that will clean this up.
But I would like to merge this PR before #41122, since this one is adding a lot of test coverage for with/without numexpr.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, OK for now, but let's be sure to move it later.