diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 387d8b463b8b4..67b68ce7365cc 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -2,7 +2,7 @@ Functions for arithmetic and comparison operations on NumPy arrays and ExtensionArrays. """ -from datetime import timedelta +import datetime from functools import partial import operator from typing import Any @@ -10,11 +10,13 @@ import numpy as np from pandas._libs import ( + NaT, Timedelta, Timestamp, lib, ops as libops, ) +from pandas._libs.tslibs import BaseOffset from pandas._typing import ( ArrayLike, Shape, @@ -154,8 +156,14 @@ def _na_arithmetic_op(left, right, op, is_cmp: bool = False): ------ TypeError : invalid operation """ + if isinstance(right, str): + # can never use numexpr + func = op + else: + func = partial(expressions.evaluate, op) + try: - result = expressions.evaluate(op, left, right) + result = func(left, right) except TypeError: if is_object_dtype(left) or is_object_dtype(right) and not is_cmp: # For object dtype, fallback to a masked operation (only operating @@ -201,8 +209,13 @@ def arithmetic_op(left: ArrayLike, right: Any, op): # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390) right = _maybe_upcast_for_op(right, left.shape) - if should_extension_dispatch(left, right) or isinstance(right, Timedelta): - # Timedelta is included because numexpr will fail on it, see GH#31457 + if ( + should_extension_dispatch(left, right) + or isinstance(right, (Timedelta, BaseOffset, Timestamp)) + or right is NaT + ): + # Timedelta/Timestamp and other custom scalars are included in the check + # because numexpr will fail on it, see GH#31457 res_values = op(left, right) else: res_values = _na_arithmetic_op(left, right, op) @@ -246,7 +259,10 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: "Lengths must match to compare", lvalues.shape, rvalues.shape ) - if should_extension_dispatch(lvalues, rvalues): + if should_extension_dispatch(lvalues, rvalues) or ( + (isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT) + and not is_object_dtype(lvalues.dtype) + ): # Call the method on lvalues res_values = op(lvalues, rvalues) @@ -261,7 +277,7 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: # GH#36377 going through the numexpr path would incorrectly raise return invalid_comparison(lvalues, rvalues, op) - elif is_object_dtype(lvalues.dtype): + elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str): res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) else: @@ -438,11 +454,14 @@ def _maybe_upcast_for_op(obj, shape: Shape): Be careful to call this *after* determining the `name` attribute to be attached to the result of the arithmetic operation. """ - if type(obj) is timedelta: + if type(obj) is datetime.timedelta: # GH#22390 cast up to Timedelta to rely on Timedelta # implementation; otherwise operation against numeric-dtype # raises TypeError return Timedelta(obj) + elif type(obj) is datetime.datetime: + # cast up to Timestamp to rely on Timestamp implementation, see Timedelta above + return Timestamp(obj) elif isinstance(obj, np.datetime64): # GH#28080 numpy casts integer-dtype to datetime64 when doing # array[int] + datetime64, which we do not allow diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index d90592c68e351..1e97db152c294 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -9,6 +9,18 @@ UInt64Index, ) import pandas._testing as tm +from pandas.core.computation import expressions as expr + + +@pytest.fixture( + autouse=True, scope="module", params=[0, 1000000], ids=["numexpr", "python"] +) +def switch_numexpr_min_elements(request): + _MIN_ELEMENTS = expr._MIN_ELEMENTS + expr._MIN_ELEMENTS = request.param + yield request.param + expr._MIN_ELEMENTS = _MIN_ELEMENTS + # ------------------------------------------------------------------ # Helper Functions diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index bdd954c1e2222..9e1d13eac5039 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -27,6 +27,7 @@ ) import pandas._testing as tm from pandas.core import ops +from pandas.core.computation import expressions as expr @pytest.fixture(params=[Index, Series, tm.to_array]) @@ -391,7 +392,7 @@ def test_div_negative_zero(self, zero, numeric_idx, op): # ------------------------------------------------------------------ @pytest.mark.parametrize("dtype1", [np.int64, np.float64, np.uint64]) - def test_ser_div_ser(self, dtype1, any_real_dtype): + def test_ser_div_ser(self, switch_numexpr_min_elements, dtype1, any_real_dtype): # no longer do integer div for any ops, but deal with the 0's dtype2 = any_real_dtype @@ -405,6 +406,11 @@ def test_ser_div_ser(self, dtype1, any_real_dtype): name=None, ) expected.iloc[0:3] = np.inf + if first.dtype == "int64" and second.dtype == "float32": + # when using numexpr, the casting rules are slightly different + # and int64/float32 combo results in float32 instead of float64 + if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0: + expected = expected.astype("float32") result = first / second tm.assert_series_equal(result, expected) @@ -890,7 +896,13 @@ def test_series_frame_radd_bug(self): # really raise this time now = pd.Timestamp.now().to_pydatetime() - msg = "unsupported operand type" + msg = "|".join( + [ + "unsupported operand type", + # wrong error message, see https://github.com/numpy/numpy/issues/18832 + "Concatenation operation", + ] + ) with pytest.raises(TypeError, match=msg): now + ts diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index c6816fa6481f4..4c31d15541412 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -174,9 +174,19 @@ def test_timestamp_compare(self): with pytest.raises(TypeError, match=msg): right_f(pd.Timestamp("20010109"), df) # nats - expected = left_f(df, pd.Timestamp("nat")) - result = right_f(pd.Timestamp("nat"), df) - tm.assert_frame_equal(result, expected) + if left in ["eq", "ne"]: + expected = left_f(df, pd.Timestamp("nat")) + result = right_f(pd.Timestamp("nat"), df) + tm.assert_frame_equal(result, expected) + else: + msg = ( + "'(<|>)=?' not supported between " + "instances of 'numpy.ndarray' and 'NaTType'" + ) + with pytest.raises(TypeError, match=msg): + left_f(df, pd.Timestamp("nat")) + with pytest.raises(TypeError, match=msg): + right_f(pd.Timestamp("nat"), df) def test_mixed_comparison(self): # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,