Skip to content

REGR: Series[numeric] comparison with str raising on numexpr path #36440

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Fixed regressions
- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`)
- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`)
- Fixed regression in :meth:`read_excel` with ``engine="odf"`` that caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`)
-
- Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`, :issue:`36377`)

.. ---------------------------------------------------------------------------
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,14 @@ def cmp_method(self, other):
with np.errstate(all="ignore"):
result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)

else:
elif is_interval_dtype(self.dtype):
with np.errstate(all="ignore"):
result = op(self._values, np.asarray(other))

else:
with np.errstate(all="ignore"):
result = ops.comparison_op(self._values, np.asarray(other), op)

if is_bool_dtype(result):
return result
return ops.invalid_comparison(self, other, op)
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
is_bool_dtype,
is_integer_dtype,
is_list_like,
is_numeric_v_string_like,
is_object_dtype,
is_scalar,
)
Expand Down Expand Up @@ -235,6 +236,10 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
else:
res_values = np.zeros(lvalues.shape, dtype=bool)

elif is_numeric_v_string_like(lvalues, rvalues):
# GH#36377 going through the numexpr path would incorrectly raise
return invalid_comparison(lvalues, rvalues, op)

elif is_object_dtype(lvalues.dtype):
res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)

Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,26 @@ def test_compare_invalid(self):
b.name = pd.Timestamp("2000-01-01")
tm.assert_series_equal(a / b, 1 / (b / a))

def test_numeric_cmp_string_numexpr_path(self, box):
# GH#36377, GH#35700
xbox = box if box is not pd.Index else np.ndarray

obj = pd.Series(np.random.randn(10 ** 5))
obj = tm.box_expected(obj, box, transpose=False)

result = obj == "a"

expected = pd.Series(np.zeros(10 ** 5, dtype=bool))
expected = tm.box_expected(expected, xbox, transpose=False)
tm.assert_equal(result, expected)

result = obj != "a"
tm.assert_equal(result, ~expected)

msg = "Invalid comparison between dtype=float64 and str"
with pytest.raises(TypeError, match=msg):
obj < "a"


# ------------------------------------------------------------------
# Numeric dtypes Arithmetic with Datetime/Timedelta Scalar
Expand Down
15 changes: 0 additions & 15 deletions pandas/tests/indexes/test_numpy_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,18 +114,3 @@ def test_numpy_ufuncs_other(index, func):
else:
with pytest.raises(Exception):
func(index)


def test_elementwise_comparison_warning():
# https://github.com/pandas-dev/pandas/issues/22698#issuecomment-458968300
# np.array([1, 2]) == 'a' returns False, and produces a
# FutureWarning that it'll be [False, False] in the future.
# We just want to ensure that comes through.
# When NumPy dev actually enforces this change, we'll need to skip
# this test.
idx = Index([1, 2])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = idx == "a"

expected = np.array([False, False])
tm.assert_numpy_array_equal(result, expected)