From 6437e19bd7dc64a9f130bbd282d7d5ae69f54d26 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Sep 2020 20:56:15 -0700 Subject: [PATCH 1/3] REGR: Series[numeric] comparison with str raising on numexpr path --- doc/source/whatsnew/v1.1.3.rst | 2 +- pandas/core/ops/array_ops.py | 5 +++++ pandas/tests/arithmetic/test_numeric.py | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 3f8413bd492ca..1d386fa372ce1 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -34,7 +34,7 @@ Fixed regressions - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`,:issue:`35802`) -- +- Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`,:issue:`36377`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index aab10cea33632..fd5f126051c53 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -23,6 +23,7 @@ is_bool_dtype, is_integer_dtype, is_list_like, + is_numeric_v_string_like, is_object_dtype, is_scalar, ) @@ -235,6 +236,10 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: else: res_values = np.zeros(lvalues.shape, dtype=bool) + elif is_numeric_v_string_like(lvalues, rvalues): + # GH#36377 going through the numexpr path would incorrectly raise + return invalid_comparison(lvalues, rvalues, op) + elif is_object_dtype(lvalues.dtype): res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index ecac08ffe3ba2..445cce5dcf313 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -89,6 +89,28 @@ def test_compare_invalid(self): b.name = pd.Timestamp("2000-01-01") tm.assert_series_equal(a / b, 1 / (b / a)) + def test_numeric_cmp_string_numexpr_path(self, box): + # GH#36377, GH#35700 + xbox = box if box is not pd.Index else np.ndarray + + obj = pd.Series(np.random.randn(10 ** 5)) + obj = tm.box_expected(obj, box, transpose=False) + + result = obj == "a" + + expected = pd.Series(np.zeros(10 ** 5, dtype=bool)) + expected = tm.box_expected(expected, xbox, transpose=False) + tm.assert_equal(result, expected) + + result = obj != "a" + tm.assert_equal(result, ~expected) + + msg = "Invalid comparison between dtype=float64 and str" + if box is pd.Index: + msg = "'<' not supported between instances of 'numpy.ndarray' and" + with pytest.raises(TypeError, match=msg): + obj < "a" + # ------------------------------------------------------------------ # Numeric dtypes Arithmetic with Datetime/Timedelta Scalar From af4a12cbb28d89944a5f09f65c0ab089945dafa9 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Sep 2020 07:53:47 -0700 Subject: [PATCH 2/3] Suppress numpy futurewarning --- pandas/core/indexes/base.py | 2 +- pandas/tests/arithmetic/test_numeric.py | 2 -- pandas/tests/indexes/test_numpy_compat.py | 15 --------------- 3 files changed, 1 insertion(+), 18 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 222ae589ea7fc..f0f3b7cd6ad6c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -140,7 +140,7 @@ def cmp_method(self, other): else: with np.errstate(all="ignore"): - result = op(self._values, np.asarray(other)) + result = ops.comparison_op(self._values, np.asarray(other), op) if is_bool_dtype(result): return result diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 445cce5dcf313..139401bdf5806 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -106,8 +106,6 @@ def test_numeric_cmp_string_numexpr_path(self, box): tm.assert_equal(result, ~expected) msg = "Invalid comparison between dtype=float64 and str" - if box is pd.Index: - msg = "'<' not supported between instances of 'numpy.ndarray' and" with pytest.raises(TypeError, match=msg): obj < "a" diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index a83684464caf6..b71417b2a625d 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -114,18 +114,3 @@ def test_numpy_ufuncs_other(index, func): else: with pytest.raises(Exception): func(index) - - -def test_elementwise_comparison_warning(): - # https://github.com/pandas-dev/pandas/issues/22698#issuecomment-458968300 - # np.array([1, 2]) == 'a' returns False, and produces a - # FutureWarning that it'll be [False, False] in the future. - # We just want to ensure that comes through. - # When NumPy dev actually enforces this change, we'll need to skip - # this test. - idx = Index([1, 2]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = idx == "a" - - expected = np.array([False, False]) - tm.assert_numpy_array_equal(result, expected) From f0fe49be11f057abb2e7b046fc39c3ba0fb6173f Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Sep 2020 10:19:54 -0700 Subject: [PATCH 3/3] special case for IntervalIndex --- pandas/core/indexes/base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f0f3b7cd6ad6c..5ca51ca496b80 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -138,6 +138,10 @@ def cmp_method(self, other): with np.errstate(all="ignore"): result = ops.comp_method_OBJECT_ARRAY(op, self._values, other) + elif is_interval_dtype(self.dtype): + with np.errstate(all="ignore"): + result = op(self._values, np.asarray(other)) + else: with np.errstate(all="ignore"): result = ops.comparison_op(self._values, np.asarray(other), op)