From 495ce187b0aa108c1d2f12ae16d7f57206545b7d Mon Sep 17 00:00:00 2001 From: Connor Charles Date: Mon, 19 Aug 2019 18:20:23 +0100 Subject: [PATCH] Backport PR #27773: BUG: _can_use_numexpr fails when passed large Series --- doc/source/whatsnew/v0.25.1.rst | 2 +- pandas/core/computation/expressions.py | 7 ++++--- pandas/tests/test_expressions.py | 29 ++++++++++++++++++++++++-- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 87e46f97d3157..cec927d73edca 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -54,7 +54,7 @@ Numeric ^^^^^^^ - Bug in :meth:`Series.interpolate` when using a timezone aware :class:`DatetimeIndex` (:issue:`27548`) - Bug when printing negative floating point complex numbers would raise an ``IndexError`` (:issue:`27484`) -- +- Bug where :class:`DataFrame` arithmetic operators such as :meth:`DataFrame.mul` with a :class:`Series` with axis=1 would raise an ``AttributeError`` on :class:`DataFrame` larger than the minimum threshold to invoke numexpr (:issue:`27636`) - Conversion diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index ea61467080291..9621fb1d65509 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -76,16 +76,17 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): # required min elements (otherwise we are adding overhead) if np.prod(a.shape) > _MIN_ELEMENTS: - # check for dtype compatibility dtypes = set() for o in [a, b]: - if hasattr(o, "dtypes"): + # Series implements dtypes, check for dimension count as well + if hasattr(o, "dtypes") and o.ndim > 1: s = o.dtypes.value_counts() if len(s) > 1: return False dtypes |= set(s.index.astype(str)) - elif isinstance(o, np.ndarray): + # ndarray and Series Case + elif hasattr(o, "dtype"): dtypes |= {o.dtype.name} # allowed are a superset diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 4070624985068..ca514f62f451d 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -66,7 +66,7 @@ def run_arithmetic(self, df, other, assert_func, check_dtype=False, test_flex=Tr operator_name = "truediv" if test_flex: - op = lambda x, y: getattr(df, arith)(y) + op = lambda x, y: getattr(x, arith)(y) op.__name__ = arith else: op = getattr(operator, operator_name) @@ -318,7 +318,6 @@ def testit(): for f in [self.frame, self.frame2, self.mixed, self.mixed2]: for cond in [True, False]: - c = np.empty(f.shape, dtype=np.bool_) c.fill(cond) result = expr.where(c, f.values, f.values + 1) @@ -431,3 +430,29 @@ def test_bool_ops_column_name_dtype(self, test_input, expected): # GH 22383 - .ne fails if columns containing column name 'dtype' result = test_input.loc[:, ["a", "dtype"]].ne(test_input.loc[:, ["a", "dtype"]]) assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "arith", ("add", "sub", "mul", "mod", "truediv", "floordiv") + ) + @pytest.mark.parametrize("axis", (0, 1)) + def test_frame_series_axis(self, axis, arith): + # GH#26736 Dataframe.floordiv(Series, axis=1) fails + if axis == 1 and arith == "floordiv": + pytest.xfail("'floordiv' does not succeed with axis=1 #27636") + + df = self.frame + if axis == 1: + other = self.frame.iloc[0, :] + else: + other = self.frame.iloc[:, 0] + + expr._MIN_ELEMENTS = 0 + + op_func = getattr(df, arith) + + expr.set_use_numexpr(False) + expected = op_func(other, axis=axis) + expr.set_use_numexpr(True) + + result = op_func(other, axis=axis) + assert_frame_equal(expected, result)