From 1a23578cd6f439e439ffcdb970f03c6dbb2a809b Mon Sep 17 00:00:00 2001 From: Connor Charles Date: Tue, 6 Aug 2019 11:06:29 +0100 Subject: [PATCH 1/4] BUG: _can_use_numexpr did not handle Series case correctly --- doc/source/whatsnew/v0.25.1.rst | 2 +- pandas/core/computation/expressions.py | 2 +- pandas/tests/test_expressions.py | 29 ++++++++++++++++++++++++-- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index dfa216b1db56e..21f8f33e2b439 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -54,7 +54,7 @@ Numeric ^^^^^^^ - Bug in :meth:`Series.interpolate` when using a timezone aware :class:`DatetimeIndex` (:issue:`27548`) - Bug when printing negative floating point complex numbers would raise an ``IndexError`` (:issue:`27484`) -- +- Bug where :class:`DataFrame` arithmetic operators such as :meth:`DataFrame.mul` with a :class:`Series` with axis=1 would raise an ``AttributeError`` on :class:`DataFrame` larger than the minimum threshold to invoke numexpr (:issue:`27636`) - Conversion diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index d9dc194d484ae..d6c7ae018ebfe 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -80,7 +80,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): # check for dtype compatibility dtypes = set() for o in [a, b]: - if hasattr(o, "dtypes"): + if hasattr(o, "dtypes") and o.ndim > 1: s = o.dtypes.value_counts() if len(s) > 1: return False diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 4070624985068..ca514f62f451d 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -66,7 +66,7 @@ def run_arithmetic(self, df, other, assert_func, check_dtype=False, test_flex=Tr operator_name = "truediv" if test_flex: - op = lambda x, y: getattr(df, arith)(y) + op = lambda x, y: getattr(x, arith)(y) op.__name__ = arith else: op = getattr(operator, operator_name) @@ -318,7 +318,6 @@ def testit(): for f in [self.frame, self.frame2, self.mixed, self.mixed2]: for cond in [True, False]: - c = np.empty(f.shape, dtype=np.bool_) c.fill(cond) result = expr.where(c, f.values, f.values + 1) @@ -431,3 +430,29 @@ def test_bool_ops_column_name_dtype(self, test_input, expected): # GH 22383 - .ne fails if columns containing column name 'dtype' result = test_input.loc[:, ["a", "dtype"]].ne(test_input.loc[:, ["a", "dtype"]]) assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "arith", ("add", "sub", "mul", "mod", "truediv", "floordiv") + ) + @pytest.mark.parametrize("axis", (0, 1)) + def test_frame_series_axis(self, axis, arith): + # GH#26736 Dataframe.floordiv(Series, axis=1) fails + if axis == 1 and arith == "floordiv": + pytest.xfail("'floordiv' does not succeed with axis=1 #27636") + + df = self.frame + if axis == 1: + other = self.frame.iloc[0, :] + else: + other = self.frame.iloc[:, 0] + + expr._MIN_ELEMENTS = 0 + + op_func = getattr(df, arith) + + expr.set_use_numexpr(False) + expected = op_func(other, axis=axis) + expr.set_use_numexpr(True) + + result = op_func(other, axis=axis) + assert_frame_equal(expected, result) From 6d212c705d3378b7e81442e0e1f1b24c72dc4e0c Mon Sep 17 00:00:00 2001 From: Connor Charles Date: Mon, 12 Aug 2019 21:27:47 +0100 Subject: [PATCH 2/4] Handle Series case correctly Added comments to remind of Series case --- pandas/core/computation/expressions.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index d6c7ae018ebfe..1787cf5d72067 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -80,11 +80,15 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): # check for dtype compatibility dtypes = set() for o in [a, b]: - if hasattr(o, "dtypes") and o.ndim > 1: - s = o.dtypes.value_counts() - if len(s) > 1: - return False - dtypes |= set(s.index.astype(str)) + if hasattr(o, "dtypes"): + # Series implements dtypes, check for dimension count + if o.ndim > 1: + s = o.dtypes.value_counts() + if len(s) > 1: + return False + dtypes |= set(s.index.astype(str)) + else: + dtypes |= {o.dtypes.name} elif isinstance(o, np.ndarray): dtypes |= {o.dtype.name} From e16e8932640ce0e0c2b3c3dc453f30347fc12e2e Mon Sep 17 00:00:00 2001 From: Connor Charles Date: Wed, 14 Aug 2019 17:56:30 +0100 Subject: [PATCH 3/4] Refactored so Series case uses dtype attribute --- pandas/core/computation/expressions.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 1787cf5d72067..1f692da9af29e 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -80,16 +80,14 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): # check for dtype compatibility dtypes = set() for o in [a, b]: - if hasattr(o, "dtypes"): - # Series implements dtypes, check for dimension count - if o.ndim > 1: - s = o.dtypes.value_counts() - if len(s) > 1: - return False - dtypes |= set(s.index.astype(str)) - else: - dtypes |= {o.dtypes.name} - elif isinstance(o, np.ndarray): + # Series implements dtypes, check for dimension count as well + if hasattr(o, "dtypes") and o.ndim > 1: + s = o.dtypes.value_counts() + if len(s) > 1: + return False + dtypes |= set(s.index.astype(str)) + # ndarray and Series Case + elif hasattr(o, "dtype"): dtypes |= {o.dtype.name} # allowed are a superset @@ -183,7 +181,7 @@ def _has_bool_dtype(x): def _bool_arith_check( - op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None + op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None ): if unsupported is None: unsupported = {"+": "|", "*": "&", "-": "^"} From 0100aba65a833542f78d2e9e410b1019f266158b Mon Sep 17 00:00:00 2001 From: Connor Charles Date: Wed, 14 Aug 2019 17:59:33 +0100 Subject: [PATCH 4/4] Reformatted code --- pandas/core/computation/expressions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 1f692da9af29e..1959242a88897 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -76,7 +76,6 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): # required min elements (otherwise we are adding overhead) if np.prod(a.shape) > _MIN_ELEMENTS: - # check for dtype compatibility dtypes = set() for o in [a, b]: @@ -181,7 +180,7 @@ def _has_bool_dtype(x): def _bool_arith_check( - op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None + op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None ): if unsupported is None: unsupported = {"+": "|", "*": "&", "-": "^"}