From a36988b30d0e43a0e0c6edec37a17dcdb6bf310a Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 12 Jul 2015 10:27:14 +0900 Subject: [PATCH] BUG: pd.eval with numexpr engine coerces 1 element numpy array to scalar --- doc/source/whatsnew/v0.17.0.txt | 1 + pandas/computation/align.py | 17 +++---- pandas/computation/tests/test_eval.py | 70 +++++++++++++++++++-------- 3 files changed, 60 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index b2a1e10469a0f..6ab299eb70eb5 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -366,6 +366,7 @@ Bug Fixes - Bug that caused segfault when resampling an empty Series (:issue:`10228`) - Bug in ``DatetimeIndex`` and ``PeriodIndex.value_counts`` resets name from its result, but retains in result's ``Index``. (:issue:`10150`) +- Bug in `pd.eval` using ``numexpr`` engine coerces 1 element numpy array to scalar (:issue:`10546`) - Bug in `pandas.concat` with ``axis=0`` when column is of dtype ``category`` (:issue:`10177`) - Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`) - Bug in `pandas.read_csv` with kwargs ``index_col=False``, ``index_col=['a', 'b']`` or ``dtype`` diff --git a/pandas/computation/align.py b/pandas/computation/align.py index 2e0845bddf7e2..9834dd1a9e7fc 100644 --- a/pandas/computation/align.py +++ b/pandas/computation/align.py @@ -172,12 +172,11 @@ def _reconstruct_object(typ, obj, axes, dtype): ret_value = res_t.type(obj) else: ret_value = typ(obj).astype(res_t) - - try: - ret = ret_value.item() - except (ValueError, IndexError): - # XXX: we catch IndexError to absorb a - # regression in numpy 1.7.0 - # fixed by numpy/numpy@04b89c63 - ret = ret_value - return ret + # The condition is to distinguish 0-dim array (returned in case of scalar) + # and 1 element array + # e.g. np.array(0) and np.array([0]) + if len(obj.shape) == 1 and len(obj) == 1: + if not isinstance(ret_value, np.ndarray): + ret_value = np.array([ret_value]).astype(res_t) + + return ret_value diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 61bc40e34b5a3..d455d9d0d8679 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -10,7 +10,7 @@ from numpy.random import randn, rand, randint import numpy as np -from numpy.testing import assert_array_equal, assert_allclose +from numpy.testing import assert_allclose from numpy.testing.decorators import slow import pandas as pd @@ -220,7 +220,7 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): expected = _eval_single_bin( lhs_new, binop, rhs_new, self.engine) result = pd.eval(ex, engine=self.engine, parser=self.parser) - assert_array_equal(result, expected) + tm.assert_numpy_array_equivalent(result, expected) def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): skip_these = _scalar_skip @@ -240,7 +240,7 @@ def check_operands(left, right, cmp_op): for ex in (ex1, ex2, ex3): result = pd.eval(ex, engine=self.engine, parser=self.parser) - assert_array_equal(result, expected) + tm.assert_numpy_array_equivalent(result, expected) def check_simple_cmp_op(self, lhs, cmp1, rhs): ex = 'lhs {0} rhs'.format(cmp1) @@ -251,13 +251,13 @@ def check_simple_cmp_op(self, lhs, cmp1, rhs): else: expected = _eval_single_bin(lhs, cmp1, rhs, self.engine) result = pd.eval(ex, engine=self.engine, parser=self.parser) - assert_array_equal(result, expected) + tm.assert_numpy_array_equivalent(result, expected) def check_binary_arith_op(self, lhs, arith1, rhs): ex = 'lhs {0} rhs'.format(arith1) result = pd.eval(ex, engine=self.engine, parser=self.parser) expected = _eval_single_bin(lhs, arith1, rhs, self.engine) - assert_array_equal(result, expected) + tm.assert_numpy_array_equivalent(result, expected) ex = 'lhs {0} rhs {0} rhs'.format(arith1) result = pd.eval(ex, engine=self.engine, parser=self.parser) nlhs = _eval_single_bin(lhs, arith1, rhs, @@ -273,7 +273,7 @@ def check_alignment(self, result, nlhs, ghs, op): pass else: expected = self.ne.evaluate('nlhs {0} ghs'.format(op)) - assert_array_equal(result, expected) + tm.assert_numpy_array_equivalent(result, expected) # modulus, pow, and floor division require special casing @@ -291,7 +291,7 @@ def check_floor_division(self, lhs, arith1, rhs): if self.engine == 'python': res = pd.eval(ex, engine=self.engine, parser=self.parser) expected = lhs // rhs - assert_array_equal(res, expected) + tm.assert_numpy_array_equivalent(res, expected) else: self.assertRaises(TypeError, pd.eval, ex, local_dict={'lhs': lhs, 'rhs': rhs}, @@ -325,7 +325,7 @@ def check_pow(self, lhs, arith1, rhs): if (np.isscalar(lhs) and np.isscalar(rhs) and _is_py3_complex_incompat(result, expected)): - self.assertRaises(AssertionError, assert_array_equal, result, + self.assertRaises(AssertionError, tm.assert_numpy_array_equivalent, result, expected) else: assert_allclose(result, expected) @@ -345,11 +345,11 @@ def check_single_invert_op(self, lhs, cmp1, rhs): elb = np.array([bool(el)]) expected = ~elb result = pd.eval('~elb', engine=self.engine, parser=self.parser) - assert_array_equal(expected, result) + tm.assert_numpy_array_equivalent(expected, result) for engine in self.current_engines: tm.skip_if_no_ne(engine) - assert_array_equal(result, pd.eval('~elb', engine=engine, + tm.assert_numpy_array_equivalent(result, pd.eval('~elb', engine=engine, parser=self.parser)) def check_compound_invert_op(self, lhs, cmp1, rhs): @@ -370,13 +370,13 @@ def check_compound_invert_op(self, lhs, cmp1, rhs): else: expected = ~expected result = pd.eval(ex, engine=self.engine, parser=self.parser) - assert_array_equal(expected, result) + tm.assert_numpy_array_equivalent(expected, result) # make sure the other engines work the same as this one for engine in self.current_engines: tm.skip_if_no_ne(engine) ev = pd.eval(ex, engine=self.engine, parser=self.parser) - assert_array_equal(ev, result) + tm.assert_numpy_array_equivalent(ev, result) def ex(self, op, var_name='lhs'): return '{0}{1}'.format(op, var_name) @@ -620,6 +620,38 @@ def test_disallow_scalar_bool_ops(self): with tm.assertRaises(NotImplementedError): pd.eval(ex, engine=self.engine, parser=self.parser) + def test_identical(self): + # GH 10546 + x = 1 + result = pd.eval('x', engine=self.engine, parser=self.parser) + self.assertEqual(result, 1) + self.assertTrue(np.isscalar(result)) + + x = 1.5 + result = pd.eval('x', engine=self.engine, parser=self.parser) + self.assertEqual(result, 1.5) + self.assertTrue(np.isscalar(result)) + + x = False + result = pd.eval('x', engine=self.engine, parser=self.parser) + self.assertEqual(result, False) + self.assertTrue(np.isscalar(result)) + + x = np.array([1]) + result = pd.eval('x', engine=self.engine, parser=self.parser) + tm.assert_numpy_array_equivalent(result, np.array([1])) + self.assertEqual(result.shape, (1, )) + + x = np.array([1.5]) + result = pd.eval('x', engine=self.engine, parser=self.parser) + tm.assert_numpy_array_equivalent(result, np.array([1.5])) + self.assertEqual(result.shape, (1, )) + + x = np.array([False]) + result = pd.eval('x', engine=self.engine, parser=self.parser) + tm.assert_numpy_array_equivalent(result, np.array([False])) + self.assertEqual(result.shape, (1, )) + class TestEvalNumexprPython(TestEvalNumexprPandas): @@ -675,7 +707,7 @@ def check_alignment(self, result, nlhs, ghs, op): pass else: expected = eval('nlhs {0} ghs'.format(op)) - assert_array_equal(result, expected) + tm.assert_numpy_array_equivalent(result, expected) class TestEvalPythonPandas(TestEvalPythonPython): @@ -1086,10 +1118,10 @@ def test_truediv(self): if PY3: res = self.eval(ex, truediv=False) - assert_array_equal(res, np.array([1.0])) + tm.assert_numpy_array_equivalent(res, np.array([1.0])) res = self.eval(ex, truediv=True) - assert_array_equal(res, np.array([1.0])) + tm.assert_numpy_array_equivalent(res, np.array([1.0])) res = self.eval('1 / 2', truediv=True) expec = 0.5 @@ -1108,10 +1140,10 @@ def test_truediv(self): self.assertEqual(res, expec) else: res = self.eval(ex, truediv=False) - assert_array_equal(res, np.array([1])) + tm.assert_numpy_array_equivalent(res, np.array([1])) res = self.eval(ex, truediv=True) - assert_array_equal(res, np.array([1.0])) + tm.assert_numpy_array_equivalent(res, np.array([1.0])) res = self.eval('1 / 2', truediv=True) expec = 0.5 @@ -1414,8 +1446,8 @@ class TestScope(object): def check_global_scope(self, e, engine, parser): tm.skip_if_no_ne(engine) - assert_array_equal(_var_s * 2, pd.eval(e, engine=engine, - parser=parser)) + tm.assert_numpy_array_equivalent(_var_s * 2, pd.eval(e, engine=engine, + parser=parser)) def test_global_scope(self): e = '_var_s * 2'