Merge pull request #10558 from sinhrks/numexpr_0dim

sinhrks · sinhrks · commit d25a9f38119f · 2015-07-21T06:19:08.000+09:00
BUG: pd.eval with numexpr engine coerces 1 element numpy array to scalar
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -366,6 +366,7 @@ Bug Fixes
 
 - Bug that caused segfault when resampling an empty Series (:issue:`10228`)
 - Bug in ``DatetimeIndex`` and ``PeriodIndex.value_counts`` resets name from its result, but retains in result's ``Index``. (:issue:`10150`)
+- Bug in `pd.eval` where the ``numexpr`` engine coerced a 1-element numpy array to a scalar (:issue:`10546`)
 - Bug in `pandas.concat` with ``axis=0`` when column is of dtype ``category`` (:issue:`10177`)
 - Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`)
 - Bug in `pandas.read_csv` with kwargs ``index_col=False``, ``index_col=['a', 'b']`` or ``dtype``
diff --git a/pandas/computation/align.py b/pandas/computation/align.py
@@ -172,12 +172,11 @@ def _reconstruct_object(typ, obj, axes, dtype):
         ret_value = res_t.type(obj)
     else:
         ret_value = typ(obj).astype(res_t)
-
-    try:
-        ret = ret_value.item()
-    except (ValueError, IndexError):
-        # XXX: we catch IndexError to absorb a
-        # regression in numpy 1.7.0
-        # fixed by numpy/numpy@04b89c63
-        ret = ret_value
-    return ret
+        # Distinguish a 0-dim array (what a scalar input produces) from a
+        # 1-element 1-dim array, e.g. np.array(0) vs. np.array([0]): the
+        # latter must be returned as an array, not coerced to a scalar.
+        if len(obj.shape) == 1 and len(obj) == 1:
+            if not isinstance(ret_value, np.ndarray):
+                ret_value = np.array([ret_value]).astype(res_t)
+
+    return ret_value
diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py
@@ -10,7 +10,7 @@
 
 from numpy.random import randn, rand, randint
 import numpy as np
-from numpy.testing import assert_array_equal, assert_allclose
+from numpy.testing import assert_allclose
 from numpy.testing.decorators import slow
 
 import pandas as pd
@@ -220,7 +220,7 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2):
                 expected = _eval_single_bin(
                     lhs_new, binop, rhs_new, self.engine)
                 result = pd.eval(ex, engine=self.engine, parser=self.parser)
-                assert_array_equal(result, expected)
+                tm.assert_numpy_array_equivalent(result, expected)
 
     def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs):
         skip_these = _scalar_skip
@@ -240,7 +240,7 @@ def check_operands(left, right, cmp_op):
             for ex in (ex1, ex2, ex3):
                 result = pd.eval(ex, engine=self.engine,
                                  parser=self.parser)
-                assert_array_equal(result, expected)
+                tm.assert_numpy_array_equivalent(result, expected)
 
     def check_simple_cmp_op(self, lhs, cmp1, rhs):
         ex = 'lhs {0} rhs'.format(cmp1)
@@ -251,13 +251,13 @@ def check_simple_cmp_op(self, lhs, cmp1, rhs):
         else:
             expected = _eval_single_bin(lhs, cmp1, rhs, self.engine)
             result = pd.eval(ex, engine=self.engine, parser=self.parser)
-            assert_array_equal(result, expected)
+            tm.assert_numpy_array_equivalent(result, expected)
 
     def check_binary_arith_op(self, lhs, arith1, rhs):
         ex = 'lhs {0} rhs'.format(arith1)
         result = pd.eval(ex, engine=self.engine, parser=self.parser)
         expected = _eval_single_bin(lhs, arith1, rhs, self.engine)
-        assert_array_equal(result, expected)
+        tm.assert_numpy_array_equivalent(result, expected)
         ex = 'lhs {0} rhs {0} rhs'.format(arith1)
         result = pd.eval(ex, engine=self.engine, parser=self.parser)
         nlhs = _eval_single_bin(lhs, arith1, rhs,
@@ -273,7 +273,7 @@ def check_alignment(self, result, nlhs, ghs, op):
             pass
         else:
             expected = self.ne.evaluate('nlhs {0} ghs'.format(op))
-            assert_array_equal(result, expected)
+            tm.assert_numpy_array_equivalent(result, expected)
 
     # modulus, pow, and floor division require special casing
 
@@ -291,7 +291,7 @@ def check_floor_division(self, lhs, arith1, rhs):
         if self.engine == 'python':
             res = pd.eval(ex, engine=self.engine, parser=self.parser)
             expected = lhs // rhs
-            assert_array_equal(res, expected)
+            tm.assert_numpy_array_equivalent(res, expected)
         else:
             self.assertRaises(TypeError, pd.eval, ex, local_dict={'lhs': lhs,
                                                                   'rhs': rhs},
@@ -325,7 +325,7 @@ def check_pow(self, lhs, arith1, rhs):
 
         if (np.isscalar(lhs) and np.isscalar(rhs) and
                 _is_py3_complex_incompat(result, expected)):
-            self.assertRaises(AssertionError, assert_array_equal, result,
+            self.assertRaises(AssertionError, tm.assert_numpy_array_equivalent, result,
                               expected)
         else:
             assert_allclose(result, expected)
@@ -345,11 +345,11 @@ def check_single_invert_op(self, lhs, cmp1, rhs):
                 elb = np.array([bool(el)])
             expected = ~elb
             result = pd.eval('~elb', engine=self.engine, parser=self.parser)
-            assert_array_equal(expected, result)
+            tm.assert_numpy_array_equivalent(expected, result)
 
             for engine in self.current_engines:
                 tm.skip_if_no_ne(engine)
-                assert_array_equal(result, pd.eval('~elb', engine=engine,
+                tm.assert_numpy_array_equivalent(result, pd.eval('~elb', engine=engine,
                                                    parser=self.parser))
 
     def check_compound_invert_op(self, lhs, cmp1, rhs):
@@ -370,13 +370,13 @@ def check_compound_invert_op(self, lhs, cmp1, rhs):
             else:
                 expected = ~expected
             result = pd.eval(ex, engine=self.engine, parser=self.parser)
-            assert_array_equal(expected, result)
+            tm.assert_numpy_array_equivalent(expected, result)
 
             # make sure the other engines work the same as this one
             for engine in self.current_engines:
                 tm.skip_if_no_ne(engine)
                 ev = pd.eval(ex, engine=self.engine, parser=self.parser)
-                assert_array_equal(ev, result)
+                tm.assert_numpy_array_equivalent(ev, result)
 
     def ex(self, op, var_name='lhs'):
         return '{0}{1}'.format(op, var_name)
@@ -620,6 +620,38 @@ def test_disallow_scalar_bool_ops(self):
             with tm.assertRaises(NotImplementedError):
                 pd.eval(ex, engine=self.engine, parser=self.parser)
 
+    def test_identical(self):
+        # GH 10546
+        x = 1
+        result = pd.eval('x', engine=self.engine, parser=self.parser)
+        self.assertEqual(result, 1)
+        self.assertTrue(np.isscalar(result))
+
+        x = 1.5
+        result = pd.eval('x', engine=self.engine, parser=self.parser)
+        self.assertEqual(result, 1.5)
+        self.assertTrue(np.isscalar(result))
+
+        x = False
+        result = pd.eval('x', engine=self.engine, parser=self.parser)
+        self.assertEqual(result, False)
+        self.assertTrue(np.isscalar(result))
+
+        x = np.array([1])
+        result = pd.eval('x', engine=self.engine, parser=self.parser)
+        tm.assert_numpy_array_equivalent(result, np.array([1]))
+        self.assertEqual(result.shape, (1, ))
+
+        x = np.array([1.5])
+        result = pd.eval('x', engine=self.engine, parser=self.parser)
+        tm.assert_numpy_array_equivalent(result, np.array([1.5]))
+        self.assertEqual(result.shape, (1, ))
+
+        x = np.array([False])
+        result = pd.eval('x', engine=self.engine, parser=self.parser)
+        tm.assert_numpy_array_equivalent(result, np.array([False]))
+        self.assertEqual(result.shape, (1, ))
+
 
 class TestEvalNumexprPython(TestEvalNumexprPandas):
 
@@ -675,7 +707,7 @@ def check_alignment(self, result, nlhs, ghs, op):
             pass
         else:
             expected = eval('nlhs {0} ghs'.format(op))
-            assert_array_equal(result, expected)
+            tm.assert_numpy_array_equivalent(result, expected)
 
 
 class TestEvalPythonPandas(TestEvalPythonPython):
@@ -1086,10 +1118,10 @@ def test_truediv(self):
 
         if PY3:
             res = self.eval(ex, truediv=False)
-            assert_array_equal(res, np.array([1.0]))
+            tm.assert_numpy_array_equivalent(res, np.array([1.0]))
 
             res = self.eval(ex, truediv=True)
-            assert_array_equal(res, np.array([1.0]))
+            tm.assert_numpy_array_equivalent(res, np.array([1.0]))
 
             res = self.eval('1 / 2', truediv=True)
             expec = 0.5
@@ -1108,10 +1140,10 @@ def test_truediv(self):
             self.assertEqual(res, expec)
         else:
             res = self.eval(ex, truediv=False)
-            assert_array_equal(res, np.array([1]))
+            tm.assert_numpy_array_equivalent(res, np.array([1]))
 
             res = self.eval(ex, truediv=True)
-            assert_array_equal(res, np.array([1.0]))
+            tm.assert_numpy_array_equivalent(res, np.array([1.0]))
 
             res = self.eval('1 / 2', truediv=True)
             expec = 0.5
@@ -1414,8 +1446,8 @@ class TestScope(object):
 
     def check_global_scope(self, e, engine, parser):
         tm.skip_if_no_ne(engine)
-        assert_array_equal(_var_s * 2, pd.eval(e, engine=engine,
-                                               parser=parser))
+        tm.assert_numpy_array_equivalent(_var_s * 2, pd.eval(e, engine=engine,
+                                         parser=parser))
 
     def test_global_scope(self):
         e = '_var_s * 2'