COMPAT: do not upcast results to float64 when float32 scalar *+/- float64 array

jennolsen84 · jreback · commit cc1025a62019 · 2016-05-30T10:39:18.000-04:00
closes pandas-dev#12388 Author: Jenn Olsen <jennolsen84@gmail.com> Closes pandas-dev#12559 from jennolsen84/noevalupcast and squashes the following commits: 3f61252 [Jenn Olsen] do not upcast to float64 everytime
diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
@@ -89,6 +89,7 @@ Other enhancements
 
 - ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`)
 
+- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules.  New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`)
 
 
 .. _whatsnew_0182.api:
diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py
@@ -5,6 +5,7 @@
 import tokenize
 
 from functools import partial
+import numpy as np
 
 import pandas as pd
 from pandas import compat
@@ -356,6 +357,19 @@ def _possibly_transform_eq_ne(self, node, left=None, right=None):
                                                                 right)
         return op, op_class, left, right
 
+    def _possibly_downcast_constants(self, left, right):
+        f32 = np.dtype(np.float32)
+        if left.isscalar and not right.isscalar and right.return_type == f32:
+            # right is a float32 array, left is a scalar
+            name = self.env.add_tmp(np.float32(left.value))
+            left = self.term_type(name, self.env)
+        if right.isscalar and not left.isscalar and left.return_type == f32:
+            # left is a float32 array, right is a scalar
+            name = self.env.add_tmp(np.float32(right.value))
+            right = self.term_type(name, self.env)
+
+        return left, right
+
     def _possibly_eval(self, binop, eval_in_python):
         # eval `in` and `not in` (for now) in "partial" python space
         # things that can be evaluated in "eval" space will be turned into
@@ -399,6 +413,7 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs,
 
     def visit_BinOp(self, node, **kwargs):
         op, op_class, left, right = self._possibly_transform_eq_ne(node)
+        left, right = self._possibly_downcast_constants(left, right)
         return self._possibly_evaluate_binop(op, op_class, left, right)
 
     def visit_Div(self, node, **kwargs):
diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py
@@ -276,18 +276,26 @@ def _not_in(x, y):
     _binary_ops_dict.update(d)
 
 
-def _cast_inplace(terms, dtype):
+def _cast_inplace(terms, acceptable_dtypes, dtype):
     """Cast an expression inplace.
 
     Parameters
     ----------
     terms : Op
         The expression that should cast.
+    acceptable_dtypes : list of acceptable numpy.dtype
+        Will not cast if term's dtype in this list.
+
+        .. versionadded:: 0.18.2
+
     dtype : str or numpy.dtype
         The dtype to cast to.
     """
     dt = np.dtype(dtype)
     for term in terms:
+        if term.type in acceptable_dtypes:
+            continue
+
         try:
             new_value = term.value.astype(dt)
         except AttributeError:
@@ -452,7 +460,9 @@ def __init__(self, lhs, rhs, truediv, *args, **kwargs):
                                                       rhs.return_type))
 
         if truediv or PY3:
-            _cast_inplace(com.flatten(self), np.float_)
+            # do not upcast float32s to float64 un-necessarily
+            acceptable_dtypes = [np.float32, np.float_]
+            _cast_inplace(com.flatten(self), acceptable_dtypes, np.float_)
 
 
 _unary_ops_syms = '+', '-', '~', 'not'
diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py
@@ -749,6 +749,35 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs):
 
 ENGINES_PARSERS = list(product(_engines, expr._parsers))
 
+#-------------------------------------
+# typecasting rules consistency with python
+# issue #12388
+
+class TestTypeCasting(tm.TestCase):
+
+    def check_binop_typecasting(self, engine, parser, op, dt):
+        tm.skip_if_no_ne(engine)
+        df = mkdf(5, 3, data_gen_f=f, dtype=dt)
+        s = 'df {} 3'.format(op)
+        res = pd.eval(s, engine=engine, parser=parser)
+        self.assertTrue(df.values.dtype == dt)
+        self.assertTrue(res.values.dtype == dt)
+        assert_frame_equal(res, eval(s))
+
+        s = '3 {} df'.format(op)
+        res = pd.eval(s, engine=engine, parser=parser)
+        self.assertTrue(df.values.dtype == dt)
+        self.assertTrue(res.values.dtype == dt)
+        assert_frame_equal(res, eval(s))
+
+    def test_binop_typecasting(self):
+        for engine, parser in ENGINES_PARSERS:
+            for op in ['+', '-', '*', '**', '/']:
+                # maybe someday... numexpr has too many upcasting rules now
+                #for dt in chain(*(np.sctypes[x] for x in ['uint', 'int', 'float'])):
+                for dt in [np.float32, np.float64]:
+                    yield self.check_binop_typecasting, engine, parser, op, dt
+
 
 #-------------------------------------
 # basic and complex alignment

Original file line number	Diff line number	Diff line change
`@@ -89,6 +89,7 @@ Other enhancements`
`89`	`89`
`90`	`90`	- ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`)
`91`	`91`
	`92`	+- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`)
`92`	`93`
`93`	`94`
`94`	`95`	`.. _whatsnew_0182.api:`