diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index dfb5ebc9379b1..6421f31c0267e 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -89,6 +89,7 @@ Other enhancements - ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`) +- ``eval``'s upcasting rules for ``float32`` types have also been updated to be more consistent with numpy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`) .. _whatsnew_0182.api: diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 01d0fa664ac41..f1cf210754d12 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -5,6 +5,7 @@ import tokenize from functools import partial +import numpy as np import pandas as pd from pandas import compat @@ -356,6 +357,19 @@ def _possibly_transform_eq_ne(self, node, left=None, right=None): right) return op, op_class, left, right + def _possibly_downcast_constants(self, left, right): + f32 = np.dtype(np.float32) + if left.isscalar and not right.isscalar and right.return_type == f32: + # right is a float32 array, left is a scalar + name = self.env.add_tmp(np.float32(left.value)) + left = self.term_type(name, self.env) + if right.isscalar and not left.isscalar and left.return_type == f32: + # left is a float32 array, right is a scalar + name = self.env.add_tmp(np.float32(right.value)) + right = self.term_type(name, self.env) + + return left, right + def _possibly_eval(self, binop, eval_in_python): # eval `in` and `not in` (for now) in "partial" python space # things that can be evaluated in "eval" space will be turned into @@ -399,6 +413,7 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs, def visit_BinOp(self, node, **kwargs): op, op_class, left, right = self._possibly_transform_eq_ne(node) + left, right = self._possibly_downcast_constants(left, right) return self._possibly_evaluate_binop(op, op_class, left, right) def visit_Div(self, node, **kwargs): diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 603c030dcaa6e..20976b82452dc 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -276,18 +276,25 @@ def _not_in(x, y): _binary_ops_dict.update(d) -def _cast_inplace(terms, dtype): +def _cast_inplace(terms, acceptable_dtypes, dtype): """Cast an expression inplace. + .. versionadded:: 0.18.2 + Parameters ---------- terms : Op The expression that should cast. + acceptable_dtypes : list of acceptable numpy.dtype + Will not cast if term's dtype in this list. dtype : str or numpy.dtype The dtype to cast to. """ dt = np.dtype(dtype) for term in terms: + if term.type in acceptable_dtypes: + continue + try: new_value = term.value.astype(dt) except AttributeError: @@ -452,7 +459,9 @@ def __init__(self, lhs, rhs, truediv, *args, **kwargs): rhs.return_type)) if truediv or PY3: - _cast_inplace(com.flatten(self), np.float_) + # do not upcast float32s to float64 un-necessarily + acceptable_dtypes = [np.float32, np.float_] + _cast_inplace(com.flatten(self), acceptable_dtypes, np.float_) _unary_ops_syms = '+', '-', '~', 'not' diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 023519fd7fc20..bbcdaf857364c 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -739,6 +739,35 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): ENGINES_PARSERS = list(product(_engines, expr._parsers)) +#------------------------------------- +# typecasting rules consistency with python +# issue #12388 + +class TestTypeCasting(tm.TestCase): + + def check_binop_typecasting(self, engine, parser, op, dt): + tm.skip_if_no_ne(engine) + df = mkdf(5, 3, data_gen_f=f, dtype=dt) + s = 'df {} 3'.format(op) + res = pd.eval(s, engine=engine, parser=parser) + self.assertTrue(df.values.dtype == dt) + self.assertTrue(res.values.dtype == dt) + assert_frame_equal(res, eval(s)) + + s = '3 {} df'.format(op) + res = pd.eval(s, engine=engine, parser=parser) + self.assertTrue(df.values.dtype == dt) + self.assertTrue(res.values.dtype == dt) + assert_frame_equal(res, eval(s)) + + def test_binop_typecasting(self): + for engine, parser in ENGINES_PARSERS: + for op in ['+', '-', '*', '**', '/']: + # maybe someday... numexpr has too many upcasting rules now + #for dt in chain(*(np.sctypes[x] for x in ['uint', 'int', 'float'])): + for dt in [np.float32, np.float64]: + yield self.check_binop_typecasting, engine, parser, op, dt + #------------------------------------- # basic and complex alignment