Skip to content

do not upcast results to float64 when float32 scalar *+/- float64 array #12559

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ Other enhancements

- ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`)

- ``eval``'s upcasting rules for ``float32`` types have also been updated to be more consistent with numpy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`)


.. _whatsnew_0182.api:
Expand Down
15 changes: 15 additions & 0 deletions pandas/computation/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import tokenize

from functools import partial
import numpy as np

import pandas as pd
from pandas import compat
Expand Down Expand Up @@ -356,6 +357,19 @@ def _possibly_transform_eq_ne(self, node, left=None, right=None):
right)
return op, op_class, left, right

def _possibly_downcast_constants(self, left, right):
f32 = np.dtype(np.float32)
if left.isscalar and not right.isscalar and right.return_type == f32:
# right is a float32 array, left is a scalar
name = self.env.add_tmp(np.float32(left.value))
left = self.term_type(name, self.env)
if right.isscalar and not left.isscalar and left.return_type == f32:
# left is a float32 array, right is a scalar
name = self.env.add_tmp(np.float32(right.value))
right = self.term_type(name, self.env)

return left, right

def _possibly_eval(self, binop, eval_in_python):
# eval `in` and `not in` (for now) in "partial" python space
# things that can be evaluated in "eval" space will be turned into
Expand Down Expand Up @@ -399,6 +413,7 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs,

def visit_BinOp(self, node, **kwargs):
op, op_class, left, right = self._possibly_transform_eq_ne(node)
left, right = self._possibly_downcast_constants(left, right)
return self._possibly_evaluate_binop(op, op_class, left, right)

def visit_Div(self, node, **kwargs):
Expand Down
13 changes: 11 additions & 2 deletions pandas/computation/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,18 +276,25 @@ def _not_in(x, y):
_binary_ops_dict.update(d)


def _cast_inplace(terms, dtype):
def _cast_inplace(terms, acceptable_dtypes, dtype):
"""Cast an expression inplace.

.. versionadded:: 0.18.2

Parameters
----------
terms : Op
The expression that should cast.
acceptable_dtypes : list of acceptable numpy.dtype
Will not cast if term's dtype in this list.
dtype : str or numpy.dtype
The dtype to cast to.
"""
dt = np.dtype(dtype)
for term in terms:
if term.type in acceptable_dtypes:
continue

try:
new_value = term.value.astype(dt)
except AttributeError:
Expand Down Expand Up @@ -452,7 +459,9 @@ def __init__(self, lhs, rhs, truediv, *args, **kwargs):
rhs.return_type))

if truediv or PY3:
_cast_inplace(com.flatten(self), np.float_)
# do not upcast float32s to float64 un-necessarily
acceptable_dtypes = [np.float32, np.float_]
_cast_inplace(com.flatten(self), acceptable_dtypes, np.float_)


_unary_ops_syms = '+', '-', '~', 'not'
Expand Down
29 changes: 29 additions & 0 deletions pandas/computation/tests/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,35 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs):

ENGINES_PARSERS = list(product(_engines, expr._parsers))

#-------------------------------------
# typecasting rules consistency with python
# issue #12388

class TestTypeCasting(tm.TestCase):

def check_binop_typecasting(self, engine, parser, op, dt):
tm.skip_if_no_ne(engine)
df = mkdf(5, 3, data_gen_f=f, dtype=dt)
s = 'df {} 3'.format(op)
res = pd.eval(s, engine=engine, parser=parser)
self.assertTrue(df.values.dtype == dt)
self.assertTrue(res.values.dtype == dt)
assert_frame_equal(res, eval(s))

s = '3 {} df'.format(op)
res = pd.eval(s, engine=engine, parser=parser)
self.assertTrue(df.values.dtype == dt)
self.assertTrue(res.values.dtype == dt)
assert_frame_equal(res, eval(s))

def test_binop_typecasting(self):
for engine, parser in ENGINES_PARSERS:
for op in ['+', '-', '*', '**', '/']:
# maybe someday... numexpr has too many upcasting rules now
#for dt in chain(*(np.sctypes[x] for x in ['uint', 'int', 'float'])):
for dt in [np.float32, np.float64]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do the int/uint types behave similarly? what about float16?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

numexpr does not support float16, and for ints numexpr was upcasting sum of two ints to int64 (I am guessing for overflow reasons). Changing this behavior now could break other people's code (worse, it would only happen on some inputs), so I left it alone

yield self.check_binop_typecasting, engine, parser, op, dt


#-------------------------------------
# basic and complex alignment
Expand Down