From bd7d0c708647ca7964f991f86abde876819575ce Mon Sep 17 00:00:00 2001 From: Jacob Howard Date: Fri, 31 Jan 2014 14:45:38 +0000 Subject: [PATCH] BUG: Add type promotion support for eval() expressions with many properties This commit modifies the call to numpy.result_type to get around the NPY_MAXARGS limit, which at the moment is 32. Instead of passing a generator of all types involved in an expression, the type promotion is done on a pair-wise basis with a call to reduce. This fixes bugs for code such as the following: from numpy.random import randn from pandas import DataFrame d = DataFrame(randn(10, 2), columns=list('ab')) # Evaluates fine print(d.eval('*'.join(['a'] * 32))) # Fails to evaluate due to NumPy argument limits print(d.eval('*'.join(['a'] * 33))) --- doc/source/release.rst | 1 + pandas/computation/align.py | 7 ++++--- pandas/computation/common.py | 14 ++++++++++++++ pandas/computation/ops.py | 4 ++-- pandas/computation/tests/test_eval.py | 12 ++++++++++++ 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 5f7d87ea03f67..ae95c882fe356 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -76,6 +76,7 @@ Bug Fixes - Indexing bugs with reordered indexes (:issue:`6252`, :issue:`6254`) - Bug in ``.xs`` with a Series multiindex (:issue:`6258`, :issue:`5684`) - Bug in conversion of a string types to a DatetimeIndex with a specified frequency (:issue:`6273`, :issue:`6274`) +- Bug in ``eval`` where type-promotion failed for large expressions (:issue:`6205`) pandas 0.13.1 ------------- diff --git a/pandas/computation/align.py b/pandas/computation/align.py index 9fe563574bbd4..1685f66c15416 100644 --- a/pandas/computation/align.py +++ b/pandas/computation/align.py @@ -10,6 +10,7 @@ import pandas as pd from pandas import compat import pandas.core.common as com +from pandas.computation.common import _result_type_many def _align_core_single_unary_op(term): @@ -85,11 +86,11 @@ def wrapper(terms): # only scalars or indexes if all(isinstance(term.value, pd.Index) or term.isscalar for term in terms): - return np.result_type(*term_values), None + return _result_type_many(*term_values), None # no pandas objects if not _any_pandas_objects(terms): - return np.result_type(*term_values), None + return _result_type_many(*term_values), None return f(terms) return wrapper @@ -199,7 +200,7 @@ def _align(terms): # if all resolved variables are numeric scalars if all(term.isscalar for term in terms): - return np.result_type(*(term.value for term in terms)).type, None + return _result_type_many(*(term.value for term in terms)).type, None # perform the main alignment typ, axes = _align_core(terms) diff --git a/pandas/computation/common.py b/pandas/computation/common.py index 9af2197a4fd69..0d5e639032b94 100644 --- a/pandas/computation/common.py +++ b/pandas/computation/common.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +from pandas.compat import reduce def _ensure_decoded(s): @@ -9,5 +10,18 @@ def _ensure_decoded(s): return s +def _result_type_many(*arrays_and_dtypes): + """ wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32) + argument limit """ + try: + return np.result_type(*arrays_and_dtypes) + except ValueError: + # length 0 or length > NPY_MAXARGS both throw a ValueError, so check + # which one we're dealing with + if len(arrays_and_dtypes) == 0: + raise ValueError('at least one array or dtype is required') + return reduce(np.result_type, arrays_and_dtypes) + + class NameResolutionError(NameError): pass diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 8d7bd0a819e79..270ba92d4483a 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -13,7 +13,7 @@ from pandas.compat import PY3, string_types, text_type import pandas.core.common as com from pandas.core.base import StringMixin -from pandas.computation.common import _ensure_decoded +from pandas.computation.common import _ensure_decoded, _result_type_many _reductions = 'sum', 'prod' @@ -240,7 +240,7 @@ def return_type(self): # clobber types to bool if the op is a boolean operator if self.op in (_cmp_ops_syms + _bool_ops_syms): return np.bool_ - return np.result_type(*(term.type for term in com.flatten(self))) + return _result_type_many(*(term.type for term in com.flatten(self))) @property def isscalar(self): diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index b1cafca190bb0..dbc190df9c33a 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -1575,6 +1575,18 @@ def test_invalid_numexpr_version(): yield check_invalid_numexpr_version, engine, parser +def check_many_exprs(engine, parser): + a = 1 + expr = ' * '.join('a' * 33) + expected = 1 + res = pd.eval(expr, engine=engine, parser=parser) + tm.assert_equal(res, expected) + +def test_many_exprs(): + for engine, parser in ENGINES_PARSERS: + yield check_many_exprs, engine, parser + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False)