diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 749512890d86b..d52c1d55a6c66 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -38,6 +38,7 @@ Highlights include: - Development installed versions of pandas will now have ``PEP440`` compliant version strings (:issue:`9518`) - Support for reading SAS xport files, see :ref:`here ` - Removal of the automatic TimeSeries broadcasting, deprecated since 0.8.0, see :ref:`here ` +- Support for math functions in .eval(), see :ref:`here <whatsnew_0170.matheval>` Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -123,6 +124,25 @@ incrementally. See the :ref:`docs ` for more details. +.. _whatsnew_0170.matheval: + +Support for Math Functions in .eval() +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`~pandas.eval` now supports calling math functions. + +.. code-block:: python + + df = pd.DataFrame({'a': np.random.randn(10)}) + df.eval("b = sin(a)") + +The supported math functions are `sin`, `cos`, `exp`, `log`, `expm1`, `log1p`, +`sqrt`, `sinh`, `cosh`, `tanh`, `arcsin`, `arccos`, `arctan`, `arccosh`, +`arcsinh`, `arctanh`, `abs` and `arctan2`. + +These functions map to the intrinsics for the NumExpr engine. For the Python +engine, they are mapped to NumPy calls. + .. 
_whatsnew_0170.enhancements.other: Other enhancements diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index b6a1fcbec8339..123051d802d7d 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -20,7 +20,7 @@ _arith_ops_syms, _unary_ops_syms, is_term) from pandas.computation.ops import _reductions, _mathops, _LOCAL_TAG from pandas.computation.ops import Op, BinOp, UnaryOp, Term, Constant, Div -from pandas.computation.ops import UndefinedVariableError +from pandas.computation.ops import UndefinedVariableError, FuncNode from pandas.computation.scope import Scope, _ensure_scope @@ -524,27 +524,48 @@ def visit_Call(self, node, side=None, **kwargs): elif not isinstance(node.func, ast.Name): raise TypeError("Only named functions are supported") else: - res = self.visit(node.func) + try: + res = self.visit(node.func) + except UndefinedVariableError: + # Check if this is a supported function name + try: + res = FuncNode(node.func.id) + except ValueError: + # Raise original error + raise if res is None: raise ValueError("Invalid function call {0}".format(node.func.id)) if hasattr(res, 'value'): res = res.value - args = [self.visit(targ).value for targ in node.args] - if node.starargs is not None: - args += self.visit(node.starargs).value + if isinstance(res, FuncNode): + args = [self.visit(targ) for targ in node.args] + + if node.starargs is not None: + args += self.visit(node.starargs) + + if node.keywords or node.kwargs: + raise TypeError("Function \"{0}\" does not support keyword " + "arguments".format(res.name)) + + return res(*args, **kwargs) + + else: + args = [self.visit(targ).value for targ in node.args] + if node.starargs is not None: + args += self.visit(node.starargs).value - keywords = {} - for key in node.keywords: - if not isinstance(key, ast.keyword): - raise ValueError("keyword error in function call " - "'{0}'".format(node.func.id)) - keywords[key.arg] = self.visit(key.value).value - if node.kwargs is not None: - 
keywords.update(self.visit(node.kwargs).value) + keywords = {} + for key in node.keywords: + if not isinstance(key, ast.keyword): + raise ValueError("keyword error in function call " + "'{0}'".format(node.func.id)) + keywords[key.arg] = self.visit(key.value).value + if node.kwargs is not None: + keywords.update(self.visit(node.kwargs).value) - return self.const_type(res(*args, **keywords), self.env) + return self.const_type(res(*args, **keywords), self.env) def translate_In(self, op): return op @@ -587,7 +608,7 @@ def visitor(x, y): return reduce(visitor, operands) -_python_not_supported = frozenset(['Dict', 'Call', 'BoolOp', 'In', 'NotIn']) +_python_not_supported = frozenset(['Dict', 'BoolOp', 'In', 'NotIn']) _numexpr_supported_calls = frozenset(_reductions + _mathops) diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 9df9975b4b61c..f6d5f171036ea 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -16,9 +16,12 @@ _reductions = 'sum', 'prod' -_mathops = ('sin', 'cos', 'exp', 'log', 'expm1', 'log1p', 'pow', 'div', 'sqrt', - 'inv', 'sinh', 'cosh', 'tanh', 'arcsin', 'arccos', 'arctan', - 'arccosh', 'arcsinh', 'arctanh', 'arctan2', 'abs') + +_unary_math_ops = ('sin', 'cos', 'exp', 'log', 'expm1', 'log1p', + 'sqrt', 'sinh', 'cosh', 'tanh', 'arcsin', 'arccos', + 'arctan', 'arccosh', 'arcsinh', 'arctanh', 'abs') +_binary_math_ops = ('arctan2',) +_mathops = _unary_math_ops + _binary_math_ops _LOCAL_TAG = '__pd_eval_local_' @@ -498,3 +501,28 @@ def return_type(self): (operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict)): return np.dtype('bool') return np.dtype('int') + + +class MathCall(Op): + def __init__(self, func, args): + super(MathCall, self).__init__(func.name, args) + self.func = func + + def __call__(self, env): + operands = [op(env) for op in self.operands] + return self.func.func(*operands) + + def __unicode__(self): + operands = map(str, self.operands) + return com.pprint_thing('{0}({1})'.format(self.op, 
','.join(operands))) + + +class FuncNode(object): + def __init__(self, name): + if name not in _mathops: + raise ValueError("\"{0}\" is not a supported function".format(name)) + self.name = name + self.func = getattr(np, name) + + def __call__(self, *args): + return MathCall(self, args) diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 4f998319d922d..8db0b82f1aa2e 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -23,7 +23,8 @@ from pandas.computation.expr import PythonExprVisitor, PandasExprVisitor from pandas.computation.ops import (_binary_ops_dict, _special_case_arith_ops_syms, - _arith_ops_syms, _bool_ops_syms) + _arith_ops_syms, _bool_ops_syms, + _unary_math_ops, _binary_math_ops) import pandas.computation.expr as expr import pandas.util.testing as tm @@ -1439,6 +1440,129 @@ def setUpClass(cls): cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms +class TestMathPythonPython(tm.TestCase): + @classmethod + def setUpClass(cls): + super(TestMathPythonPython, cls).setUpClass() + tm.skip_if_no_ne() + cls.engine = 'python' + cls.parser = 'pandas' + cls.unary_fns = _unary_math_ops + cls.binary_fns = _binary_math_ops + + @classmethod + def tearDownClass(cls): + del cls.engine, cls.parser + + def eval(self, *args, **kwargs): + kwargs['engine'] = self.engine + kwargs['parser'] = self.parser + kwargs['level'] = kwargs.pop('level', 0) + 1 + return pd.eval(*args, **kwargs) + + def test_unary_functions(self): + df = DataFrame({'a': np.random.randn(10)}) + a = df.a + for fn in self.unary_fns: + expr = "{0}(a)".format(fn) + got = self.eval(expr) + expect = getattr(np, fn)(a) + pd.util.testing.assert_almost_equal(got, expect) + + def test_binary_functions(self): + df = DataFrame({'a': np.random.randn(10), + 'b': np.random.randn(10)}) + a = df.a + b = df.b + for fn in self.binary_fns: + expr = "{0}(a, b)".format(fn) + got = self.eval(expr) + expect = getattr(np, fn)(a, b) + 
np.testing.assert_allclose(got, expect) + + def test_df_use_case(self): + df = DataFrame({'a': np.random.randn(10), + 'b': np.random.randn(10)}) + df.eval("e = arctan2(sin(a), b)", + engine=self.engine, + parser=self.parser) + got = df.e + expect = np.arctan2(np.sin(df.a), df.b) + pd.util.testing.assert_almost_equal(got, expect) + + def test_df_arithmetic_subexpression(self): + df = DataFrame({'a': np.random.randn(10), + 'b': np.random.randn(10)}) + df.eval("e = sin(a + b)", + engine=self.engine, + parser=self.parser) + got = df.e + expect = np.sin(df.a + df.b) + pd.util.testing.assert_almost_equal(got, expect) + + def check_result_type(self, dtype, expect_dtype): + df = DataFrame({'a': np.random.randn(10).astype(dtype)}) + self.assertEqual(df.a.dtype, dtype) + df.eval("b = sin(a)", + engine=self.engine, + parser=self.parser) + got = df.b + expect = np.sin(df.a) + self.assertEqual(expect.dtype, got.dtype) + self.assertEqual(expect_dtype, got.dtype) + pd.util.testing.assert_almost_equal(got, expect) + + def test_result_types(self): + self.check_result_type(np.int32, np.float64) + self.check_result_type(np.int64, np.float64) + self.check_result_type(np.float32, np.float32) + self.check_result_type(np.float64, np.float64) + # Did not test complex64 because DataFrame is converting it to + # complex128. 
Due to https://github.com/pydata/pandas/issues/10952 + self.check_result_type(np.complex128, np.complex128) + + def test_undefined_func(self): + df = DataFrame({'a': np.random.randn(10)}) + with tm.assertRaisesRegexp(ValueError, + "\"mysin\" is not a supported function"): + df.eval("mysin(a)", + engine=self.engine, + parser=self.parser) + + def test_keyword_arg(self): + df = DataFrame({'a': np.random.randn(10)}) + with tm.assertRaisesRegexp(TypeError, + "Function \"sin\" does not support " + "keyword arguments"): + df.eval("sin(x=a)", + engine=self.engine, + parser=self.parser) + + +class TestMathPythonPandas(TestMathPythonPython): + @classmethod + def setUpClass(cls): + super(TestMathPythonPandas, cls).setUpClass() + cls.engine = 'python' + cls.parser = 'pandas' + + +class TestMathNumExprPandas(TestMathPythonPython): + @classmethod + def setUpClass(cls): + super(TestMathNumExprPandas, cls).setUpClass() + cls.engine = 'numexpr' + cls.parser = 'pandas' + + +class TestMathNumExprPython(TestMathPythonPython): + @classmethod + def setUpClass(cls): + super(TestMathNumExprPython, cls).setUpClass() + cls.engine = 'numexpr' + cls.parser = 'python' + + _var_s = randn(10)