diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index 61679b14a8592..9c44081b64134 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -158,6 +158,6 @@ Categorical
 Other
 ^^^^^
 
--
+- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`)
 -
 -
diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
index ae956bce11329..23abfa8b3fca1 100644
--- a/pandas/core/computation/expr.py
+++ b/pandas/core/computation/expr.py
@@ -307,7 +307,19 @@ def __init__(self, env, engine, parser, preparser=_preparse):
     def visit(self, node, **kwargs):
         if isinstance(node, string_types):
             clean = self.preparser(node)
-            node = ast.fix_missing_locations(ast.parse(clean))
+            try:
+                node = ast.fix_missing_locations(ast.parse(clean))
+            except SyntaxError as e:
+                # NOTE(review): heuristic -- any whitespace-separated keyword
+                # token triggers the message, even a legitimately used one
+                # such as ``and`` or ``in``.
+                from keyword import iskeyword
+                if any(iskeyword(x) for x in clean.split()):
+                    e.msg = ("Python keyword not valid identifier"
+                             " in numexpr query")
+                # Bare ``raise`` re-raises the same exception object (with
+                # the updated ``msg``) and keeps its original traceback.
+                raise
 
         method = 'visit_' + node.__class__.__name__
         visitor = getattr(self, method)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 70f1ff0a5380d..9ca0d10cd0b1d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2267,7 +2267,8 @@ def query(self, expr, inplace=False, **kwargs):
         by default, which allows you to treat both the index and columns of the
         frame as a column in the frame.
         The identifier ``index`` is used for the frame index; you can also
-        use the name of the index to identify it in a query.
+        use the name of the index to identify it in a query. Please note that
+        Python keywords may not be used as identifiers.
 
         For further details and examples see the ``query`` documentation in
         :ref:`indexing <indexing.query>`.
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index d2874b1606e72..c2d1eb8ae1372 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -718,6 +718,18 @@ def test_float_truncation(self):
         expected = df.loc[[1], :]
         tm.assert_frame_equal(expected, result)
 
+    def test_disallow_python_keywords(self):
+        # GH 18221: keywords as identifiers must raise a clear SyntaxError
+        df = pd.DataFrame([[0, 0, 0]], columns=['foo', 'bar', 'class'])
+        msg = "Python keyword not valid identifier in numexpr query"
+        with tm.assert_raises_regex(SyntaxError, msg):
+            df.query('class == 0')  # column label is a keyword
+
+        df = pd.DataFrame()
+        df.index.name = 'lambda'
+        with tm.assert_raises_regex(SyntaxError, msg):
+            df.query('lambda == 0')  # index name is a keyword
+
 
 class TestEvalNumexprPython(TestEvalNumexprPandas):
 