Skip to content

Commit 17ad0ba

Browse files
committed
ERR/API: disallow local references in top-level calls to eval
1 parent 2450678 commit 17ad0ba

File tree

8 files changed

+65
-19
lines changed

8 files changed

+65
-19
lines changed

pandas/compat/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
import pickle as cPickle
5555
import http.client as httplib
5656

57-
from chainmap import DeepChainMap
57+
from pandas.compat.chainmap import DeepChainMap
5858

5959

6060
if PY3:

pandas/computation/eval.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
"""
55

66
import sys
7+
import tokenize
78
from pandas.core import common as com
8-
from pandas.computation.expr import Expr, _parsers
9+
from pandas.computation.expr import Expr, _parsers, tokenize_string
910
from pandas.computation.scope import _ensure_scope
1011
from pandas.compat import DeepChainMap, builtins
1112
from pandas.computation.engines import _engines
@@ -118,6 +119,24 @@ def _convert_expression(expr):
118119
return s
119120

120121

122+
def _check_for_locals(expr, stack_level, parser):
    """Raise if ``expr`` uses the ``@`` prefix where it is not accepted.

    The expression is scanned only when the parser is not ``'pandas'`` or
    when ``stack_level`` is 0; in every other case this is a no-op.

    Parameters
    ----------
    expr : str
        Expression source that is tokenized and searched for ``@``.
    stack_level : int
        Level supplied by the caller; 0 is treated as the top of the stack.
    parser : str
        Name of the parser in use.

    Raises
    ------
    SyntaxError
        If an ``@`` operator token is found while one of the two
        conditions above holds.
    """
    # The parser check takes precedence when choosing the message.
    if parser != 'pandas':
        msg = "The '@' prefix is only supported by the pandas parser"
    elif stack_level == 0:
        msg = ("The '@' prefix is not allowed in "
               "top-level eval calls, please refer to "
               "your variables by name without the '@' "
               "prefix")
    else:
        # pandas parser and not at the top of the stack: nothing to check
        return

    # Work on tokens rather than raw text so only a real '@' operator
    # token triggers the error.
    has_local_prefix = any(
        toknum == tokenize.OP and tokval == '@'
        for toknum, tokval, _, _, _ in tokenize_string(expr)
    )
    if has_local_prefix:
        raise SyntaxError(msg)
138+
139+
121140
def eval(expr, parser='pandas', engine='numexpr', truediv=True,
122141
local_dict=None, global_dict=None, resolvers=(), level=0,
123142
target=None):
@@ -200,6 +219,7 @@ def eval(expr, parser='pandas', engine='numexpr', truediv=True,
200219
_check_engine(engine)
201220
_check_parser(parser)
202221
_check_resolvers(resolvers)
222+
_check_for_locals(expr, level, parser)
203223

204224
# get our (possibly passed-in) scope
205225
level += 1

pandas/computation/expr.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,16 @@
2323
from pandas.computation.scope import Scope, _ensure_scope
2424

2525

26+
def tokenize_string(s):
    """Return the token generator for the Python source string ``s``.

    The string is wrapped in a ``StringIO`` so that its ``readline`` method
    can be handed to ``tokenize.generate_tokens``.
    """
    line_reader = StringIO(s).readline
    return tokenize.generate_tokens(line_reader)
28+
29+
2630
def _rewrite_assign(source):
2731
"""Rewrite the assignment operator for PyTables expression that want to use
2832
``=`` as a substitute for ``==``.
2933
"""
3034
res = []
31-
g = tokenize.generate_tokens(StringIO(source).readline)
32-
for toknum, tokval, _, _, _ in g:
35+
for toknum, tokval, _, _, _ in tokenize_string(source):
3336
res.append((toknum, '==' if tokval == '=' else tokval))
3437
return tokenize.untokenize(res)
3538

@@ -39,8 +42,7 @@ def _replace_booleans(source):
3942
precedence is changed to boolean precedence.
4043
"""
4144
res = []
42-
g = tokenize.generate_tokens(StringIO(source).readline)
43-
for toknum, tokval, _, _, _ in g:
45+
for toknum, tokval, _, _, _ in tokenize_string(source):
4446
if toknum == tokenize.OP:
4547
if tokval == '&':
4648
res.append((tokenize.NAME, 'and'))
@@ -54,7 +56,7 @@ def _replace_booleans(source):
5456

5557

5658
def _replace_locals(source, local_symbol='@'):
57-
"""Replace local variables with a syntacticall valid name."""
59+
"""Replace local variables with a syntactically valid name."""
5860
return source.replace(local_symbol, _LOCAL_TAG)
5961

6062

pandas/computation/pytables.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pandas.core.base import StringMixin
1212
import pandas.core.common as com
1313
from pandas.computation import expr, ops
14-
from pandas.computation.ops import is_term
14+
from pandas.computation.ops import is_term, UndefinedVariableError
1515
from pandas.computation.scope import _ensure_scope
1616
from pandas.computation.expr import BaseExprVisitor
1717
from pandas.computation.common import _ensure_decoded
@@ -48,7 +48,10 @@ def _resolve_name(self):
4848
return self.name
4949

5050
# resolve the rhs (and allow it to be None)
51-
return self.env.resolve(self.name, is_local=False)
51+
try:
52+
return self.env.resolve(self.name, is_local=False)
53+
except UndefinedVariableError:
54+
return self.name
5255

5356
@property
5457
def value(self):

pandas/computation/scope.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import pprint
1111

1212
import pandas as pd
13-
from pandas.compat import DeepChainMap, map
13+
from pandas.compat import DeepChainMap, map, StringIO
1414
from pandas.core import common as com
1515
from pandas.core.base import StringMixin
1616
from pandas.computation.ops import UndefinedVariableError, _LOCAL_TAG
@@ -117,11 +117,11 @@ def __init__(self, level, global_dict=None, local_dict=None, resolvers=(),
117117
# shallow copy here because we don't want to replace what's in
118118
# scope when we align terms (alignment accesses the underlying
119119
# numpy array of pandas objects)
120+
self.scope = self.scope.new_child((global_dict or
121+
frame.f_globals).copy())
120122
if not isinstance(local_dict, Scope):
121123
self.scope = self.scope.new_child((local_dict or
122124
frame.f_locals).copy())
123-
self.scope = self.scope.new_child((global_dict or
124-
frame.f_globals).copy())
125125
finally:
126126
del frame
127127

@@ -132,8 +132,8 @@ def __init__(self, level, global_dict=None, local_dict=None, resolvers=(),
132132
self.temps = {}
133133

134134
def __unicode__(self):
135-
scope_keys = _get_pretty_string(self.scope.keys())
136-
res_keys = _get_pretty_string(self.resolvers.keys())
135+
scope_keys = _get_pretty_string(list(self.scope.keys()))
136+
res_keys = _get_pretty_string(list(self.resolvers.keys()))
137137
return '%s(scope=%s, resolvers=%s)' % (type(self).__name__, scope_keys,
138138
res_keys)
139139

pandas/computation/tests/test_eval.py

+19
Original file line numberDiff line numberDiff line change
@@ -1556,6 +1556,25 @@ def test_invalid_numexpr_version():
15561556
yield check_invalid_numexpr_version, engine, parser
15571557

15581558

1559+
def check_invalid_local_variable_reference(engine, parser):
    """Check that ``pd.eval`` raises ``SyntaxError`` for ``@`` references.

    Each expression containing an ``@``-prefixed name is evaluated on its
    own. The expected error message depends on ``parser``: one message for
    parsers other than ``'pandas'``, another for the ``'pandas'`` parser.

    Parameters
    ----------
    engine : str
        Engine name forwarded to ``pd.eval``.
    parser : str
        Parser name forwarded to ``pd.eval``.
    """
    tm.skip_if_no_ne(engine)

    # Local names that the expressions below refer to.
    a, b = 1, 2
    exprs = 'a + @b', '@a + b', '@a + @b'

    # The expected message depends only on the parser, so pick it once.
    if parser != 'pandas':
        expected_msg = "The '@' prefix is only"
    else:
        expected_msg = "The '@' prefix is not"

    for expr in exprs:
        with tm.assertRaisesRegexp(SyntaxError, expected_msg):
            # Pass the single expression, not the whole ``exprs`` tuple.
            pd.eval(expr, engine=engine, parser=parser)
1571+
1572+
1573+
def test_invalid_local_variable_reference():
    """Yield one invalid-local-reference check per entry of
    ``ENGINES_PARSERS``."""
    for pair in ENGINES_PARSERS:
        engine, parser = pair
        yield check_invalid_local_variable_reference, engine, parser
1576+
1577+
15591578
if __name__ == '__main__':
15601579
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
15611580
exit=False)

pandas/io/pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def _ensure_term(where, scope_level):
8181
where = [w if not maybe_expression(w) else Term(w, scope_level=level)
8282
for w in where if w is not None]
8383
elif maybe_expression(where):
84-
where = Term(where, level)
84+
where = Term(where, scope_level=level)
8585
return where
8686

8787

pandas/tests/test_frame.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -12116,7 +12116,6 @@ def test_isin_dupe_self(self):
1211612116
expected.iloc[1, 1] = True
1211712117
assert_frame_equal(result, expected)
1211812118

12119-
1212012119
def test_isin_against_series(self):
1212112120
df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [2, np.nan, 4, 4]},
1212212121
index=['a', 'b', 'c', 'd'])
@@ -12249,6 +12248,7 @@ def test_empty_frame_dtypes_ftypes(self):
1224912248
('b', 'bool:dense'),
1225012249
('c', 'float64:dense')])))
1225112250

12251+
1225212252
def skip_if_no_ne(engine='numexpr'):
1225312253
if engine == 'numexpr':
1225412254
try:
@@ -12705,16 +12705,16 @@ def test_nested_scope(self):
1270512705
result = df.query('(@df > 0) & (@df2 > 0)', engine=engine, parser=parser)
1270612706
assert_frame_equal(result, expected)
1270712707

12708-
result = pd.eval('@df[@df > 0 and @df2 > 0]', engine=engine,
12708+
result = pd.eval('df[df > 0 and df2 > 0]', engine=engine,
1270912709
parser=parser)
1271012710
assert_frame_equal(result, expected)
1271112711

12712-
result = pd.eval('@df[@df > 0 and @df2 > 0 and @df[@df > 0] > 0]',
12712+
result = pd.eval('df[df > 0 and df2 > 0 and df[df > 0] > 0]',
1271312713
engine=engine, parser=parser)
1271412714
expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]
1271512715
assert_frame_equal(result, expected)
1271612716

12717-
result = pd.eval('@df[(@df>0) & (@df2>0)]', engine=engine, parser=parser)
12717+
result = pd.eval('df[(df>0) & (df2>0)]', engine=engine, parser=parser)
1271812718
expected = df.query('(@df>0) & (@df2>0)', engine=engine, parser=parser)
1271912719
assert_frame_equal(result, expected)
1272012720

@@ -12874,6 +12874,7 @@ def test_query_builtin(self):
1287412874
result = df.query('sin > 5', engine=engine, parser=parser)
1287512875
tm.assert_frame_equal(expected, result)
1287612876

12877+
1287712878
class TestDataFrameQueryPythonPython(TestDataFrameQueryNumExprPython):
1287812879

1287912880
@classmethod
@@ -12894,6 +12895,7 @@ def test_query_builtin(self):
1289412895
result = df.query('sin > 5', engine=engine, parser=parser)
1289512896
tm.assert_frame_equal(expected, result)
1289612897

12898+
1289712899
PARSERS = 'python', 'pandas'
1289812900
ENGINES = 'python', 'numexpr'
1289912901

0 commit comments

Comments
 (0)