forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheval.py
388 lines (319 loc) · 12.2 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
#!/usr/bin/env python
"""
Top level ``eval`` module.
"""
import tokenize
import warnings
from pandas._libs.lib import _no_default
from pandas.util._validators import validate_bool_kwarg
from pandas.core.computation.engines import _engines
from pandas.core.computation.expr import Expr, _parsers, tokenize_string
from pandas.core.computation.scope import ensure_scope
from pandas.io.formats.printing import pprint_thing
def _check_engine(engine):
"""
Make sure a valid engine is passed.
Parameters
----------
engine : str
Raises
------
KeyError
* If an invalid engine is passed
ImportError
* If numexpr was requested but doesn't exist
Returns
-------
string engine
"""
from pandas.core.computation.check import _NUMEXPR_INSTALLED
if engine is None:
if _NUMEXPR_INSTALLED:
engine = "numexpr"
else:
engine = "python"
if engine not in _engines:
valid = list(_engines.keys())
raise KeyError(
f"Invalid engine {repr(engine)} passed, valid engines are {valid}"
)
# TODO: validate this in a more general way (thinking of future engines
# that won't necessarily be import-able)
# Could potentially be done on engine instantiation
if engine == "numexpr":
if not _NUMEXPR_INSTALLED:
raise ImportError(
"'numexpr' is not installed or an "
"unsupported version. Cannot use "
"engine='numexpr' for query/eval "
"if 'numexpr' is not installed"
)
return engine
def _check_parser(parser: str):
"""
Make sure a valid parser is passed.
Parameters
----------
parser : str
Raises
------
KeyError
* If an invalid parser is passed
"""
if parser not in _parsers:
raise KeyError(
f"Invalid parser {repr(parser)} passed, "
f"valid parsers are {_parsers.keys()}"
)
def _check_resolvers(resolvers):
if resolvers is not None:
for resolver in resolvers:
if not hasattr(resolver, "__getitem__"):
name = type(resolver).__name__
raise TypeError(
f"Resolver of type {repr(name)} does not "
f"implement the __getitem__ method"
)
def _check_expression(expr):
"""
Make sure an expression is not an empty string
Parameters
----------
expr : object
An object that can be converted to a string
Raises
------
ValueError
* If expr is an empty string
"""
if not expr:
raise ValueError("expr cannot be an empty string")
def _convert_expression(expr) -> str:
"""
Convert an object to an expression.
This function converts an object to an expression (a unicode string) and
checks to make sure it isn't empty after conversion. This is used to
convert operators to their string representation for recursive calls to
:func:`~pandas.eval`.
Parameters
----------
expr : object
The object to be converted to a string.
Returns
-------
str
The string representation of an object.
Raises
------
ValueError
* If the expression is empty.
"""
s = pprint_thing(expr)
_check_expression(s)
return s
def _check_for_locals(expr: str, stack_level: int, parser: str):
at_top_of_stack = stack_level == 0
not_pandas_parser = parser != "pandas"
if not_pandas_parser:
msg = "The '@' prefix is only supported by the pandas parser"
elif at_top_of_stack:
msg = (
"The '@' prefix is not allowed in "
"top-level eval calls, \nplease refer to "
"your variables by name without the '@' "
"prefix"
)
if at_top_of_stack or not_pandas_parser:
for toknum, tokval in tokenize_string(expr):
if toknum == tokenize.OP and tokval == "@":
raise SyntaxError(msg)
def eval(
expr,
parser="pandas",
engine=None,
truediv=_no_default,
local_dict=None,
global_dict=None,
resolvers=(),
level=0,
target=None,
inplace=False,
):
"""
Evaluate a Python expression as a string using various backends.
The following arithmetic operations are supported: ``+``, ``-``, ``*``,
``/``, ``**``, ``%``, ``//`` (python engine only) along with the following
boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).
Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,
:keyword:`or`, and :keyword:`not` with the same semantics as the
corresponding bitwise operators. :class:`~pandas.Series` and
:class:`~pandas.DataFrame` objects are supported and behave as they would
with plain ol' Python evaluation.
Parameters
----------
expr : str
The expression to evaluate. This string cannot contain any Python
`statements
<https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,
only Python `expressions
<https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.
parser : {'pandas', 'python'}, default 'pandas'
The parser to use to construct the syntax tree from the expression. The
default of ``'pandas'`` parses code slightly different than standard
Python. Alternatively, you can parse an expression using the
``'python'`` parser to retain strict Python semantics. See the
:ref:`enhancing performance <enhancingperf.eval>` documentation for
more details.
engine : {'python', 'numexpr'}, default 'numexpr'
The engine used to evaluate the expression. Supported engines are
- None : tries to use ``numexpr``, falls back to ``python``
- ``'numexpr'``: This default engine evaluates pandas objects using
numexpr for large speed ups in complex expressions
with large frames.
- ``'python'``: Performs operations as if you had ``eval``'d in top
level python. This engine is generally not that useful.
More backends may be available in the future.
truediv : bool, optional
Whether to use true division, like in Python >= 3.
deprecated:: 1.0.0
local_dict : dict or None, optional
A dictionary of local variables, taken from locals() by default.
global_dict : dict or None, optional
A dictionary of global variables, taken from globals() by default.
resolvers : list of dict-like or None, optional
A list of objects implementing the ``__getitem__`` special method that
you can use to inject an additional collection of namespaces to use for
variable lookup. For example, this is used in the
:meth:`~DataFrame.query` method to inject the
``DataFrame.index`` and ``DataFrame.columns``
variables that refer to their respective :class:`~pandas.DataFrame`
instance attributes.
level : int, optional
The number of prior stack frames to traverse and add to the current
scope. Most users will **not** need to change this parameter.
target : object, optional, default None
This is the target object for assignment. It is used when there is
variable assignment in the expression. If so, then `target` must
support item assignment with string keys, and if a copy is being
returned, it must also support `.copy()`.
inplace : bool, default False
If `target` is provided, and the expression mutates `target`, whether
to modify `target` inplace. Otherwise, return a copy of `target` with
the mutation.
Returns
-------
ndarray, numeric scalar, DataFrame, Series
Raises
------
ValueError
There are many instances where such an error can be raised:
- `target=None`, but the expression is multiline.
- The expression is multiline, but not all them have item assignment.
An example of such an arrangement is this:
a = b + 1
a + 2
Here, there are expressions on different lines, making it multiline,
but the last line has no variable assigned to the output of `a + 2`.
- `inplace=True`, but the expression is missing item assignment.
- Item assignment is provided, but the `target` does not support
string item assignment.
- Item assignment is provided and `inplace=False`, but the `target`
does not support the `.copy()` method
See Also
--------
DataFrame.query
DataFrame.eval
Notes
-----
The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
recursively cast to ``float64``.
See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
more details.
"""
inplace = validate_bool_kwarg(inplace, "inplace")
if truediv is not _no_default:
warnings.warn(
"The `truediv` parameter in pd.eval is deprecated and will be "
"removed in a future version.",
FutureWarning,
stacklevel=2,
)
if isinstance(expr, str):
_check_expression(expr)
exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
else:
exprs = [expr]
multi_line = len(exprs) > 1
if multi_line and target is None:
raise ValueError(
"multi-line expressions are only valid in the "
"context of data, use DataFrame.eval"
)
ret = None
first_expr = True
target_modified = False
for expr in exprs:
expr = _convert_expression(expr)
engine = _check_engine(engine)
_check_parser(parser)
_check_resolvers(resolvers)
_check_for_locals(expr, level, parser)
# get our (possibly passed-in) scope
env = ensure_scope(
level + 1,
global_dict=global_dict,
local_dict=local_dict,
resolvers=resolvers,
target=target,
)
parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)
# construct the engine and evaluate the parsed expression
eng = _engines[engine]
eng_inst = eng(parsed_expr)
ret = eng_inst.evaluate()
if parsed_expr.assigner is None:
if multi_line:
raise ValueError(
"Multi-line expressions are only valid"
" if all expressions contain an assignment"
)
elif inplace:
raise ValueError("Cannot operate inplace if there is no assignment")
# assign if needed
assigner = parsed_expr.assigner
if env.target is not None and assigner is not None:
target_modified = True
# if returning a copy, copy only on the first assignment
if not inplace and first_expr:
try:
target = env.target.copy()
except AttributeError:
raise ValueError("Cannot return a copy of the target")
else:
target = env.target
# TypeError is most commonly raised (e.g. int, list), but you
# get IndexError if you try to do this assignment on np.ndarray.
# we will ignore numpy warnings here; e.g. if trying
# to use a non-numeric indexer
try:
with warnings.catch_warnings(record=True):
# TODO: Filter the warnings we actually care about here.
target[assigner] = ret
except (TypeError, IndexError):
raise ValueError("Cannot assign expression output to target")
if not resolvers:
resolvers = ({assigner: ret},)
else:
# existing resolver needs updated to handle
# case of mutating existing column in copy
for resolver in resolvers:
if assigner in resolver:
resolver[assigner] = ret
break
else:
resolvers += ({assigner: ret},)
ret = None
first_expr = False
# We want to exclude `inplace=None` as being False.
if inplace is False:
return target if target_modified else ret