|
12 | 12 |
|
13 | 13 | import pandas as pd
|
14 | 14 | from pandas import compat
|
15 |
| -from pandas.compat import StringIO, zip, reduce, string_types |
| 15 | +from pandas.compat import StringIO, lmap, zip, reduce, string_types |
16 | 16 | from pandas.core.base import StringMixin
|
17 | 17 | from pandas.core import common as com
|
| 18 | +from pandas.tools.util import compose |
18 | 19 | from pandas.computation.ops import (_cmp_ops_syms, _bool_ops_syms,
|
19 | 20 | _arith_ops_syms, _unary_ops_syms, is_term)
|
20 | 21 | from pandas.computation.ops import _reductions, _mathops, _LOCAL_TAG
|
|
23 | 24 | from pandas.computation.scope import Scope, _ensure_scope
|
24 | 25 |
|
25 | 26 |
|
26 |
def tokenize_string(source):
    """Tokenize a Python source code string.

    Parameters
    ----------
    source : str
        A Python source code string

    Yields
    ------
    toknum, tokval : int, str
        The type and string value of each token, as produced by
        ``tokenize.generate_tokens``.
    """
    # Feed the tokenizer one line at a time from an in-memory buffer.
    reader = StringIO(source).readline
    for token in tokenize.generate_tokens(reader):
        kind, value = token[0], token[1]
        yield kind, value
| 38 | + |
| 39 | + |
| 40 | +def _rewrite_assign(tok): |
| 41 | + """Rewrite the assignment operator for PyTables expressions that use ``=`` |
| 42 | + as a substitute for ``==``. |
29 | 43 |
|
30 |
| -def _rewrite_assign(source): |
31 |
| - """Rewrite the assignment operator for PyTables expression that want to use |
32 |
| - ``=`` as a substitute for ``==``. |
| 44 | + Parameters |
| 45 | + ---------- |
| 46 | + tok : tuple of int, str |
| 47 | + ints correspond to the all caps constants in the tokenize module |
| 48 | +
|
| 49 | + Returns |
| 50 | + ------- |
| 51 | + t : tuple of int, str |
| 52 | + Either the input or token or the replacement values |
33 | 53 | """
|
34 |
| - res = [] |
35 |
| - for toknum, tokval, _, _, _ in tokenize_string(source): |
36 |
| - res.append((toknum, '==' if tokval == '=' else tokval)) |
37 |
| - return tokenize.untokenize(res) |
| 54 | + toknum, tokval = tok |
| 55 | + return toknum, '==' if tokval == '=' else tokval |
38 | 56 |
|
39 | 57 |
|
40 |
| -def _replace_booleans(source): |
| 58 | +def _replace_booleans(tok): |
41 | 59 | """Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise
|
42 | 60 | precedence is changed to boolean precedence.
|
| 61 | +
|
| 62 | + Parameters |
| 63 | + ---------- |
| 64 | + tok : tuple of int, str |
| 65 | + ints correspond to the all caps constants in the tokenize module |
| 66 | +
|
| 67 | + Returns |
| 68 | + ------- |
| 69 | + t : tuple of int, str |
| 70 | + Either the input or token or the replacement values |
43 | 71 | """
|
44 |
| - res = [] |
45 |
| - for toknum, tokval, _, _, _ in tokenize_string(source): |
46 |
| - if toknum == tokenize.OP: |
47 |
| - if tokval == '&': |
48 |
| - res.append((tokenize.NAME, 'and')) |
49 |
| - elif tokval == '|': |
50 |
| - res.append((tokenize.NAME, 'or')) |
51 |
| - else: |
52 |
| - res.append((toknum, tokval)) |
53 |
| - else: |
54 |
| - res.append((toknum, tokval)) |
55 |
| - return tokenize.untokenize(res) |
| 72 | + toknum, tokval = tok |
| 73 | + if toknum == tokenize.OP: |
| 74 | + if tokval == '&': |
| 75 | + return tokenize.NAME, 'and' |
| 76 | + elif tokval == '|': |
| 77 | + return tokenize.NAME, 'or' |
| 78 | + return toknum, tokval |
| 79 | + return toknum, tokval |
56 | 80 |
|
57 | 81 |
|
58 |
def _replace_locals(tok):
    """Replace local variables with a syntactically valid name.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    t : tuple of int, str
        Either the input token or the replacement values

    Notes
    -----
    This is somewhat of a hack in that we rewrite a string such as ``'@a'`` as
    ``'__pd_eval_local_a'`` by telling the tokenizer that ``__pd_eval_local_``
    is a ``tokenize.OP`` and to replace the ``'@'`` symbol with it.
    """
    toknum, tokval = tok
    is_local_marker = toknum == tokenize.OP and tokval == '@'
    if is_local_marker:
        # Splice in the local-variable tag where the '@' sigil appeared.
        return tokenize.OP, _LOCAL_TAG
    return toknum, tokval
67 | 105 |
|
68 | 106 |
|
69 |
def _preparse(source, f=compose(_replace_locals, _replace_booleans,
                                _rewrite_assign)):
    """Compose a collection of tokenization functions.

    Parameters
    ----------
    source : str
        A Python source code string
    f : callable
        This takes a tuple of (toknum, tokval) as its argument and returns a
        tuple with the same structure but possibly different elements. Defaults
        to the composition of ``_rewrite_assign``, ``_replace_booleans``, and
        ``_replace_locals``.

    Returns
    -------
    s : str
        Valid Python source code

    Raises
    ------
    TypeError
        If ``f`` is not callable.

    Notes
    -----
    The `f` parameter can be any callable that takes *and* returns input of the
    form ``(toknum, tokval)``, where ``toknum`` is one of the constants from
    the ``tokenize`` module and ``tokval`` is a string.
    """
    # Validate explicitly rather than with ``assert``: asserts are stripped
    # when Python runs with -O, which would silently skip this check.
    if not callable(f):
        raise TypeError('f must be callable')
    return tokenize.untokenize(lmap(f, tokenize_string(source)))
72 | 134 |
|
73 | 135 |
|
74 | 136 | def _is_type(t):
|
@@ -535,7 +597,8 @@ def visitor(x, y):
|
class PandasExprVisitor(BaseExprVisitor):
    """Expression visitor for the pandas parser.

    Identical to :class:`BaseExprVisitor` except for its default
    ``preparser``: the token rewriter composed here applies
    ``_replace_locals`` and ``_replace_booleans`` but not
    ``_rewrite_assign``, so ``=`` is left as assignment rather than
    being rewritten to ``==``.
    """

    def __init__(self, env, engine, parser,
                 preparser=partial(_preparse, f=compose(_replace_locals,
                                                        _replace_booleans))):
        super(PandasExprVisitor, self).__init__(env, engine, parser, preparser)
|
540 | 603 |
|
541 | 604 |
|
|
0 commit comments