Skip to content

Commit 8a09169

Browse files
committed
ENH: Add implementation for backtick quoting (pandas-dev#6508).
This only works for parser=pandas and engine=numexpr. It works by replacing every backtick-quoted variable with a cleaned version of its name; for this, see pandas/core/common.py::clean_column_name_with_spaces. This happens before the query is passed on, and by changing the names in the localdict before it is passed to numexpr.
1 parent 3dcc953 commit 8a09169

File tree

3 files changed

+45
-5
lines changed

3 files changed

+45
-5
lines changed

pandas/core/common.py

+8
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,14 @@ def consensus_name_attr(objs):
6666
return name
6767

6868

69+
def clean_column_name_with_spaces(name):
    """Return a space-free version of a column name.

    Every space in ``name`` is replaced by an underscore and one extra
    underscore is appended, e.g. ``"a b"`` becomes ``"a_b_"``.

    Parameters
    ----------
    name : object
        Column label to clean.

    Returns
    -------
    object
        The rewritten string when ``name`` is a ``str`` containing at least
        one space; otherwise ``name`` itself, unchanged (this includes
        non-string labels and strings without spaces).
    """
    # Non-strings and names without spaces are passed through untouched.
    if not isinstance(name, str) or " " not in name:
        return name
    return name.replace(" ", "_") + "_"
75+
76+
6977
def maybe_box(indexer, values, obj, key):
7078

7179
# if we have multiples coming back, box em

pandas/core/computation/expr.py

+31-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import ast
55
from functools import partial
6+
import re
67
import tokenize
78

89
import numpy as np
@@ -102,6 +103,26 @@ def _replace_locals(tok):
102103
return toknum, tokval
103104

104105

106+
def _replace_spaces_backtickvariables(source):
    """Clean up a column name if surrounded by backticks.

    Parameters
    ----------
    source : str
        str corresponding to an expression

    Returns
    -------
    s : str
        ``source`` in which every span enclosed in a pair of backticks has
        been replaced by ``com.clean_column_name_with_spaces`` applied to the
        quoted text; the backticks themselves are dropped. This is done so
        that any column name with spaces can still be used in query and
        eval.
    """
    # Non-greedy group so that each backtick pair is substituted on its own
    # rather than matching from the first backtick to the last.
    return re.sub(r'`(.*?)`',
                  lambda m: com.clean_column_name_with_spaces(m.group(1)),
                  source)
124+
125+
105126
def _compose2(f, g):
106127
"""Compose 2 callables"""
107128
return lambda *args, **kwargs: f(g(*args, **kwargs))
@@ -114,7 +135,7 @@ def _compose(*funcs):
114135

115136

116137
def _preparse(source, f=_compose(_replace_locals, _replace_booleans,
117-
_rewrite_assign)):
138+
_rewrite_assign), g=lambda x: x):
118139
"""Compose a collection of tokenization functions
119140
120141
Parameters
@@ -127,6 +148,9 @@ def _preparse(source, f=_compose(_replace_locals, _replace_booleans,
127148
to the composition of ``_rewrite_assign``, ``_replace_booleans``, and
128149
``_replace_locals``.
129150
151+
g : callable
152+
This takes a source string and returns an altered one.
153+
130154
Returns
131155
-------
132156
s : str
@@ -139,7 +163,8 @@ def _preparse(source, f=_compose(_replace_locals, _replace_booleans,
139163
the ``tokenize`` module and ``tokval`` is a string.
140164
"""
141165
assert callable(f), 'f must be callable'
142-
return tokenize.untokenize(lmap(f, tokenize_string(source)))
166+
assert callable(g), 'g must be callable'
167+
return tokenize.untokenize(lmap(f, tokenize_string(g(source))))
143168

144169

145170
def _is_type(t):
@@ -711,8 +736,10 @@ def visitor(x, y):
711736
class PandasExprVisitor(BaseExprVisitor):
712737

713738
def __init__(self, env, engine, parser,
714-
preparser=partial(_preparse, f=_compose(_replace_locals,
715-
_replace_booleans))):
739+
preparser=partial(_preparse,
740+
f=_compose(_replace_locals,
741+
_replace_booleans),
742+
g=_replace_spaces_backtickvariables)):
716743
super(PandasExprVisitor, self).__init__(env, engine, parser, preparser)
717744

718745

pandas/core/frame.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -3186,7 +3186,12 @@ def eval(self, expr, inplace=False, **kwargs):
31863186
kwargs['level'] = kwargs.pop('level', 0) + 1
31873187
if resolvers is None:
31883188
index_resolvers = self._get_index_resolvers()
3189-
resolvers = dict(self.iteritems()), index_resolvers
3189+
# Alter names with spaces so that they can be found by backtick
3190+
# quoting. see also pandas/core/computation/expr.py
3191+
# _replace_spaces_backtickvariables(source)
3192+
column_resolvers = {com.clean_column_name_with_spaces(k): v
3193+
for k, v in self.iteritems()}
3194+
resolvers = column_resolvers, index_resolvers
31903195
if 'target' not in kwargs:
31913196
kwargs['target'] = self
31923197
kwargs['resolvers'] = kwargs.get('resolvers', ()) + tuple(resolvers)

0 commit comments

Comments
 (0)