pandas-dev · jreback · Mar 20, 2019 · Jan 26, 2019 · Jan 26, 2019 · Feb 15, 2019
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -24,6 +24,7 @@ Other Enhancements
 - :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`)
 - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`)
 - :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`)
+- :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`)
 -
 
 .. _whatsnew_0250.api_breaking:

diff --git a/pandas/core/computation/common.py b/pandas/core/computation/common.py
@@ -1,9 +1,12 @@
 import numpy as np
 
-from pandas.compat import reduce
+from pandas.compat import reduce, string_types
 
 import pandas as pd
 
+# A token value Python's tokenizer probably will never use.
+_BACKTICK_QUOTED_STRING = 100
+
 
 def _ensure_decoded(s):
     """ if we have bytes, decode them to unicode """
@@ -22,5 +25,25 @@ def _result_type_many(*arrays_and_dtypes):
         return reduce(np.result_type, arrays_and_dtypes)
 
 
+def _clean_column_name_with_spaces(name):
+    """Check if name contains any spaces, if it contains any spaces
+    the spaces will be removed and an underscore suffix is added."""
+    if not isinstance(name, string_types) or " " not in name:
+        return name
+    return "_BACKTICK_QUOTED_STRING_" + name.replace(" ", "_")
+
+
+def _get_column_resolvers(dataFrame):
+    """Return the axis resolvers of a dataframe.
+
+    Column names with spaces are 'cleaned up' so that they can be referred to
+    by backtick quoting. See also :func:`_clean_spaces_backtick_quoted_names`
+    from :mod:`pandas.core.computation`
+    """
+
+    return {_clean_column_name_with_spaces(k): v for k, v
+            in dataFrame.iteritems()}
+
+
 class NameResolutionError(NameError):
     pass
diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
@@ -3,16 +3,20 @@
 
 import ast
 from functools import partial
+import itertools as it
+import operator
 import tokenize
 
 import numpy as np
 
-from pandas.compat import StringIO, lmap, reduce, string_types, zip
+from pandas.compat import StringIO, lmap, map, reduce, string_types, zip
 
 import pandas as pd
 from pandas import compat
 from pandas.core import common as com
 from pandas.core.base import StringMixin
+from pandas.core.computation.common import (
+    _BACKTICK_QUOTED_STRING, _clean_column_name_with_spaces)
 from pandas.core.computation.ops import (
     _LOCAL_TAG, BinOp, Constant, Div, FuncNode, Op, Term, UnaryOp,
     UndefinedVariableError, _arith_ops_syms, _bool_ops_syms, _cmp_ops_syms,
@@ -31,7 +35,13 @@ def tokenize_string(source):
         A Python source code string
     """
     line_reader = StringIO(source).readline
-    for toknum, tokval, _, _, _ in tokenize.generate_tokens(line_reader):
+    token_generator = tokenize.generate_tokens(line_reader)
+    for toknum, tokval, _, _, _ in token_generator:
+        if tokval == '`':
+            tokval = " ".join(it.takewhile(
+                lambda tokval: tokval != '`',
+                map(operator.itemgetter(1), token_generator)))
+            toknum = _BACKTICK_QUOTED_STRING
         yield toknum, tokval
 
 
@@ -102,6 +112,31 @@ def _replace_locals(tok):
     return toknum, tokval
 
 
+def _clean_spaces_backtick_quoted_names(tok):
+    """Clean up a column name if surrounded by backticks.
+
+    Backtick quoted string are indicated by a certain tokval value. If a string
+    is a backtick quoted token it will processed by
+    :func:`_clean_column_name_with_spaces` so that the parser can find this
+    string when the query is executed. See also :func:`_get_column_resolvers`
+    used in :meth:`DataFrame.eval`.
+
+    Parameters
+    ----------
+    tok : tuple of int, str
+        ints correspond to the all caps constants in the tokenize module
+
+    Returns
+    -------
+    t : tuple of int, str
+        Either the input or token or the replacement values
+    """
+    toknum, tokval = tok
+    if toknum == _BACKTICK_QUOTED_STRING:
+        return tokenize.NAME, _clean_column_name_with_spaces(tokval)
+    return toknum, tokval
+
+
 def _compose2(f, g):
     """Compose 2 callables"""
     return lambda *args, **kwargs: f(g(*args, **kwargs))
@@ -114,7 +149,8 @@ def _compose(*funcs):
 
 
 def _preparse(source, f=_compose(_replace_locals, _replace_booleans,
-                                 _rewrite_assign)):
+                                 _rewrite_assign,
+                                 _clean_spaces_backtick_quoted_names)):
     """Compose a collection of tokenization functions
 
     Parameters
@@ -711,8 +747,9 @@ def visitor(x, y):
 class PandasExprVisitor(BaseExprVisitor):
 
     def __init__(self, env, engine, parser,
-                 preparser=partial(_preparse, f=_compose(_replace_locals,
-                                                         _replace_booleans))):
+                 preparser=partial(_preparse, f=_compose(
+                     _replace_locals, _replace_booleans,
+                     _clean_spaces_backtick_quoted_names))):
         super(PandasExprVisitor, self).__init__(env, engine, parser, preparser)
 
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2967,6 +2967,12 @@ def query(self, expr, inplace=False, **kwargs):
             The query string to evaluate.  You can refer to variables
             in the environment by prefixing them with an '@' character like
             ``@a + b``.
+
+            .. versionadded:: 0.25.0
+
+            You can refer to column names that contain spaces by surrounding
+            them in backticks like ```a a` + b``.
+
         inplace : bool
             Whether the query should modify the data in place or return
             a modified copy.
@@ -3159,8 +3165,11 @@ def eval(self, expr, inplace=False, **kwargs):
         resolvers = kwargs.pop('resolvers', None)
         kwargs['level'] = kwargs.pop('level', 0) + 1
         if resolvers is None:
+            from pandas.core.computation.common import _get_column_resolvers
+
             index_resolvers = self._get_index_resolvers()
-            resolvers = dict(self.iteritems()), index_resolvers
+            column_resolvers = _get_column_resolvers(self)
+            resolvers = column_resolvers, index_resolvers
         if 'target' not in kwargs:
             kwargs['target'] = self
         kwargs['resolvers'] = kwargs.get('resolvers', ()) + tuple(resolvers)

diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
@@ -1030,3 +1030,54 @@ def test_invalid_type_for_operator_raises(self, parser, engine, op):
 
         with pytest.raises(TypeError, match=msg):
             df.eval('a {0} b'.format(op), engine=engine, parser=parser)
+
+
+class TestDataFrameQueryBacktickQuoting(object):
+
+    @pytest.fixture(scope='class')
+    def df(self):
+        yield DataFrame({'A': [1, 2, 3],
+                         'B B': [3, 2, 1],
+                         'C C': [4, 5, 6],
+                         'C_C': [8, 9, 10],
+                         'D_D D': [11, 1, 101]})
+
+    def test_single_backtick_variable_query(self, df):
+        res = df.query('1 < `B B`')
+        expect = df[1 < df['B B']]
+        assert_frame_equal(res, expect)
+
+    def test_two_backtick_variables_query(self, df):
+        res = df.query('1 < `B B` and 4 < `C C`')
+        expect = df[(1 < df['B B']) & (4 < df['C C'])]
+        assert_frame_equal(res, expect)
+
+    def test_single_backtick_variable_expr(self, df):
+        res = df.eval('A + `B B`')
+        expect = df['A'] + df['B B']
+        assert_series_equal(res, expect)
+
+    def test_two_backtick_variables_expr(self, df):
+        res = df.eval('`B B` + `C C`')
+        expect = df['B B'] + df['C C']
+        assert_series_equal(res, expect)
+
+    def test_already_underscore_variable(self, df):
+        res = df.eval('`C_C` + A')
+        expect = df['C_C'] + df['A']
+        assert_series_equal(res, expect)
+
+    def test_same_name_but_underscores(self, df):
+        res = df.eval('C_C + `C C`')
+        expect = df['C_C'] + df['C C']
+        assert_series_equal(res, expect)
+
+    def test_mixed_underscores_and_spaces(self, df):
+        res = df.eval('A + `D_D D`')
+        expect = df['A'] + df['D_D D']
+        assert_series_equal(res, expect)
+
+    def backtick_quote_name_with_no_spaces(self, df):
+        res = df.eval('A + `C_C`')
+        expect = df['A'] + df['C_C']
+        assert_series_equal(res, expect)