From ad0e9b8b5968f2c06b12569067395e3fbed5ed0d Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sun, 21 Jul 2024 16:42:25 +0200
Subject: [PATCH 01/32] escape unescape sharp, single quote, double quote

---
 pandas/core/frame.py                  | 41 ++++++++++++++-----
 pandas/tests/computation/test_eval.py | 58 +++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b8039746d9952..364bcd034b42c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -34,6 +34,7 @@
     cast,
     overload,
 )
+import urllib.parse
 import warnings
 
 import numpy as np
@@ -4559,14 +4560,8 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
         For other characters that fall outside the ASCII range (U+0001..U+007F)
         and those that are not further specified in PEP 3131,
         the query parser will raise an error.
-        This excludes whitespace different than the space character,
-        but also the hashtag (as it is used for comments) and the backtick
-        itself (backtick can also not be escaped).
-
-        In a special case, quotes that make a pair around a backtick can
-        confuse the parser.
-        For example, ```it's` > `that's``` will raise an error,
-        as it forms a quoted string (``'s > `that'``) with a backtick inside.
+        This excludes whitespace different than the space character
+        and the backtick itself (backtick cannot be escaped).
 
         See also the `Python documentation about lexical analysis
         <https://docs.python.org/3/reference/lexical_analysis.html>`__
@@ -4620,7 +4615,35 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
             raise ValueError(msg)
         kwargs["level"] = kwargs.pop("level", 0) + 1
         kwargs["target"] = None
-        res = self.eval(expr, **kwargs)
+
+        # GH 59285
+        if any(("#" in col) or ("'" in col) or ('"' in col) for col in self.columns):
+            # Create a copy of `self` with column names escaped
+            escaped_self = self.copy()
+            escaped_self.columns = [
+                urllib.parse.quote(col) for col in escaped_self.columns
+            ]
+
+            # In expr, escape column names between backticks
+            column_name_to_escaped_name = {
+                col: urllib.parse.quote(col) for col in self.columns
+            }
+            escaped_expr = "`".join(
+                (column_name_to_escaped_name.get(token, token) if (i % 2) else token)
+                for i, token in enumerate(expr.split("`"))
+            )
+
+            # eval
+            escaped_res = escaped_self.eval(escaped_expr, **kwargs)
+
+            # If `res` is a Series or DataFrame, unescape names
+            res = escaped_res.copy()
+            if isinstance(res, Series) and res.name:
+                res.name = urllib.parse.unquote(res.name)
+            elif isinstance(res, DataFrame):
+                res.columns = [urllib.parse.unquote(col) for col in res.columns]
+        else:
+            res = self.eval(expr, **kwargs)
 
         try:
             result = self.loc[res]
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 31d568d7c1e0c..e6edbd418837f 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -1978,6 +1978,64 @@ def test_eval_no_support_column_name(request, column):
     tm.assert_frame_equal(result, expected)
 
 
+def test_query_on_column_name_with_hashtag_character():
+    # GH 59285
+    df = DataFrame((1, 2, 3), columns=["a#"])
+    result = df.query("`a#` < 2")
+    expected = df[df["a#"] < 2]
+    tm.assert_frame_equal(result, expected)
+
+
+def test_query_on_expr_with_comment():
+    # GH 59285
+    df = DataFrame((1, 2, 3), columns=["a#"])
+    result = df.query("`a#` < 2  # This is a comment")
+    expected = df[df["a#"] < 2]
+    tm.assert_frame_equal(result, expected)
+
+
+def test_query_on_column_names_with_single_quote_character():
+    df = DataFrame(
+        [
+            {"it's": 1, "that's": 2},
+            {"it's": 3, "that's": 4},
+            {"it's": -1, "that's": -2},
+            {"it's": -3, "that's": -4},
+        ]
+    )
+    result = df.query("`it's` < `that's`")
+    expected = df[df["it's"] < df["that's"]]
+    tm.assert_frame_equal(result, expected)
+
+
+def test_query_on_column_names_with_double_quote_character():
+    df = DataFrame(
+        [
+            {'it"s': 1, 'that"s': 2},
+            {'it"s': 3, 'that"s': 4},
+            {'it"s': -1, 'that"s': -2},
+            {'it"s': -3, 'that"s': -4},
+        ]
+    )
+    result = df.query('`it"s` < `that"s`')
+    expected = df[df['it"s'] < df['that"s']]
+    tm.assert_frame_equal(result, expected)
+
+
+def test_query_on_column_names_with_single_quote_and_double_quote_character():
+    df = DataFrame(
+        [
+            {"it's": 1, 'that\'s "nice"': 2},
+            {"it's": 3, 'that\'s "nice"': 4},
+            {"it's": -1, 'that\'s "nice"': -2},
+            {"it's": -3, 'that\'s "nice"': -4},
+        ]
+    )
+    result = df.query("`it's` < `that's \"nice\"`")
+    expected = df[df["it's"] < df['that\'s "nice"']]
+    tm.assert_frame_equal(result, expected)
+
+
 def test_set_inplace():
     # https://github.com/pandas-dev/pandas/issues/47449
     # Ensure we don't only update the DataFrame inplace, but also the actual

From 41fb2c80e873e50e9c9767feaf4e1e5c818d5943 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sun, 21 Jul 2024 16:53:20 +0200
Subject: [PATCH 02/32] parametrize and add tests

---
 pandas/tests/computation/test_eval.py | 52 +++++++++------------------
 1 file changed, 17 insertions(+), 35 deletions(-)

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index e6edbd418837f..3e7fc62c4b48c 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -1994,45 +1994,27 @@ def test_query_on_expr_with_comment():
     tm.assert_frame_equal(result, expected)
 
 
-def test_query_on_column_names_with_single_quote_character():
-    df = DataFrame(
-        [
-            {"it's": 1, "that's": 2},
-            {"it's": 3, "that's": 4},
-            {"it's": -1, "that's": -2},
-            {"it's": -3, "that's": -4},
-        ]
-    )
-    result = df.query("`it's` < `that's`")
-    expected = df[df["it's"] < df["that's"]]
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_column_names_with_double_quote_character():
-    df = DataFrame(
-        [
-            {'it"s': 1, 'that"s': 2},
-            {'it"s': 3, 'that"s': 4},
-            {'it"s': -1, 'that"s': -2},
-            {'it"s': -3, 'that"s': -4},
-        ]
-    )
-    result = df.query('`it"s` < `that"s`')
-    expected = df[df['it"s'] < df['that"s']]
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_column_names_with_single_quote_and_double_quote_character():
+@pytest.mark.parametrize(
+    "col1,col2,expr",
+    [
+        ("it's", "that's", "`it's` < `that's`"),
+        ('it"s', 'that"s', '`it"s` < `that"s`'),
+        ("it's", 'that\'s "nice"', "`it's` < `that's \"nice\"`"),
+        ("it's", "that's #cool", "`it's` < `that's #cool` # This is a comment"),
+    ],
+)
+def test_query_on_column_names_with_special_characters(col1, col2, expr):
+    # GH 59285
     df = DataFrame(
         [
-            {"it's": 1, 'that\'s "nice"': 2},
-            {"it's": 3, 'that\'s "nice"': 4},
-            {"it's": -1, 'that\'s "nice"': -2},
-            {"it's": -3, 'that\'s "nice"': -4},
+            {col1: 1, col2: 2},
+            {col1: 3, col2: 4},
+            {col1: -1, col2: -2},
+            {col1: -3, col2: -4},
         ]
     )
-    result = df.query("`it's` < `that's \"nice\"`")
-    expected = df[df["it's"] < df['that\'s "nice"']]
+    result = df.query(expr)
+    expected = df[df[col1] < df[col2]]
     tm.assert_frame_equal(result, expected)
 
 

From c756fc34ab2d3b2aacc613df7cba76ba7c73be5e Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sun, 21 Jul 2024 17:48:21 +0200
Subject: [PATCH 03/32] reinstate text in docs, shorten some lines

---
 pandas/core/frame.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 364bcd034b42c..20f1fc9f750d4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4560,8 +4560,9 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
         For other characters that fall outside the ASCII range (U+0001..U+007F)
         and those that are not further specified in PEP 3131,
         the query parser will raise an error.
-        This excludes whitespace different than the space character
-        and the backtick itself (backtick cannot be escaped).
+        This excludes whitespace different than the space character,
+        but also the hashtag (as it is used for comments) and the backtick
+        itself (backtick can also not be escaped).
 
         See also the `Python documentation about lexical analysis
         <https://docs.python.org/3/reference/lexical_analysis.html>`__
@@ -4620,16 +4621,15 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
         if any(("#" in col) or ("'" in col) or ('"' in col) for col in self.columns):
             # Create a copy of `self` with column names escaped
             escaped_self = self.copy()
-            escaped_self.columns = [
-                urllib.parse.quote(col) for col in escaped_self.columns
-            ]
+            escaped_self.columns = map(urllib.parse.quote, escaped_self.columns)
 
             # In expr, escape column names between backticks
-            column_name_to_escaped_name = {
+            column_name_to_escaped = {
                 col: urllib.parse.quote(col) for col in self.columns
             }
+            # Odd-number indexes are column names
             escaped_expr = "`".join(
-                (column_name_to_escaped_name.get(token, token) if (i % 2) else token)
+                (column_name_to_escaped.get(token, token) if (i % 2) else token)
                 for i, token in enumerate(expr.split("`"))
             )
 
@@ -4641,7 +4641,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
             if isinstance(res, Series) and res.name:
                 res.name = urllib.parse.unquote(res.name)
             elif isinstance(res, DataFrame):
-                res.columns = [urllib.parse.unquote(col) for col in res.columns]
+                res.columns = map(urllib.parse.unquote, res.columns)
         else:
             res = self.eval(expr, **kwargs)
 

From d4707b65eb91dd81aea200c28efe567d31a9b504 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sun, 21 Jul 2024 18:02:18 +0200
Subject: [PATCH 04/32] update whatsnew

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 pandas/core/frame.py           | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 3de65fe6f682c..19a0439cb20ce 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -647,6 +647,7 @@ Other
 - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
+- Bug in :meth:`DataFrame.query` which raised a ``KeyError`` when the expression contained column names with  characters like ``#``. (:issue:`59285`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
 - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 20f1fc9f750d4..89ba9830c86a6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4627,7 +4627,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
             column_name_to_escaped = {
                 col: urllib.parse.quote(col) for col in self.columns
             }
-            # Odd-number indexes are column names
+            # A `token` with an odd-number index is a column name
             escaped_expr = "`".join(
                 (column_name_to_escaped.get(token, token) if (i % 2) else token)
                 for i, token in enumerate(expr.split("`"))

From ba50d912924c68fa0f25091ba38fed565f465a46 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sun, 21 Jul 2024 18:03:58 +0200
Subject: [PATCH 05/32] minor: double space to single space

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 19a0439cb20ce..b5f7ebc26c2bb 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -647,7 +647,7 @@ Other
 - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
-- Bug in :meth:`DataFrame.query` which raised a ``KeyError`` when the expression contained column names with  characters like ``#``. (:issue:`59285`)
+- Bug in :meth:`DataFrame.query` which raised a ``KeyError`` when the expression contained column names with characters like ``#``. (:issue:`59285`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
 - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)

From aaffbba09ebf137a4f280ac9b101217a8ed6c1ec Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sat, 27 Jul 2024 20:25:54 +0200
Subject: [PATCH 06/32] move to parsing.py, split better, add tests

---
 pandas/core/computation/parsing.py    | 80 ++++++++++++++++++++++++++-
 pandas/core/frame.py                  | 29 +---------
 pandas/tests/computation/test_eval.py | 16 ++++++
 3 files changed, 95 insertions(+), 30 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index 8fbf8936d31ef..d5e2c5a9e8864 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -4,7 +4,10 @@
 
 from __future__ import annotations
 
-from io import StringIO
+from io import (
+    BytesIO,
+    StringIO,
+)
 from keyword import iskeyword
 import token
 import tokenize
@@ -58,7 +61,7 @@ def create_valid_python_identifier(name: str) -> str:
             "'": "_SINGLEQUOTE_",
             '"': "_DOUBLEQUOTE_",
             # Currently not possible. Terminates parser and won't find backtick.
-            # "#": "_HASH_",
+            "#": "_HASH_",
         }
     )
 
@@ -168,6 +171,69 @@ def tokenize_backtick_quoted_string(
     return BACKTICK_QUOTED_STRING, source[string_start:string_end]
 
 
+def split_by_backtick(s: str) -> list[tuple[bool, str]]:
+    substrings = []
+    substring = ""
+    i = 0
+    while i < len(s):
+        backtick_index = s.find("`", i)
+
+        # No backticks
+        if backtick_index == -1:
+            substrings.append((False, substring + s[i:]))
+            break
+
+        single_quote_index = s.find("'", i)
+        double_quote_index = s.find('"', i)
+        if (single_quote_index == -1) and (double_quote_index == -1):
+            quote_index = -1
+        elif single_quote_index == -1:
+            quote_index = double_quote_index
+        elif double_quote_index == -1:
+            quote_index = single_quote_index
+        else:
+            quote_index = min(single_quote_index, double_quote_index)
+
+        # No quotes
+        if quote_index == -1:
+            next_backtick_index = s.find("`", backtick_index + 1)
+        # Backtick opened before quote
+        elif backtick_index < quote_index:
+            next_backtick_index = s.find("`", backtick_index + 1)
+        # Quote opened before backtick
+        else:
+            next_quote_index = -1
+            line_reader = BytesIO(s[i:].encode("utf-8")).readline
+            token_generator = tokenize.tokenize(line_reader)
+            for toknum, _, (_, _), (_, end), _ in token_generator:
+                if toknum == tokenize.STRING:
+                    next_quote_index = i + end - 1
+                    break
+
+            # Quote is unmatched
+            if next_quote_index == -1:
+                next_backtick_index = s.find("`", backtick_index + 1)
+            # Quote is matched
+            else:
+                substring += s[i:next_quote_index]
+                i = next_quote_index
+                continue
+
+        # Backtick is unmatched
+        if next_backtick_index == -1:
+            substrings.append((False, substring + s[i:]))
+            break
+        # Backtick is matched
+        else:
+            if i != backtick_index:
+                substrings.append((False, substring + s[i:backtick_index]))
+            substrings.append((True, s[backtick_index : next_backtick_index + 1]))
+            substring = ""
+            i = next_backtick_index + 1
+
+    return substrings
+
+
 def tokenize_string(source: str) -> Iterator[tuple[int, str]]:
     """
     Tokenize a Python source code string.
@@ -182,6 +248,16 @@ def tokenize_string(source: str) -> Iterator[tuple[int, str]]:
     tok_generator : Iterator[Tuple[int, str]]
         An iterator yielding all tokens with only toknum and tokval (Tuple[ing, str]).
     """
+    # GH 59285
+    source = "".join(
+        (
+            f"`{create_valid_python_identifier(substring[1:-1])}`"
+            if is_backticked
+            else substring
+        )
+        for is_backticked, substring in split_by_backtick(source)
+    )
+
     line_reader = StringIO(source).readline
     token_generator = tokenize.generate_tokens(line_reader)
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 89ba9830c86a6..f2d87ee36490a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -34,7 +34,6 @@
     cast,
     overload,
 )
-import urllib.parse
 import warnings
 
 import numpy as np
@@ -4617,33 +4616,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
         kwargs["level"] = kwargs.pop("level", 0) + 1
         kwargs["target"] = None
 
-        # GH 59285
-        if any(("#" in col) or ("'" in col) or ('"' in col) for col in self.columns):
-            # Create a copy of `self` with column names escaped
-            escaped_self = self.copy()
-            escaped_self.columns = map(urllib.parse.quote, escaped_self.columns)
-
-            # In expr, escape column names between backticks
-            column_name_to_escaped = {
-                col: urllib.parse.quote(col) for col in self.columns
-            }
-            # A `token` with an odd-number index is a column name
-            escaped_expr = "`".join(
-                (column_name_to_escaped.get(token, token) if (i % 2) else token)
-                for i, token in enumerate(expr.split("`"))
-            )
-
-            # eval
-            escaped_res = escaped_self.eval(escaped_expr, **kwargs)
-
-            # If `res` is a Series or DataFrame, unescape names
-            res = escaped_res.copy()
-            if isinstance(res, Series) and res.name:
-                res.name = urllib.parse.unquote(res.name)
-            elif isinstance(res, DataFrame):
-                res.columns = map(urllib.parse.unquote, res.columns)
-        else:
-            res = self.eval(expr, **kwargs)
+        res = self.eval(expr, **kwargs)
 
         try:
             result = self.loc[res]
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 3e7fc62c4b48c..02833ba60ed8d 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -1994,6 +1994,22 @@ def test_query_on_expr_with_comment():
     tm.assert_frame_equal(result, expected)
 
 
+def test_query_on_expr_with_backticks():
+    # GH 59285
+    df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])
+    result = df.query("'`' < `#backticks`")
+    expected = df["`" < df["#backticks"]]
+    tm.assert_frame_equal(result, expected)
+
+
+def test_query_on_expr_with_backticked_string_same_as_column_name():
+    # GH 59285
+    df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])
+    result = df.query("'`#backticks`' < `#backticks`")
+    expected = df["`#backticks`" < df["#backticks"]]
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "col1,col2,expr",
     [

From 0c75550b60d8744c2221adc1d28efa366f4c7dc5 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sat, 27 Jul 2024 20:48:16 +0200
Subject: [PATCH 07/32] clean up

---
 pandas/core/computation/parsing.py | 38 +++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index d5e2c5a9e8864..0cb681a88765a 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -171,7 +171,24 @@ def tokenize_backtick_quoted_string(
     return BACKTICK_QUOTED_STRING, source[string_start:string_end]
 
 
-def split_by_backtick(s: str) -> list[tuple[bool, str]]:
+def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
+    """
+    Split a string into substrings at backtick characters (`).
+
+    Backticks inside quoted string literals are not treated as delimiters.
+
+    Parameters
+    ----------
+    s : str
+        The Python source code string.
+
+    Returns
+    -------
+    substrings : list[tuple[bool, str]]
+        List of tuples, where each tuple has two elements:
+        The first is a boolean indicating if the substring is backtick-quoted.
+        The second is the actual substring.
+    """
     substrings = []
     substring = ""
     i = 0
@@ -249,13 +266,14 @@ def tokenize_string(source: str) -> Iterator[tuple[int, str]]:
         An iterator yielding all tokens with only toknum and tokval (Tuple[ing, str]).
     """
     # GH 59285
+    # Replace each backtick-quoted name (backticks included) with a valid identifier
     source = "".join(
         (
-            f"`{create_valid_python_identifier(substring[1:-1])}`"
-            if is_backticked
+            create_valid_python_identifier(substring[1:-1])
+            if is_backtick_quoted
             else substring
         )
-        for is_backticked, substring in split_by_backtick(source)
+        for is_backtick_quoted, substring in _split_by_backtick(source)
     )
 
     line_reader = StringIO(source).readline
@@ -263,13 +281,5 @@ def tokenize_string(source: str) -> Iterator[tuple[int, str]]:
 
     # Loop over all tokens till a backtick (`) is found.
     # Then, take all tokens till the next backtick to form a backtick quoted string
-    for toknum, tokval, start, _, _ in token_generator:
-        if tokval == "`":
-            try:
-                yield tokenize_backtick_quoted_string(
-                    token_generator, source, string_start=start[1] + 1
-                )
-            except Exception as err:
-                raise SyntaxError(f"Failed to parse backticks in '{source}'.") from err
-        else:
-            yield toknum, tokval
+    for toknum, tokval, _, _, _ in token_generator:
+        yield toknum, tokval

From 90c5dbcfc44a5125843db9f1b7f1f295c2fc3379 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sat, 27 Jul 2024 20:50:36 +0200
Subject: [PATCH 08/32] remove old comment

---
 pandas/core/computation/parsing.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index 0cb681a88765a..de0a4f8774c15 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -279,7 +279,5 @@ def tokenize_string(source: str) -> Iterator[tuple[int, str]]:
     line_reader = StringIO(source).readline
     token_generator = tokenize.generate_tokens(line_reader)
 
-    # Loop over all tokens till a backtick (`) is found.
-    # Then, take all tokens till the next backtick to form a backtick quoted string
     for toknum, tokval, _, _, _ in token_generator:
         yield toknum, tokval

From c0ee651d444d9a5542121ce4a8343b342fb1a018 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sat, 27 Jul 2024 21:04:11 +0200
Subject: [PATCH 09/32] test names

---
 pandas/tests/computation/test_eval.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 02833ba60ed8d..2ff2f8c7162b6 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -1994,7 +1994,7 @@ def test_query_on_expr_with_comment():
     tm.assert_frame_equal(result, expected)
 
 
-def test_query_on_expr_with_backticks():
+def test_query_on_expr_with_string_with_backticks():
     # GH 59285
     df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])
     result = df.query("'`' < `#backticks`")
@@ -2002,7 +2002,7 @@ def test_query_on_expr_with_backticks():
     tm.assert_frame_equal(result, expected)
 
 
-def test_query_on_expr_with_backticked_string_same_as_column_name():
+def test_query_on_expr_with_string_with_backticked_substring_same_as_column_name():
     # GH 59285
     df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])
     result = df.query("'`#backticks`' < `#backticks`")

From b7dc1a8173ba80209532950db7847e46a49abcce Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sat, 27 Jul 2024 21:13:53 +0200
Subject: [PATCH 10/32] minor test change

---
 pandas/tests/computation/test_eval.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 2ff2f8c7162b6..792fa97d97640 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -1997,8 +1997,8 @@ def test_query_on_expr_with_comment():
 def test_query_on_expr_with_string_with_backticks():
     # GH 59285
     df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])
-    result = df.query("'`' < `#backticks`")
-    expected = df["`" < df["#backticks"]]
+    result = df.query("'```' < `#backticks`")
+    expected = df["```" < df["#backticks"]]
     tm.assert_frame_equal(result, expected)
 
 

From 164e3c50c9d655af0c5155cc34c8e407e067b8cc Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sat, 27 Jul 2024 23:54:46 +0200
Subject: [PATCH 11/32] improve splitting

---
 pandas/core/computation/parsing.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index de0a4f8774c15..f7ab8168614f4 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -227,16 +227,17 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
                     next_quote_index = i + end - 1
                     break
 
-            # Quote is unmatched
+            # Quote is unmatched (Possibly a mistake)
             if next_quote_index == -1:
-                next_backtick_index = s.find("`", backtick_index + 1)
+                substrings.append((False, substring + s[i:]))
+                break
             # Quote is matched
             else:
-                substring += s[i:next_quote_index]
-                i = next_quote_index
+                substring += s[i : next_quote_index + 1]
+                i = next_quote_index + 1
                 continue
 
-        # Backtick is unmatched
+        # Backtick is unmatched (Possibly a mistake)
         if next_backtick_index == -1:
             substrings.append((False, substring + s[i:]))
             break

From 4040370a21a055490293387e0d41401861d34e0e Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sun, 28 Jul 2024 00:18:14 +0200
Subject: [PATCH 12/32] fix splitting

---
 pandas/core/computation/parsing.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index f7ab8168614f4..9646068c6bca1 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -231,7 +231,11 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
             if next_quote_index == -1:
                 substrings.append((False, substring + s[i:]))
                 break
-            # Quote is matched
+            # Quote is matched, and the next quote is at the end of the string
+            elif next_quote_index + 1 == len(s):
+                substrings.append((False, substring + s[i:]))
+                break
+            # Quote is matched, and the next quote is in the middle of the string
             else:
                 substring += s[i : next_quote_index + 1]
                 i = next_quote_index + 1

From 148d1ed2aeebbaa0090e4beb72037532682c845f Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sun, 28 Jul 2024 00:39:59 +0200
Subject: [PATCH 13/32] improve splitting

---
 pandas/core/computation/parsing.py | 31 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index 9646068c6bca1..a8df3a738a389 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -211,12 +211,23 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
         else:
             quote_index = min(single_quote_index, double_quote_index)
 
-        # No quotes
-        if quote_index == -1:
-            next_backtick_index = s.find("`", backtick_index + 1)
+        # No quotes, or
         # Backtick opened before quote
-        elif backtick_index < quote_index:
+        if (quote_index == -1) or (backtick_index < quote_index):
             next_backtick_index = s.find("`", backtick_index + 1)
+
+            # Backtick is unmatched (Possibly a mistake)
+            if next_backtick_index == -1:
+                substrings.append((False, substring + s[i:]))
+                break
+            # Backtick is matched
+            else:
+                if i != backtick_index:
+                    substrings.append((False, substring + s[i:backtick_index]))
+                substrings.append((True, s[backtick_index : next_backtick_index + 1]))
+                substring = ""
+                i = next_backtick_index + 1
+
         # Quote opened before backtick
         else:
             next_quote_index = -1
@@ -241,18 +252,6 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
                 i = next_quote_index + 1
                 continue
 
-        # Backtick is unmatched (Possibly a mistake)
-        if next_backtick_index == -1:
-            substrings.append((False, substring + s[i:]))
-            break
-        # Backtick is matched
-        else:
-            if i != backtick_index:
-                substrings.append((False, substring + s[i:backtick_index]))
-            substrings.append((True, s[backtick_index : next_backtick_index + 1]))
-            substring = ""
-            i = next_backtick_index + 1
-
     return substrings
 
 

From 990d0d392f23e7c6b6986c0c8bb047de2c2e467a Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sun, 28 Jul 2024 01:44:24 +0200
Subject: [PATCH 14/32] add tests

---
 pandas/core/computation/parsing.py    | 12 ++----
 pandas/tests/computation/test_eval.py | 61 +++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index a8df3a738a389..12991abc7f399 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -216,13 +216,13 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
         if (quote_index == -1) or (backtick_index < quote_index):
             next_backtick_index = s.find("`", backtick_index + 1)
 
-            # Backtick is unmatched (Possibly a mistake)
+            # Backtick is unmatched (Bad syntax)
             if next_backtick_index == -1:
                 substrings.append((False, substring + s[i:]))
                 break
             # Backtick is matched
             else:
-                if i != backtick_index:
+                if substring or (i != backtick_index):
                     substrings.append((False, substring + s[i:backtick_index]))
                 substrings.append((True, s[backtick_index : next_backtick_index + 1]))
                 substring = ""
@@ -238,19 +238,15 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
                     next_quote_index = i + end - 1
                     break
 
-            # Quote is unmatched (Possibly a mistake)
-            if next_quote_index == -1:
-                substrings.append((False, substring + s[i:]))
-                break
+            # Quote is unmatched (Bad syntax), or
             # Quote is matched, and the next quote is at the end of the string
-            elif next_quote_index + 1 == len(s):
+            if (next_quote_index == -1) or (next_quote_index + 1 == len(s)):
                 substrings.append((False, substring + s[i:]))
                 break
             # Quote is matched, and the next quote is in the middle of the string
             else:
                 substring += s[i : next_quote_index + 1]
                 i = next_quote_index + 1
-                continue
 
     return substrings
 
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 792fa97d97640..d2547a02fba03 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -2034,6 +2034,67 @@ def test_query_on_column_names_with_special_characters(col1, col2, expr):
     tm.assert_frame_equal(result, expected)
 
 
+def test_query_on_expr_with_no_backticks():
+    # GH 59285
+    df = DataFrame(("aaa", "vvv", "zzz"), columns=["column_name"])
+    result = df.query("'value' < column_name")
+    expected = df["value" < df["column_name"]]
+    tm.assert_frame_equal(result, expected)
+
+
+def test_query_on_expr_with_no_quotes_and_backtick_is_unmatched():
+    # GH 59285
+    df = DataFrame((1, 5, 10), columns=["column-name"])
+    with pytest.raises(SyntaxError, match="invalid syntax"):
+        df.query("5 < `column-name")
+
+
+def test_query_on_expr_with_no_quotes_and_backtick_is_matched():
+    # GH 59285
+    df = DataFrame((1, 5, 10), columns=["column-name"])
+    result = df.query("5 < `column-name`")
+    expected = df[5 < df["column-name"]]
+    tm.assert_frame_equal(result, expected)
+
+
+def test_query_on_expr_with_backtick_opened_before_quote_and_backtick_is_unmatched():
+    # GH 59285
+    df = DataFrame((1, 5, 10), columns=["It's"])
+    with pytest.raises(SyntaxError, match="unterminated string literal"):
+        df.query("5 < `It's")
+
+
+def test_query_on_expr_with_backtick_opened_before_quote_and_backtick_is_matched():
+    # GH 59285
+    df = DataFrame((1, 5, 10), columns=["It's"])
+    result = df.query("5 < `It's`")
+    expected = df[5 < df["It's"]]
+    tm.assert_frame_equal(result, expected)
+
+
+def test_query_on_expr_with_quote_opened_before_backtick_and_quote_is_unmatched():
+    # GH 59285
+    df = DataFrame(("aaa", "vvv", "zzz"), columns=['It`s that\\\'s "quote" #hash'])
+    with pytest.raises(SyntaxError, match="unterminated string literal"):
+        df.query("`column-name` < 'It`s that\\'s \"quote\" #hash")
+
+
+def test_query_on_expr_with_quote_opened_before_backtick_and_quote_is_matched_at_end():
+    # GH 59285
+    df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])
+    result = df.query("`column-name` < 'It`s that\\'s \"quote\" #hash'")
+    expected = df[df["column-name"] < 'It`s that\'s "quote" #hash']
+    tm.assert_frame_equal(result, expected)
+
+
+def test_query_on_expr_with_quote_opened_before_backtick_and_quote_is_matched_in_mid():
+    # GH 59285
+    df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])
+    result = df.query("'It`s that\\'s \"quote\" #hash' < `column-name`")
+    expected = df['It`s that\'s "quote" #hash' < df["column-name"]]
+    tm.assert_frame_equal(result, expected)
+
+
 def test_set_inplace():
     # https://github.com/pandas-dev/pandas/issues/47449
     # Ensure we don't only update the DataFrame inplace, but also the actual

From e674eb83236d6e832ebd76497e60d1aaa4ad3fce Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sun, 28 Jul 2024 02:29:53 +0200
Subject: [PATCH 15/32] edit docstring and comments

---
 pandas/core/computation/parsing.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index 12991abc7f399..5030d1728d799 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -35,9 +35,8 @@ def create_valid_python_identifier(name: str) -> str:
     ------
     SyntaxError
         If the returned name is not a Python valid identifier, raise an exception.
-        This can happen if there is a hashtag in the name, as the tokenizer will
-        than terminate and not find the backtick.
-        But also for characters that fall out of the range of (U+0001..U+007F).
+        This can happen if the name includes characters that fall out of the range of
+        (U+0001..U+007F).
     """
     if name.isidentifier() and not iskeyword(name):
         return name
@@ -60,7 +59,6 @@ def create_valid_python_identifier(name: str) -> str:
             # Including quotes works, but there are exceptions.
             "'": "_SINGLEQUOTE_",
             '"': "_DOUBLEQUOTE_",
-            # Currently not possible. Terminates parser and won't find backtick.
             "#": "_HASH_",
         }
     )
@@ -239,11 +237,11 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
                     break
 
             # Quote is unmatched (Bad syntax), or
-            # Quote is matched, and the next quote is at the end of the string
+            # Quote is matched, and the next quote is at the end of s
             if (next_quote_index == -1) or (next_quote_index + 1 == len(s)):
                 substrings.append((False, substring + s[i:]))
                 break
-            # Quote is matched, and the next quote is in the middle of the string
+            # Quote is matched, and the next quote is in the middle of s
             else:
                 substring += s[i : next_quote_index + 1]
                 i = next_quote_index + 1

From 6a0ac728ce5ef7aa57de2a52e1a29a8420453749 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sun, 28 Jul 2024 04:08:32 +0200
Subject: [PATCH 16/32] minor test change

---
 pandas/tests/computation/test_eval.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index d2547a02fba03..b969c7f270810 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -2074,7 +2074,7 @@ def test_query_on_expr_with_backtick_opened_before_quote_and_backtick_is_matched
 
 def test_query_on_expr_with_quote_opened_before_backtick_and_quote_is_unmatched():
     # GH 59285
-    df = DataFrame(("aaa", "vvv", "zzz"), columns=['It`s that\\\'s "quote" #hash'])
+    df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])
     with pytest.raises(SyntaxError, match="unterminated string literal"):
         df.query("`column-name` < 'It`s that\\'s \"quote\" #hash")
 

From f2126b3e7f141b9fd4748544717422abda347042 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sun, 28 Jul 2024 21:53:47 +0200
Subject: [PATCH 17/32] escape backticks

---
 pandas/core/computation/parsing.py    | 2 +-
 pandas/tests/computation/test_eval.py | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index 5030d1728d799..67758fc99ae5e 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -56,7 +56,6 @@ def create_valid_python_identifier(name: str) -> str:
             "$": "_DOLLARSIGN_",
             "€": "_EUROSIGN_",
             "°": "_DEGREESIGN_",
-            # Including quotes works, but there are exceptions.
             "'": "_SINGLEQUOTE_",
             '"': "_DOUBLEQUOTE_",
             "#": "_HASH_",
@@ -128,6 +127,7 @@ def clean_column_name(name: Hashable) -> Hashable:
         which is not caught and propagates to the user level.
     """
     try:
+        name = name.replace("`", "``")  # Escape backticks
         tokenized = tokenize_string(f"`{name}`")
         tokval = next(tokenized)[1]
         return create_valid_python_identifier(tokval)
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index b969c7f270810..7ddb62b1b17f0 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -1994,6 +1994,14 @@ def test_query_on_expr_with_comment():
     tm.assert_frame_equal(result, expected)
 
 
+def test_query_on_expr_with_column_name_with_backtick_and_hash():
+    # GH 59285
+    df = DataFrame((1, 2, 3), columns=["a`#b"])
+    result = df.query("`a``#b` < 2")
+    expected = df[df["a`#b"] < 2]
+    tm.assert_frame_equal(result, expected)
+
+
 def test_query_on_expr_with_string_with_backticks():
     # GH 59285
     df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])

From 168f56c36db6662954487def0a8ffe04ce10f8fd Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Mon, 29 Jul 2024 13:15:43 +0200
Subject: [PATCH 18/32] escape backticks properly

---
 pandas/core/computation/parsing.py    | 8 ++++++++
 pandas/tests/computation/test_eval.py | 9 +++++++++
 2 files changed, 17 insertions(+)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index 67758fc99ae5e..05db7d51b5b20 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -59,6 +59,7 @@ def create_valid_python_identifier(name: str) -> str:
             "'": "_SINGLEQUOTE_",
             '"': "_DOUBLEQUOTE_",
             "#": "_HASH_",
+            "`": "_BACKTICK_",
         }
     )
 
@@ -213,6 +214,13 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
         # Backtick opened before quote
         if (quote_index == -1) or (backtick_index < quote_index):
             next_backtick_index = s.find("`", backtick_index + 1)
+            while (
+                (next_backtick_index != -1)
+                and (next_backtick_index != len(s) - 1)
+                and (s[next_backtick_index + 1] == "`")
+            ):
+                # Since the next character is also a backtick, it's an escaped backtick
+                next_backtick_index = s.find("`", next_backtick_index + 2)
 
             # Backtick is unmatched (Bad syntax)
             if next_backtick_index == -1:
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 7ddb62b1b17f0..1c6a58d044410 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -2002,6 +2002,15 @@ def test_query_on_expr_with_column_name_with_backtick_and_hash():
     tm.assert_frame_equal(result, expected)
 
 
+def test_query_on_expr_with_column_name_with_backtick():
+    # GH 59285
+    df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
+    result = df.query("`a``b` < 2")
+    # Note: Formatting checks may wrongly consider the above``inline code``.
+    expected = df[df["a`b"] < 2]
+    tm.assert_frame_equal(result, expected)
+
+
 def test_query_on_expr_with_string_with_backticks():
     # GH 59285
     df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])

From 810c82b3c2dabf6f0ef6abfb04c28f5c46fa6ce3 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Tue, 30 Jul 2024 13:11:08 +0200
Subject: [PATCH 19/32] comment

---
 pandas/tests/computation/test_eval.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 1c6a58d044410..1d5f5c3e76bcd 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -2006,7 +2006,7 @@ def test_query_on_expr_with_column_name_with_backtick():
     # GH 59285
     df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
     result = df.query("`a``b` < 2")
-    # Note: Formatting checks may wrongly consider the above``inline code``.
+    # Note: Formatting checks may wrongly consider the above ``inline code``.
     expected = df[df["a`b"] < 2]
     tm.assert_frame_equal(result, expected)
 

From 86947b2188b5cd9a1716713db810065816e8c022 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Tue, 30 Jul 2024 14:25:24 +0200
Subject: [PATCH 20/32] fix tests

---
 pandas/tests/computation/test_eval.py | 134 -------------------------
 pandas/tests/frame/test_query_eval.py | 135 ++++++++++++++++++++++++--
 2 files changed, 127 insertions(+), 142 deletions(-)

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 1d5f5c3e76bcd..31d568d7c1e0c 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -1978,140 +1978,6 @@ def test_eval_no_support_column_name(request, column):
     tm.assert_frame_equal(result, expected)
 
 
-def test_query_on_column_name_with_hashtag_character():
-    # GH 59285
-    df = DataFrame((1, 2, 3), columns=["a#"])
-    result = df.query("`a#` < 2")
-    expected = df[df["a#"] < 2]
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_expr_with_comment():
-    # GH 59285
-    df = DataFrame((1, 2, 3), columns=["a#"])
-    result = df.query("`a#` < 2  # This is a comment")
-    expected = df[df["a#"] < 2]
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_expr_with_column_name_with_backtick_and_hash():
-    # GH 59285
-    df = DataFrame((1, 2, 3), columns=["a`#b"])
-    result = df.query("`a``#b` < 2")
-    expected = df[df["a`#b"] < 2]
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_expr_with_column_name_with_backtick():
-    # GH 59285
-    df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
-    result = df.query("`a``b` < 2")
-    # Note: Formatting checks may wrongly consider the above ``inline code``.
-    expected = df[df["a`b"] < 2]
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_expr_with_string_with_backticks():
-    # GH 59285
-    df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])
-    result = df.query("'```' < `#backticks`")
-    expected = df["```" < df["#backticks"]]
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_expr_with_string_with_backticked_substring_same_as_column_name():
-    # GH 59285
-    df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])
-    result = df.query("'`#backticks`' < `#backticks`")
-    expected = df["`#backticks`" < df["#backticks"]]
-    tm.assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize(
-    "col1,col2,expr",
-    [
-        ("it's", "that's", "`it's` < `that's`"),
-        ('it"s', 'that"s', '`it"s` < `that"s`'),
-        ("it's", 'that\'s "nice"', "`it's` < `that's \"nice\"`"),
-        ("it's", "that's #cool", "`it's` < `that's #cool` # This is a comment"),
-    ],
-)
-def test_query_on_column_names_with_special_characters(col1, col2, expr):
-    # GH 59285
-    df = DataFrame(
-        [
-            {col1: 1, col2: 2},
-            {col1: 3, col2: 4},
-            {col1: -1, col2: -2},
-            {col1: -3, col2: -4},
-        ]
-    )
-    result = df.query(expr)
-    expected = df[df[col1] < df[col2]]
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_expr_with_no_backticks():
-    # GH 59285
-    df = DataFrame(("aaa", "vvv", "zzz"), columns=["column_name"])
-    result = df.query("'value' < column_name")
-    expected = df["value" < df["column_name"]]
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_expr_with_no_quotes_and_backtick_is_unmatched():
-    # GH 59285
-    df = DataFrame((1, 5, 10), columns=["column-name"])
-    with pytest.raises(SyntaxError, match="invalid syntax"):
-        df.query("5 < `column-name")
-
-
-def test_query_on_expr_with_no_quotes_and_backtick_is_matched():
-    # GH 59285
-    df = DataFrame((1, 5, 10), columns=["column-name"])
-    result = df.query("5 < `column-name`")
-    expected = df[5 < df["column-name"]]
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_expr_with_backtick_opened_before_quote_and_backtick_is_unmatched():
-    # GH 59285
-    df = DataFrame((1, 5, 10), columns=["It's"])
-    with pytest.raises(SyntaxError, match="unterminated string literal"):
-        df.query("5 < `It's")
-
-
-def test_query_on_expr_with_backtick_opened_before_quote_and_backtick_is_matched():
-    # GH 59285
-    df = DataFrame((1, 5, 10), columns=["It's"])
-    result = df.query("5 < `It's`")
-    expected = df[5 < df["It's"]]
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_expr_with_quote_opened_before_backtick_and_quote_is_unmatched():
-    # GH 59285
-    df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])
-    with pytest.raises(SyntaxError, match="unterminated string literal"):
-        df.query("`column-name` < 'It`s that\\'s \"quote\" #hash")
-
-
-def test_query_on_expr_with_quote_opened_before_backtick_and_quote_is_matched_at_end():
-    # GH 59285
-    df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])
-    result = df.query("`column-name` < 'It`s that\\'s \"quote\" #hash'")
-    expected = df[df["column-name"] < 'It`s that\'s "quote" #hash']
-    tm.assert_frame_equal(result, expected)
-
-
-def test_query_on_expr_with_quote_opened_before_backtick_and_quote_is_matched_in_mid():
-    # GH 59285
-    df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])
-    result = df.query("'It`s that\\'s \"quote\" #hash' < `column-name`")
-    expected = df['It`s that\'s "quote" #hash' < df["column-name"]]
-    tm.assert_frame_equal(result, expected)
-
-
 def test_set_inplace():
     # https://github.com/pandas-dev/pandas/issues/47449
     # Ensure we don't only update the DataFrame inplace, but also the actual
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index aa2fb19fe8528..550241201836d 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -1341,20 +1341,139 @@ def test_missing_attribute(self, df):
         with pytest.raises(AttributeError, match=message):
             df.eval("@pd.thing")
 
-    def test_failing_quote(self, df):
-        msg = r"(Could not convert ).*( to a valid Python identifier.)"
-        with pytest.raises(SyntaxError, match=msg):
-            df.query("`it's` > `that's`")
+    def test_quote(self, df):
+        res = df.query("`it's` > `that's`")
+        expect = df[df["it's"] > df["that's"]]
+        tm.assert_frame_equal(res, expect)
 
     def test_failing_character_outside_range(self, df):
         msg = r"(Could not convert ).*( to a valid Python identifier.)"
         with pytest.raises(SyntaxError, match=msg):
             df.query("`☺` > 4")
 
-    def test_failing_hashtag(self, df):
-        msg = "Failed to parse backticks"
-        with pytest.raises(SyntaxError, match=msg):
-            df.query("`foo#bar` > 4")
+    def test_hashtag(self, df):
+        res = df.query("`foo#bar` > 4")
+        expect = df[df["foo#bar"] > 4]
+        tm.assert_frame_equal(res, expect)
+
+    def test_expr_with_column_name_with_hashtag_character(self):
+        # GH 59285
+        df = DataFrame((1, 2, 3), columns=["a#"])
+        result = df.query("`a#` < 2")
+        expected = df[df["a#"] < 2]
+        tm.assert_frame_equal(result, expected)
+
+    def test_expr_with_comment(self):
+        # GH 59285
+        df = DataFrame((1, 2, 3), columns=["a#"])
+        result = df.query("`a#` < 2  # This is a comment")
+        expected = df[df["a#"] < 2]
+        tm.assert_frame_equal(result, expected)
+
+    def test_expr_with_column_name_with_backtick_and_hash(self):
+        # GH 59285
+        df = DataFrame((1, 2, 3), columns=["a`#b"])
+        result = df.query("`a``#b` < 2")
+        expected = df[df["a`#b"] < 2]
+        tm.assert_frame_equal(result, expected)
+
+    def test_expr_with_column_name_with_backtick(self):
+        # GH 59285
+        df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
+        result = df.query("`a``b` < 2")
+        # Note: Formatting checks may wrongly consider the above ``inline code``.
+        expected = df[df["a`b"] < 2]
+        tm.assert_frame_equal(result, expected)
+
+    def test_expr_with_string_with_backticks(self):
+        # GH 59285
+        df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])
+        result = df.query("'```' < `#backticks`")
+        expected = df["```" < df["#backticks"]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_expr_with_string_with_backticked_substring_same_as_column_name(self):
+        # GH 59285
+        df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])
+        result = df.query("'`#backticks`' < `#backticks`")
+        expected = df["`#backticks`" < df["#backticks"]]
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "col1,col2,expr",
+        [
+            ("it's", "that's", "`it's` < `that's`"),
+            ('it"s', 'that"s', '`it"s` < `that"s`'),
+            ("it's", 'that\'s "nice"', "`it's` < `that's \"nice\"`"),
+            ("it's", "that's #cool", "`it's` < `that's #cool` # This is a comment"),
+        ],
+    )
+    def test_expr_with_column_names_with_special_characters(self, col1, col2, expr):
+        # GH 59285
+        df = DataFrame(
+            [
+                {col1: 1, col2: 2},
+                {col1: 3, col2: 4},
+                {col1: -1, col2: -2},
+                {col1: -3, col2: -4},
+            ]
+        )
+        result = df.query(expr)
+        expected = df[df[col1] < df[col2]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_expr_with_no_backticks(self):
+        # GH 59285
+        df = DataFrame(("aaa", "vvv", "zzz"), columns=["column_name"])
+        result = df.query("'value' < column_name")
+        expected = df["value" < df["column_name"]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_expr_with_no_quotes_and_backtick_is_unmatched(self):
+        # GH 59285
+        df = DataFrame((1, 5, 10), columns=["column-name"])
+        with pytest.raises(SyntaxError, match="invalid syntax"):
+            df.query("5 < `column-name")
+
+    def test_expr_with_no_quotes_and_backtick_is_matched(self):
+        # GH 59285
+        df = DataFrame((1, 5, 10), columns=["column-name"])
+        result = df.query("5 < `column-name`")
+        expected = df[5 < df["column-name"]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_expr_with_backtick_opened_before_quote_and_backtick_is_unmatched(self):
+        # GH 59285
+        df = DataFrame((1, 5, 10), columns=["It's"])
+        with pytest.raises(SyntaxError, match="unterminated string literal"):
+            df.query("5 < `It's")
+
+    def test_expr_with_backtick_opened_before_quote_and_backtick_is_matched(self):
+        # GH 59285
+        df = DataFrame((1, 5, 10), columns=["It's"])
+        result = df.query("5 < `It's`")
+        expected = df[5 < df["It's"]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_expr_with_quote_opened_before_backtick_and_quote_is_unmatched(self):
+        # GH 59285
+        df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])
+        with pytest.raises(SyntaxError, match="unterminated string literal"):
+            df.query("`column-name` < 'It`s that\\'s \"quote\" #hash")
+
+    def test_expr_with_quote_opened_before_backtick_and_quote_is_matched_at_end(self):
+        # GH 59285
+        df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])
+        result = df.query("`column-name` < 'It`s that\\'s \"quote\" #hash'")
+        expected = df[df["column-name"] < 'It`s that\'s "quote" #hash']
+        tm.assert_frame_equal(result, expected)
+
+    def test_expr_with_quote_opened_before_backtick_and_quote_is_matched_in_mid(self):
+        # GH 59285
+        df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])
+        result = df.query("'It`s that\\'s \"quote\" #hash' < `column-name`")
+        expected = df['It`s that\'s "quote" #hash' < df["column-name"]]
+        tm.assert_frame_equal(result, expected)
 
     def test_call_non_named_expression(self, df):
         """

From e99db1ce03953fd4078dd3b368b30c1a9a25f1ca Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Tue, 6 Aug 2024 18:10:29 +0200
Subject: [PATCH 21/32] GH 49633: special characters

---
 pandas/core/computation/parsing.py    | 10 ++++++++++
 pandas/core/frame.py                  |  8 ++------
 pandas/tests/frame/test_query_eval.py | 22 ++++++++++++++++++----
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index 05db7d51b5b20..d793111f93aeb 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -41,6 +41,16 @@ def create_valid_python_identifier(name: str) -> str:
     if name.isidentifier() and not iskeyword(name):
         return name
 
+    # Escape characters that fall outside the ASCII range (U+0001..U+007F).
+    # GH 49633
+    c_escaped_gen = (
+        "".join(chr(b) for b in c.encode("ascii", "backslashreplace")) for c in name
+    )
+    name = "".join(
+        c_escaped.replace("\\", "_UNICODE_" if c != c_escaped else "_BACKSLASH_")
+        for c, c_escaped in zip(name, c_escaped_gen)
+    )
+
     # Create a dict with the special characters and their replacement string.
     # EXACT_TOKEN_TYPES contains these special characters
     # token.tok_name contains a readable description of the replacement string.
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f2d87ee36490a..ef7ffcb652149 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4556,12 +4556,8 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
         quoted string are replaced by strings that are allowed as a Python identifier.
         These characters include all operators in Python, the space character, the
         question mark, the exclamation mark, the dollar sign, and the euro sign.
-        For other characters that fall outside the ASCII range (U+0001..U+007F)
-        and those that are not further specified in PEP 3131,
-        the query parser will raise an error.
-        This excludes whitespace different than the space character,
-        but also the hashtag (as it is used for comments) and the backtick
-        itself (backtick can also not be escaped).
+
+        A backtick can be escaped by double backticks.
 
         See also the `Python documentation about lexical analysis
         <https://docs.python.org/3/reference/lexical_analysis.html>`__
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 550241201836d..b8769cc7d4be8 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -1246,6 +1246,8 @@ def df(self):
                 "it's": [6, 3, 1],
                 "that's": [9, 1, 8],
                 "☺": [8, 7, 6],
+                "xy （z）": [1, 2, 3],
+                "xy （z\\uff09": [4, 5, 6],
                 "foo#bar": [2, 4, 5],
                 1: [5, 7, 9],
             }
@@ -1346,10 +1348,22 @@ def test_quote(self, df):
         expect = df[df["it's"] > df["that's"]]
         tm.assert_frame_equal(res, expect)
 
-    def test_failing_character_outside_range(self, df):
-        msg = r"(Could not convert ).*( to a valid Python identifier.)"
-        with pytest.raises(SyntaxError, match=msg):
-            df.query("`☺` > 4")
+    def test_character_outside_range_smiley(self, df):
+        res = df.query("`☺` > 4")
+        expect = df[df["☺"] > 4]
+        tm.assert_frame_equal(res, expect)
+
+    def test_character_outside_range_2_byte_parens(self, df):
+        # GH 49633
+        res = df.query("`xy （z）` == 2")
+        expect = df[df["xy （z）"] == 2]
+        tm.assert_frame_equal(res, expect)
+
+    def test_character_outside_range_and_actual_backslash(self, df):
+        # GH 49633
+        res = df.query("`xy （z\\uff09` == 2")
+        expect = df[df["xy \uff08z\\uff09"] == 2]
+        tm.assert_frame_equal(res, expect)
 
     def test_hashtag(self, df):
         res = df.query("`foo#bar` > 4")

From a005f135d16c9c4666886db37ff101139d4db85c Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Tue, 6 Aug 2024 18:14:34 +0200
Subject: [PATCH 22/32] add noqa

---
 pandas/tests/frame/test_query_eval.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index b8769cc7d4be8..b1b75ea0c16cb 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -1246,8 +1246,8 @@ def df(self):
                 "it's": [6, 3, 1],
                 "that's": [9, 1, 8],
                 "☺": [8, 7, 6],
-                "xy （z）": [1, 2, 3],
-                "xy （z\\uff09": [4, 5, 6],
+                "xy （z）": [1, 2, 3],  # noqa: RUF001
+                "xy （z\\uff09": [4, 5, 6],  # noqa: RUF001
                 "foo#bar": [2, 4, 5],
                 1: [5, 7, 9],
             }
@@ -1355,13 +1355,13 @@ def test_character_outside_range_smiley(self, df):
 
     def test_character_outside_range_2_byte_parens(self, df):
         # GH 49633
-        res = df.query("`xy （z）` == 2")
-        expect = df[df["xy （z）"] == 2]
+        res = df.query("`xy （z）` == 2")  # noqa: RUF001
+        expect = df[df["xy （z）"] == 2]  # noqa: RUF001
         tm.assert_frame_equal(res, expect)
 
     def test_character_outside_range_and_actual_backslash(self, df):
         # GH 49633
-        res = df.query("`xy （z\\uff09` == 2")
+        res = df.query("`xy （z\\uff09` == 2")  # noqa: RUF001
         expect = df[df["xy \uff08z\\uff09"] == 2]
         tm.assert_frame_equal(res, expect)
 
@@ -1394,7 +1394,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self):
     def test_expr_with_column_name_with_backtick(self):
         # GH 59285
         df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
-        result = df.query("`a``b` < 2")
+        result = df.query("`a``b` < 2")  # noqa
         # Note: Formatting checks may wrongly consider the above ``inline code``.
         expected = df[df["a`b"] < 2]
         tm.assert_frame_equal(result, expected)

From a77a215a0f700d5a0c7fd30eb6e6ba9c94371f66 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Tue, 6 Aug 2024 19:40:21 +0200
Subject: [PATCH 23/32] update docstring, simplify escaping generator

---
 pandas/core/computation/parsing.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index d793111f93aeb..5fcaec1bb8a93 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -35,20 +35,19 @@ def create_valid_python_identifier(name: str) -> str:
     ------
     SyntaxError
         If the returned name is not a Python valid identifier, raise an exception.
-        This can happen if the name includes characters that fall out of the range of
-        (U+0001..U+007F).
     """
     if name.isidentifier() and not iskeyword(name):
         return name
 
     # Escape characters that fall outside the ASCII range (U+0001..U+007F).
     # GH 49633
-    c_escaped_gen = (
-        "".join(chr(b) for b in c.encode("ascii", "backslashreplace")) for c in name
+    gen = (
+        (c, "".join(chr(b) for b in c.encode("ascii", "backslashreplace")))
+        for c in name
     )
     name = "".join(
         c_escaped.replace("\\", "_UNICODE_" if c != c_escaped else "_BACKSLASH_")
-        for c, c_escaped in zip(name, c_escaped_gen)
+        for c, c_escaped in gen
     )
 
     # Create a dict with the special characters and their replacement string.

From daf2c37d9de49ead30de14351aebf9d8eedde2e9 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Tue, 6 Aug 2024 22:47:49 +0200
Subject: [PATCH 24/32] unmatched backtick or quote can raise SyntaxError OR
 TokenError

---
 pandas/tests/frame/test_query_eval.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index b1b75ea0c16cb..0a83c8113e814 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -1,4 +1,5 @@
 import operator
+from tokenize import TokenError
 
 import numpy as np
 import pytest
@@ -1446,7 +1447,7 @@ def test_expr_with_no_backticks(self):
     def test_expr_with_no_quotes_and_backtick_is_unmatched(self):
         # GH 59285
         df = DataFrame((1, 5, 10), columns=["column-name"])
-        with pytest.raises(SyntaxError, match="invalid syntax"):
+        with pytest.raises((SyntaxError, TokenError), match="invalid syntax"):
             df.query("5 < `column-name")
 
     def test_expr_with_no_quotes_and_backtick_is_matched(self):
@@ -1459,7 +1460,9 @@ def test_expr_with_no_quotes_and_backtick_is_matched(self):
     def test_expr_with_backtick_opened_before_quote_and_backtick_is_unmatched(self):
         # GH 59285
         df = DataFrame((1, 5, 10), columns=["It's"])
-        with pytest.raises(SyntaxError, match="unterminated string literal"):
+        with pytest.raises(
+            (SyntaxError, TokenError), match="unterminated string literal"
+        ):
             df.query("5 < `It's")
 
     def test_expr_with_backtick_opened_before_quote_and_backtick_is_matched(self):
@@ -1472,7 +1475,9 @@ def test_expr_with_backtick_opened_before_quote_and_backtick_is_matched(self):
     def test_expr_with_quote_opened_before_backtick_and_quote_is_unmatched(self):
         # GH 59285
         df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])
-        with pytest.raises(SyntaxError, match="unterminated string literal"):
+        with pytest.raises(
+            (SyntaxError, TokenError), match="unterminated string literal"
+        ):
             df.query("`column-name` < 'It`s that\\'s \"quote\" #hash")
 
     def test_expr_with_quote_opened_before_backtick_and_quote_is_matched_at_end(self):

From 984431bfaa2864cabab52aa501fe84e7b842167d Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Wed, 7 Aug 2024 06:37:23 +0200
Subject: [PATCH 25/32] change splitting

---
 pandas/core/computation/parsing.py | 136 +++++++++++++++--------------
 1 file changed, 70 insertions(+), 66 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index 5fcaec1bb8a93..7b2c9816c607a 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -4,10 +4,8 @@
 
 from __future__ import annotations
 
-from io import (
-    BytesIO,
-    StringIO,
-)
+from enum import Enum
+from io import StringIO
 from keyword import iskeyword
 import token
 import tokenize
@@ -179,6 +177,13 @@ def tokenize_backtick_quoted_string(
     return BACKTICK_QUOTED_STRING, source[string_start:string_end]
 
 
+class ParseState(Enum):
+    DEFAULT = 0
+    IN_BACKTICK = 1
+    IN_SINGLE_QUOTE = 2
+    IN_DOUBLE_QUOTE = 3
+
+
 def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
     """
     Splits a str into substrings along backtick characters (`).
@@ -198,70 +203,69 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
         The second is the actual substring.
     """
     substrings = []
-    substring = ""
+    substr = ""
     i = 0
+    parse_state = ParseState.DEFAULT
     while i < len(s):
-        backtick_index = s.find("`", i)
-
-        # No backticks
-        if backtick_index == -1:
-            substrings.append((False, substring + s[i:]))
-            break
-
-        single_quote_index = s.find("'", i)
-        double_quote_index = s.find('"', i)
-        if (single_quote_index == -1) and (double_quote_index == -1):
-            quote_index = -1
-        elif single_quote_index == -1:
-            quote_index = double_quote_index
-        elif double_quote_index == -1:
-            quote_index = single_quote_index
-        else:
-            quote_index = min(single_quote_index, double_quote_index)
-
-        # No quotes, or
-        # Backtick opened before quote
-        if (quote_index == -1) or (backtick_index < quote_index):
-            next_backtick_index = s.find("`", backtick_index + 1)
-            while (
-                (next_backtick_index != -1)
-                and (next_backtick_index != len(s) - 1)
-                and (s[next_backtick_index + 1] == "`")
-            ):
-                # Since the next character is also a backtick, it's an escaped backtick
-                next_backtick_index = s.find("`", next_backtick_index + 2)
-
-            # Backtick is unmatched (Bad syntax)
-            if next_backtick_index == -1:
-                substrings.append((False, substring + s[i:]))
-                break
-            # Backtick is matched
-            else:
-                if substring or (i != backtick_index):
-                    substrings.append((False, substring + s[i:backtick_index]))
-                substrings.append((True, s[backtick_index : next_backtick_index + 1]))
-                substring = ""
-                i = next_backtick_index + 1
-
-        # Quote opened before backtick
-        else:
-            next_quote_index = -1
-            line_reader = BytesIO(s[i:].encode("utf-8")).readline
-            token_generator = tokenize.tokenize(line_reader)
-            for toknum, _, (_, _), (_, end), _ in token_generator:
-                if toknum == tokenize.STRING:
-                    next_quote_index = i + end - 1
-                    break
-
-            # Quote is unmatched (Bad syntax), or
-            # Quote is matched, and the next quote is at the end of s
-            if (next_quote_index == -1) or (next_quote_index + 1 == len(s)):
-                substrings.append((False, substring + s[i:]))
-                break
-            # Quote is matched, and the next quote is in the middle of s
-            else:
-                substring += s[i : next_quote_index + 1]
-                i = next_quote_index + 1
+        char = s[i]
+
+        match char:
+            case "`":
+                # start of a backtick-quoted string
+                if parse_state == ParseState.DEFAULT:
+                    if substr:
+                        substrings.append((False, substr))
+                    substr = char
+                    i += 1
+                    parse_state = ParseState.IN_BACKTICK
+                    continue
+                elif parse_state == ParseState.IN_BACKTICK:
+                    # escaped backtick inside a backtick-quoted string
+                    next_char = s[i + 1] if (i != len(s) - 1) else None
+                    if next_char == "`":
+                        substr += char + next_char
+                        i += 2
+                        continue
+                    # end of the backtick-quoted string
+                    else:
+                        substr += char
+                        substrings.append((True, substr))
+
+                        substr = ""
+                        i += 1
+                        parse_state = ParseState.DEFAULT
+                        continue
+            case "'":
+                # start of a single-quoted string
+                if parse_state == ParseState.DEFAULT:
+                    substr += char
+                    i += 1
+                    parse_state = ParseState.IN_SINGLE_QUOTE
+                    continue
+                # end of a single-quoted string
+                elif (parse_state == ParseState.IN_SINGLE_QUOTE) and (s[i - 1] != "\\"):
+                    substr += char
+                    i += 1
+                    parse_state = ParseState.DEFAULT
+                    continue
+            case '"':
+                # start of a double-quoted string
+                if parse_state == ParseState.DEFAULT:
+                    substr += char
+                    i += 1
+                    parse_state = ParseState.IN_DOUBLE_QUOTE
+                    continue
+                # end of a double-quoted string
+                elif (parse_state == ParseState.IN_DOUBLE_QUOTE) and (s[i - 1] != "\\"):
+                    substr += char
+                    i += 1
+                    parse_state = ParseState.DEFAULT
+                    continue
+        substr += char
+        i += 1
+
+    if substr:
+        substrings.append((False, substr))
 
     return substrings
 

From b0833c0a51915b0189751838fd48c7efd8ad9840 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Wed, 7 Aug 2024 06:40:43 +0200
Subject: [PATCH 26/32] remove repeated statements in quote branches

---
 pandas/core/computation/parsing.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index 7b2c9816c607a..4353e046db9ec 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -238,29 +238,17 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
             case "'":
                 # start of a single-quoted string
                 if parse_state == ParseState.DEFAULT:
-                    substr += char
-                    i += 1
                     parse_state = ParseState.IN_SINGLE_QUOTE
-                    continue
                 # end of a single-quoted string
                 elif (parse_state == ParseState.IN_SINGLE_QUOTE) and (s[i - 1] != "\\"):
-                    substr += char
-                    i += 1
                     parse_state = ParseState.DEFAULT
-                    continue
             case '"':
                 # start of a double-quoted string
                 if parse_state == ParseState.DEFAULT:
-                    substr += char
-                    i += 1
                     parse_state = ParseState.IN_DOUBLE_QUOTE
-                    continue
                 # end of a double-quoted string
                 elif (parse_state == ParseState.IN_DOUBLE_QUOTE) and (s[i - 1] != "\\"):
-                    substr += char
-                    i += 1
                     parse_state = ParseState.DEFAULT
-                    continue
         substr += char
         i += 1
 

From 5e0631d028c43697d5c367bc391ca54e38164298 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Fri, 9 Aug 2024 03:13:36 +0200
Subject: [PATCH 27/32] collect chars in a list

---
 pandas/core/computation/parsing.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index 4353e046db9ec..9b58bb7bf9e7a 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -203,7 +203,7 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
         The second is the actual substring.
     """
     substrings = []
-    substr = ""
+    substr = []  # collect in a list, join into a string before adding to substrings
     i = 0
     parse_state = ParseState.DEFAULT
     while i < len(s):
@@ -214,24 +214,28 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
                 # start of a backtick-quoted string
                 if parse_state == ParseState.DEFAULT:
                     if substr:
-                        substrings.append((False, substr))
-                    substr = char
+                        substrings.append((False, "".join(substr)))
+
+                    substr = [char]
                     i += 1
                     parse_state = ParseState.IN_BACKTICK
                     continue
+
                 elif parse_state == ParseState.IN_BACKTICK:
                     # escaped backtick inside a backtick-quoted string
                     next_char = s[i + 1] if (i != len(s) - 1) else None
                     if next_char == "`":
-                        substr += char + next_char
+                        substr.append(char)
+                        substr.append(next_char)
                         i += 2
                         continue
+
                     # end of the backtick-quoted string
                     else:
-                        substr += char
-                        substrings.append((True, substr))
+                        substr.append(char)
+                        substrings.append((True, "".join(substr)))
 
-                        substr = ""
+                        substr = []
                         i += 1
                         parse_state = ParseState.DEFAULT
                         continue
@@ -249,11 +253,11 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
                 # end of a double-quoted string
                 elif (parse_state == ParseState.IN_DOUBLE_QUOTE) and (s[i - 1] != "\\"):
                     parse_state = ParseState.DEFAULT
-        substr += char
+        substr.append(char)
         i += 1
 
     if substr:
-        substrings.append((False, substr))
+        substrings.append((False, "".join(substr)))
 
     return substrings
 

From d3669c7974ddb5c24e694d8fc4bfeb8e08aad357 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Fri, 9 Aug 2024 03:31:07 +0200
Subject: [PATCH 28/32] add issue 49633 to whatsnew

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index b5f7ebc26c2bb..452e4a9bfece8 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -647,7 +647,7 @@ Other
 - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
-- Bug in :meth:`DataFrame.query` which raised a ``KeyError`` when the expression contained column names with characters like ``#``. (:issue:`59285`)
+- Bug in :meth:`DataFrame.query` which raised an exception or produced incorrect results when expressions contained backtick-quoted column names containing the hash character ``#``, backticks, or characters that fall outside the ASCII range (U+0001..U+007F). (:issue:`59285`) (:issue:`49633`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
 - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)

From 87ded7c3d3130b6c432044b32ebaaf51f3515605 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sat, 10 Aug 2024 15:17:56 +0200
Subject: [PATCH 29/32] atone for my typing sins :)

---
 pandas/core/computation/parsing.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index 9b58bb7bf9e7a..b35dda87bbfb4 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -135,7 +135,9 @@ def clean_column_name(name: Hashable) -> Hashable:
         which is not caught and propagates to the user level.
     """
     try:
-        name = name.replace("`", "``")  # Escape backticks
+        # Escape backticks
+        name = name.replace("`", "``") if isinstance(name, str) else name
+
         tokenized = tokenize_string(f"`{name}`")
         tokval = next(tokenized)[1]
         return create_valid_python_identifier(tokval)
@@ -203,7 +205,7 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
         The second is the actual substring.
     """
     substrings = []
-    substr = []  # collect in a list, join into a string before adding to substrings
+    substr: list[str] = []  # join into a string before adding to `substrings`
     i = 0
     parse_state = ParseState.DEFAULT
     while i < len(s):

From ad18c87c6b5ee9243cf4f2a674dc1850dba42233 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Sat, 10 Aug 2024 15:23:22 +0200
Subject: [PATCH 30/32] exclude test_query_eval.py for
 rst-inline-touching-normal in .pre-commit-config.yaml

---
 .pre-commit-config.yaml            | 1 +
 pandas/core/computation/parsing.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b81b9ba070a44..882be47c47ee7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -85,6 +85,7 @@ repos:
         types: [text]  # overwrite types: [rst]
         types_or: [python, rst]
       - id: rst-inline-touching-normal
+        exclude: ^pandas/tests/frame/test_query_eval.py
         types: [text]  # overwrite types: [rst]
         types_or: [python, rst]
 -   repo: https://github.com/sphinx-contrib/sphinx-lint
diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py
index b35dda87bbfb4..35a6d1c6ad269 100644
--- a/pandas/core/computation/parsing.py
+++ b/pandas/core/computation/parsing.py
@@ -205,7 +205,7 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
         The second is the actual substring.
     """
     substrings = []
-    substr: list[str] = []  # join into a string before adding to `substrings`
+    substr: list[str] = []  # Will join into a string before adding to `substrings`
     i = 0
     parse_state = ParseState.DEFAULT
     while i < len(s):

From 173f3996079c8226c6779e2560fd66f0e5463f24 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Mon, 12 Aug 2024 15:36:45 +0200
Subject: [PATCH 31/32] tests: add decorators for Future Infer Strings job

---
 pandas/tests/frame/test_query_eval.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 0a83c8113e814..fa71153d01157 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -1400,6 +1400,7 @@ def test_expr_with_column_name_with_backtick(self):
         expected = df[df["a`b"] < 2]
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_expr_with_string_with_backticks(self):
         # GH 59285
         df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])
@@ -1407,6 +1408,7 @@ def test_expr_with_string_with_backticks(self):
         expected = df["```" < df["#backticks"]]
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_expr_with_string_with_backticked_substring_same_as_column_name(self):
         # GH 59285
         df = DataFrame(("`", "`````", "``````````"), columns=["#backticks"])
@@ -1437,6 +1439,7 @@ def test_expr_with_column_names_with_special_characters(self, col1, col2, expr):
         expected = df[df[col1] < df[col2]]
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_expr_with_no_backticks(self):
         # GH 59285
         df = DataFrame(("aaa", "vvv", "zzz"), columns=["column_name"])
@@ -1480,6 +1483,7 @@ def test_expr_with_quote_opened_before_backtick_and_quote_is_unmatched(self):
         ):
             df.query("`column-name` < 'It`s that\\'s \"quote\" #hash")
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_expr_with_quote_opened_before_backtick_and_quote_is_matched_at_end(self):
         # GH 59285
         df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])
@@ -1487,6 +1491,7 @@ def test_expr_with_quote_opened_before_backtick_and_quote_is_matched_at_end(self
         expected = df[df["column-name"] < 'It`s that\'s "quote" #hash']
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_expr_with_quote_opened_before_backtick_and_quote_is_matched_in_mid(self):
         # GH 59285
         df = DataFrame(("aaa", "vvv", "zzz"), columns=["column-name"])

From 9ee2231edc25893293645a64a4e25e47715ab201 Mon Sep 17 00:00:00 2001
From: aram-cinnamon <aram.cinnamon.cinnabar@gmail.com>
Date: Tue, 13 Aug 2024 02:41:52 +0200
Subject: [PATCH 32/32] pre-commit exclude

---
 .pre-commit-config.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 882be47c47ee7..f6717dd503c9b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,6 +23,7 @@ repos:
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
+        exclude: ^pandas/tests/frame/test_query_eval.py
     -   id: ruff
         # TODO: remove autofixe-only rules when they are checked by ruff
         name: ruff-selected-autofixes
@@ -31,7 +32,7 @@ repos:
         exclude: ^pandas/tests
         args: [--select, "ANN001,ANN2", --fix-only, --exit-non-zero-on-fix]
     -   id: ruff-format
-        exclude: ^scripts
+        exclude: ^scripts|^pandas/tests/frame/test_query_eval.py
 -   repo: https://github.com/jendrikseipp/vulture
     rev: 'v2.11'
     hooks: