Skip to content

Commit 0f58dd9

Browse files
GH 49633: special characters
1 parent b40c3c8 commit 0f58dd9

File tree

3 files changed

+30
-10
lines changed

3 files changed

+30
-10
lines changed

pandas/core/computation/parsing.py

+10
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,16 @@ def create_valid_python_identifier(name: str) -> str:
4141
if name.isidentifier() and not iskeyword(name):
4242
return name
4343

44+
# Escape characters that fall outside the ASCII range (U+0001..U+007F).
45+
# GH 49633
46+
c_escaped_gen = (
47+
"".join(chr(b) for b in c.encode("ascii", "backslashreplace")) for c in name
48+
)
49+
name = "".join(
50+
c_escaped.replace("\\", "_UNICODE_" if c != c_escaped else "_BACKSLASH_")
51+
for c, c_escaped in zip(name, c_escaped_gen)
52+
)
53+
4454
# Create a dict with the special characters and their replacement string.
4555
# EXACT_TOKEN_TYPES contains these special characters
4656
# token.tok_name contains a readable description of the replacement string.

pandas/core/frame.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -4556,12 +4556,8 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
45564556
quoted string are replaced by strings that are allowed as a Python identifier.
45574557
These characters include all operators in Python, the space character, the
45584558
question mark, the exclamation mark, the dollar sign, and the euro sign.
4559-
For other characters that fall outside the ASCII range (U+0001..U+007F)
4560-
and those that are not further specified in PEP 3131,
4561-
the query parser will raise an error.
4562-
This excludes whitespace different than the space character,
4563-
but also the hashtag (as it is used for comments) and the backtick
4564-
itself (backtick can also not be escaped).
4559+
4560+
A backtick can be escaped by double backticks.
45654561
45664562
See also the `Python documentation about lexical analysis
45674563
<https://docs.python.org/3/reference/lexical_analysis.html>`__

pandas/tests/frame/test_query_eval.py

+18-4
Original file line numberDiff line numberDiff line change
@@ -1246,6 +1246,8 @@ def df(self):
12461246
"it's": [6, 3, 1],
12471247
"that's": [9, 1, 8],
12481248
"☺": [8, 7, 6],
1249+
"xy (z)": [1, 2, 3],
1250+
"xy \uff08z\\uff09": [4, 5, 6],
12491251
"foo#bar": [2, 4, 5],
12501252
1: [5, 7, 9],
12511253
}
@@ -1346,10 +1348,22 @@ def test_quote(self, df):
13461348
expect = df[df["it's"] > df["that's"]]
13471349
tm.assert_frame_equal(res, expect)
13481350

1349-
def test_failing_character_outside_range(self, df):
1350-
msg = r"(Could not convert ).*( to a valid Python identifier.)"
1351-
with pytest.raises(SyntaxError, match=msg):
1352-
df.query("`☺` > 4")
1351+
def test_character_outside_range_smiley(self, df):
1352+
res = df.query("`☺` > 4")
1353+
expect = df[df["☺"] > 4]
1354+
tm.assert_frame_equal(res, expect)
1355+
1356+
def test_character_outside_range_2_byte_parens(self, df):
1357+
# GH 49633
1358+
res = df.query("`xy (z)` == 2")
1359+
expect = df[df["xy (z)"] == 2]
1360+
tm.assert_frame_equal(res, expect)
1361+
1362+
def test_character_outside_range_and_actual_backslash(self, df):
1363+
# GH 49633
1364+
res = df.query("`xy \uff08z\\uff09` == 2")
1365+
expect = df[df["xy \uff08z\\uff09"] == 2]
1366+
tm.assert_frame_equal(res, expect)
13531367

13541368
def test_hashtag(self, df):
13551369
res = df.query("`foo#bar` > 4")

0 commit comments

Comments
 (0)