Skip to content

Commit e99ff8f

Browse files
change splitting
1 parent 16bc7de commit e99ff8f

File tree

1 file changed

+70
-66
lines changed

1 file changed

+70
-66
lines changed

pandas/core/computation/parsing.py

+70-66
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@
44

55
from __future__ import annotations
66

7-
from io import (
8-
BytesIO,
9-
StringIO,
10-
)
7+
from enum import Enum
8+
from io import StringIO
119
from keyword import iskeyword
1210
import token
1311
import tokenize
@@ -179,6 +177,13 @@ def tokenize_backtick_quoted_string(
179177
return BACKTICK_QUOTED_STRING, source[string_start:string_end]
180178

181179

180+
class ParseState(Enum):
    """
    Scanner states used by ``_split_by_backtick`` while walking a string.

    Exactly one state is active at any character position; it records which
    kind of quoted section (if any) the scanner is currently inside.
    """

    # Not inside any quoted section
    DEFAULT = 0
    # Inside a backtick-quoted section (`...`)
    IN_BACKTICK = 1
    # Inside a single-quoted string ('...')
    IN_SINGLE_QUOTE = 2
    # Inside a double-quoted string ("...")
    IN_DOUBLE_QUOTE = 3
185+
186+
182187
def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
    """
    Splits a str into substrings along backtick characters (`).

    Backticks that appear inside a single- or double-quoted string are
    treated as ordinary characters. Inside a backtick-quoted section, two
    consecutive backticks are an escaped backtick and do not close the section.

    Parameters
    ----------
    s : str
        The string to split.

    Returns
    -------
    substrings : list[tuple[bool, str]]
        List of tuples, where each tuple has two elements:
        The first is a boolean indicating if the substring is backtick-quoted.
        The second is the actual substring.

        Backtick-quoted substrings include their enclosing backticks, and
        joining all substrings in order gives back ``s``.

    Notes
    -----
    A backtick that is never closed does not produce a backtick-quoted
    substring; the text from that backtick to the end of ``s`` is returned
    with the boolean set to ``False``.
    """
    substrings: list[tuple[bool, str]] = []
    # Pieces of the substring currently being built; joined once per flush so
    # the scan does not depend on repeated string concatenation being cheap.
    pieces: list[str] = []
    parse_state = ParseState.DEFAULT
    length = len(s)
    i = 0

    while i < length:
        char = s[i]

        if parse_state == ParseState.DEFAULT:
            if char == "`":
                # Start of a backtick-quoted section: flush what came before
                if pieces:
                    substrings.append((False, "".join(pieces)))
                pieces = [char]
                parse_state = ParseState.IN_BACKTICK
            else:
                pieces.append(char)
                if char == "'":
                    parse_state = ParseState.IN_SINGLE_QUOTE
                elif char == '"':
                    parse_state = ParseState.IN_DOUBLE_QUOTE
            i += 1

        elif parse_state == ParseState.IN_BACKTICK:
            if char != "`":
                pieces.append(char)
                i += 1
            elif s[i + 1 : i + 2] == "`":
                # Doubled backtick is an escaped backtick; keep both characters
                pieces.append("``")
                i += 2
            else:
                # End of the backtick-quoted section
                pieces.append(char)
                substrings.append((True, "".join(pieces)))
                pieces = []
                parse_state = ParseState.DEFAULT
                i += 1

        else:
            # Inside a single- or double-quoted string
            if char == "\\":
                # Consume the backslash together with the character it escapes.
                # Looking only at the previous character would misread the
                # closing quote of a literal ending in an escaped backslash
                # (e.g. 'a\\') as escaped and never leave the quoted state.
                pieces.append(s[i : i + 2])
                i += 2
                continue

            pieces.append(char)
            closing_quote = (
                "'" if parse_state == ParseState.IN_SINGLE_QUOTE else '"'
            )
            if char == closing_quote:
                parse_state = ParseState.DEFAULT
            i += 1

    # Trailing text, including any unterminated quoted/backtick section
    if pieces:
        substrings.append((False, "".join(pieces)))

    return substrings
267271

0 commit comments

Comments
 (0)