4
4
5
5
from __future__ import annotations
6
6
7
- from io import (
8
- BytesIO ,
9
- StringIO ,
10
- )
7
+ from enum import Enum
8
+ from io import StringIO
11
9
from keyword import iskeyword
12
10
import token
13
11
import tokenize
@@ -179,6 +177,13 @@ def tokenize_backtick_quoted_string(
179
177
return BACKTICK_QUOTED_STRING , source [string_start :string_end ]
180
178
181
179
180
+ class ParseState (Enum ):
181
+ DEFAULT = 0
182
+ IN_BACKTICK = 1
183
+ IN_SINGLE_QUOTE = 2
184
+ IN_DOUBLE_QUOTE = 3
185
+
186
+
182
187
def _split_by_backtick (s : str ) -> list [tuple [bool , str ]]:
183
188
"""
184
189
Splits a str into substrings along backtick characters (`).
@@ -198,70 +203,69 @@ def _split_by_backtick(s: str) -> list[tuple[bool, str]]:
198
203
The second is the actual substring.
199
204
"""
200
205
substrings = []
201
- substring = ""
206
+ substr = ""
202
207
i = 0
208
+ parse_state = ParseState .DEFAULT
203
209
while i < len (s ):
204
- backtick_index = s .find ("`" , i )
205
-
206
- # No backticks
207
- if backtick_index == - 1 :
208
- substrings .append ((False , substring + s [i :]))
209
- break
210
-
211
- single_quote_index = s .find ("'" , i )
212
- double_quote_index = s .find ('"' , i )
213
- if (single_quote_index == - 1 ) and (double_quote_index == - 1 ):
214
- quote_index = - 1
215
- elif single_quote_index == - 1 :
216
- quote_index = double_quote_index
217
- elif double_quote_index == - 1 :
218
- quote_index = single_quote_index
219
- else :
220
- quote_index = min (single_quote_index , double_quote_index )
221
-
222
- # No quotes, or
223
- # Backtick opened before quote
224
- if (quote_index == - 1 ) or (backtick_index < quote_index ):
225
- next_backtick_index = s .find ("`" , backtick_index + 1 )
226
- while (
227
- (next_backtick_index != - 1 )
228
- and (next_backtick_index != len (s ) - 1 )
229
- and (s [next_backtick_index + 1 ] == "`" )
230
- ):
231
- # Since the next character is also a backtick, it's an escaped backtick
232
- next_backtick_index = s .find ("`" , next_backtick_index + 2 )
233
-
234
- # Backtick is unmatched (Bad syntax)
235
- if next_backtick_index == - 1 :
236
- substrings .append ((False , substring + s [i :]))
237
- break
238
- # Backtick is matched
239
- else :
240
- if substring or (i != backtick_index ):
241
- substrings .append ((False , substring + s [i :backtick_index ]))
242
- substrings .append ((True , s [backtick_index : next_backtick_index + 1 ]))
243
- substring = ""
244
- i = next_backtick_index + 1
245
-
246
- # Quote opened before backtick
247
- else :
248
- next_quote_index = - 1
249
- line_reader = BytesIO (s [i :].encode ("utf-8" )).readline
250
- token_generator = tokenize .tokenize (line_reader )
251
- for toknum , _ , (_ , _ ), (_ , end ), _ in token_generator :
252
- if toknum == tokenize .STRING :
253
- next_quote_index = i + end - 1
254
- break
255
-
256
- # Quote is unmatched (Bad syntax), or
257
- # Quote is matched, and the next quote is at the end of s
258
- if (next_quote_index == - 1 ) or (next_quote_index + 1 == len (s )):
259
- substrings .append ((False , substring + s [i :]))
260
- break
261
- # Quote is matched, and the next quote is in the middle of s
262
- else :
263
- substring += s [i : next_quote_index + 1 ]
264
- i = next_quote_index + 1
210
+ char = s [i ]
211
+
212
+ match char :
213
+ case "`" :
214
+ # start of a backtick-quoted string
215
+ if parse_state == ParseState .DEFAULT :
216
+ if substr :
217
+ substrings .append ((False , substr ))
218
+ substr = char
219
+ i += 1
220
+ parse_state = ParseState .IN_BACKTICK
221
+ continue
222
+ elif parse_state == ParseState .IN_BACKTICK :
223
+ # escaped backtick inside a backtick-quoted string
224
+ next_char = s [i + 1 ] if (i != len (s ) - 1 ) else None
225
+ if next_char == "`" :
226
+ substr += char + next_char
227
+ i += 2
228
+ continue
229
+ # end of the backtick-quoted string
230
+ else :
231
+ substr += char
232
+ substrings .append ((True , substr ))
233
+
234
+ substr = ""
235
+ i += 1
236
+ parse_state = ParseState .DEFAULT
237
+ continue
238
+ case "'" :
239
+ # start of a single-quoted string
240
+ if parse_state == ParseState .DEFAULT :
241
+ substr += char
242
+ i += 1
243
+ parse_state = ParseState .IN_SINGLE_QUOTE
244
+ continue
245
+ # end of a single-quoted string
246
+ elif (parse_state == ParseState .IN_SINGLE_QUOTE ) and (s [i - 1 ] != "\\ " ):
247
+ substr += char
248
+ i += 1
249
+ parse_state = ParseState .DEFAULT
250
+ continue
251
+ case '"' :
252
+ # start of a double-quoted string
253
+ if parse_state == ParseState .DEFAULT :
254
+ substr += char
255
+ i += 1
256
+ parse_state = ParseState .IN_DOUBLE_QUOTE
257
+ continue
258
+ # end of a double-quoted string
259
+ elif (parse_state == ParseState .IN_DOUBLE_QUOTE ) and (s [i - 1 ] != "\\ " ):
260
+ substr += char
261
+ i += 1
262
+ parse_state = ParseState .DEFAULT
263
+ continue
264
+ substr += char
265
+ i += 1
266
+
267
+ if substr :
268
+ substrings .append ((False , substr ))
265
269
266
270
return substrings
267
271
0 commit comments