@@ -163,9 +163,14 @@ def __str__(self) -> str:
         return "\n".join(lines)
 
 
+@dataclass
+class State:
+    has_comments: bool = False
+
+
 def to_items(
     text: str, line_offset: int = 0, column_offset: int = 0
-) -> Iterable[tuple[str, str]]:
+) -> tuple[list[tuple[str, str]], State]:
     """Parse a directive option block into (key, value) tuples.
 
     :param text: The directive option text.
@@ -174,12 +179,17 @@ def to_items(
 
     :raises: `TokenizeError`
     """
-    for key_token, value_token in to_tokens(text, line_offset, column_offset):
-        yield key_token.value, value_token.value if value_token is not None else ""
+    output = []
+    state = State()
+    for key_token, value_token in _to_tokens(text, state, line_offset, column_offset):
+        output.append(
+            (key_token.value, value_token.value if value_token is not None else "")
+        )
+    return output, state
 
 
-def to_tokens(
-    text: str, line_offset: int = 0, column_offset: int = 0
+def _to_tokens(
+    text: str, state: State, line_offset: int = 0, column_offset: int = 0
 ) -> Iterable[tuple[KeyToken, ValueToken | None]]:
     """Parse a directive option, and yield key/value token pairs.
 
@@ -191,7 +201,7 @@ def to_tokens(
     """
     key_token: KeyToken | None = None
     try:
-        for token in tokenize(text):
+        for token in _tokenize(text, state):
             if isinstance(token, KeyToken):
                 if key_token is not None:
                     yield key_token, None
@@ -207,12 +217,12 @@ def to_tokens(
         raise
 
 
-def tokenize(text: str) -> Iterable[Token]:
+def _tokenize(text: str, state: State) -> Iterable[Token]:
     """Yield tokens from a directive option stream."""
     stream = StreamBuffer(text)
 
     while True:
-        _scan_to_next_token(stream)
+        _scan_to_next_token(stream, state)
 
         if stream.peek() == _CHARS_END:
             break
@@ -227,9 +237,9 @@ def tokenize(text: str) -> Iterable[Token]:
         if ch in ("'", '"'):
             yield _scan_flow_scalar(stream, cast(Literal['"', "'"], ch), is_key=True)
         else:
-            yield _scan_plain_scalar(stream, is_key=True)
+            yield _scan_plain_scalar(stream, state, is_key=True)
 
-        _scan_to_next_token(stream)
+        _scan_to_next_token(stream, state)
 
         # check next char is colon + space
         if stream.peek() != ":":
@@ -240,21 +250,21 @@ def tokenize(text: str) -> Iterable[Token]:
         end_mark = stream.get_position()
         yield ColonToken(start_mark, end_mark)
 
-        _scan_to_next_token(stream)
+        _scan_to_next_token(stream, state)
 
         # now find value
         ch = stream.peek()
         if stream.column == 0:
             pass
         elif ch in ("|", ">"):
-            yield _scan_block_scalar(stream, cast(Literal["|", ">"], ch))
+            yield _scan_block_scalar(stream, cast(Literal["|", ">"], ch), state)
         elif ch in ("'", '"'):
             yield _scan_flow_scalar(stream, cast(Literal['"', "'"], ch), is_key=False)
         else:
-            yield _scan_plain_scalar(stream, is_key=False)
+            yield _scan_plain_scalar(stream, state, is_key=False)
 
 
-def _scan_to_next_token(stream: StreamBuffer) -> None:
+def _scan_to_next_token(stream: StreamBuffer, state: State) -> None:
     """Skip spaces, line breaks and comments.
 
     The byte order mark is also stripped,
@@ -267,14 +277,15 @@ def _scan_to_next_token(stream: StreamBuffer) -> None:
         while stream.peek() == " ":
             stream.forward()
         if stream.peek() == "#":
+            state.has_comments = True
             while stream.peek() not in _CHARS_END_NEWLINE:
                 stream.forward()
         if not _scan_line_break(stream):
             found = True
 
 
 def _scan_plain_scalar(
-    stream: StreamBuffer, is_key: bool = False
+    stream: StreamBuffer, state: State, is_key: bool = False
 ) -> KeyToken | ValueToken:
     chunks = []
     start_mark = stream.get_position()
@@ -284,6 +295,7 @@ def _scan_plain_scalar(
     while True:
         length = 0
         if stream.peek() == "#":
+            state.has_comments = True
             break
         while True:
             ch = stream.peek(length)
@@ -302,6 +314,8 @@ def _scan_plain_scalar(
         end_mark = stream.get_position()
         spaces = _scan_plain_spaces(stream, allow_newline=(not is_key))
         if not spaces or stream.peek() == "#" or (stream.column < indent):
+            if stream.peek() == "#":
+                state.has_comments = True
             break
 
     return (
@@ -472,7 +486,9 @@ def _scan_flow_scalar_breaks(stream: StreamBuffer) -> list[str]:
     return chunks
 
 
-def _scan_block_scalar(stream: StreamBuffer, style: Literal["|", ">"]) -> ValueToken:
+def _scan_block_scalar(
+    stream: StreamBuffer, style: Literal["|", ">"], state: State
+) -> ValueToken:
     indent = 0
     folded = style == ">"
     chunks = []
@@ -481,7 +497,7 @@ def _scan_block_scalar(stream: StreamBuffer, style: Literal["|", ">"]) -> ValueToken:
     # Scan the header.
     stream.forward()
     chomping, increment = _scan_block_scalar_indicators(stream, start_mark)
-    _scan_block_scalar_ignored_line(stream, start_mark)
+    _scan_block_scalar_ignored_line(stream, start_mark, state)
 
     # Determine the indentation level and go to the first non-empty line.
     min_indent = indent + 1
@@ -575,10 +591,13 @@ def _scan_block_scalar_indicators(
     return chomping, increment
 
 
-def _scan_block_scalar_ignored_line(stream: StreamBuffer, start_mark: Position) -> None:
+def _scan_block_scalar_ignored_line(
+    stream: StreamBuffer, start_mark: Position, state: State
+) -> None:
     while stream.peek() == " ":
         stream.forward()
     if stream.peek() == "#":
+        state.has_comments = True
        while stream.peek() not in _CHARS_END_NEWLINE:
            stream.forward()
    ch = stream.peek()
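For reference, a minimal sketch of the changed to_items() contract, assuming
only the signatures shown in the diff above (the module path is not visible
in this view, so imports are omitted and the inputs are illustrative):

    # to_items() now parses the whole block eagerly and also returns a
    # State object, whose has_comments flag records whether any "#"
    # comment was encountered while tokenizing.
    items, state = to_items("class: tip\nname: example  # inline comment\n")
    assert items == [("class", "tip"), ("name", "example")]
    assert state.has_comments is True

    # With no comment anywhere in the block, the flag stays False.
    items, state = to_items("class: tip")
    assert items == [("class", "tip")]
    assert state.has_comments is False

Note the trade-off: callers that previously consumed the generator lazily now
receive a fully built list plus the State, since has_comments is only
meaningful once the whole option block has been scanned.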