Skip to content

Commit 5ad2d6d

Browse files
authored
🔧 More improvements for directive option parsing (#919)
1 parent 8614eca commit 5ad2d6d

File tree

7 files changed

+184
-119
lines changed

7 files changed

+184
-119
lines changed

docs/syntax/roles-and-directives.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,18 +73,16 @@ print(f'my {a}nd line')
7373
```
7474
:::
7575

76-
Comments, starting `#`, are also allowed in between options or at the end of values, and are ignored.
7776
The values can be enclosed in quotes (`"` or `'`) and span multiple lines.
7877
Newline behaviour can be controlled by starting the value with `|` (preserve newlines) or `>` (collapse newlines):
7978

8079
:::{myst-example}
8180
```{code-block} python
82-
:lineno-start: 10 # this is a comment
83-
: # this is also a comment
81+
:lineno-start: 10
8482
:emphasize-lines: "1, 3"
8583
:caption: |
86-
: This is my
87-
: multi-line caption. It is *pretty nifty* ;-)
84+
: This is my
85+
: multi-line caption. It is *pretty nifty* ;-)
8886
8987
a = 2
9088
print('my 1st line')

myst_parser/parsers/directives.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -188,16 +188,13 @@ def _parse_directive_options(
188188
yaml_block = content
189189
content = ""
190190
yaml_block = dedent(yaml_block)
191-
elif content.lstrip().startswith(":"):
192-
# TODO deprecate allowing initial whitespace (by lstripping)
193-
# or at least make it that all have the same indent
194-
# also look at mystjs implementation
191+
elif content.startswith(":"):
195192
content_lines = content.splitlines()
196193
yaml_lines = []
197194
while content_lines:
198-
if not content_lines[0].lstrip().startswith(":"):
195+
if not content_lines[0].startswith(":"):
199196
break
200-
yaml_lines.append(content_lines.pop(0).lstrip()[1:])
197+
yaml_lines.append(content_lines.pop(0)[1:])
201198
yaml_block = "\n".join(yaml_lines)
202199
content = "\n".join(content_lines)
203200

@@ -227,10 +224,13 @@ def _parse_directive_options(
227224
)
228225
return _DirectiveOptions(content, yaml_options, yaml_errors, has_options_block)
229226

227+
validation_errors: list[ParseWarnings] = []
228+
230229
options: dict[str, str] = {}
231230
if yaml_block is not None:
232231
try:
233-
options = dict(options_to_items(yaml_block))
232+
_options, state = options_to_items(yaml_block)
233+
options = dict(_options)
234234
except TokenizeError as err:
235235
return _DirectiveOptions(
236236
content,
@@ -244,6 +244,14 @@ def _parse_directive_options(
244244
],
245245
has_options_block,
246246
)
247+
if state.has_comments:
248+
validation_errors.append(
249+
ParseWarnings(
250+
"Directive options has # comments, which may not be supported in future versions.",
251+
line,
252+
MystWarnings.DIRECTIVE_OPTION_COMMENTS,
253+
)
254+
)
247255

248256
if issubclass(directive_class, TestDirective):
249257
# technically this directive spec only accepts one option ('option')
@@ -258,7 +266,6 @@ def _parse_directive_options(
258266
options_spec: dict[str, Callable] = directive_class.option_spec
259267
unknown_options: list[str] = []
260268
new_options: dict[str, Any] = {}
261-
validation_errors: list[ParseWarnings] = []
262269
value: str | None
263270
for name, value in options.items():
264271
try:

myst_parser/parsers/options.py

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,14 @@ def __str__(self) -> str:
163163
return "\n".join(lines)
164164

165165

166+
@dataclass
167+
class State:
168+
has_comments: bool = False
169+
170+
166171
def to_items(
167172
text: str, line_offset: int = 0, column_offset: int = 0
168-
) -> Iterable[tuple[str, str]]:
173+
) -> tuple[list[tuple[str, str]], State]:
169174
"""Parse a directive option block into (key, value) tuples.
170175
171176
:param text: The directive option text.
@@ -174,12 +179,17 @@ def to_items(
174179
175180
:raises: `TokenizeError`
176181
"""
177-
for key_token, value_token in to_tokens(text, line_offset, column_offset):
178-
yield key_token.value, value_token.value if value_token is not None else ""
182+
output = []
183+
state = State()
184+
for key_token, value_token in _to_tokens(text, state, line_offset, column_offset):
185+
output.append(
186+
(key_token.value, value_token.value if value_token is not None else "")
187+
)
188+
return output, state
179189

180190

181-
def to_tokens(
182-
text: str, line_offset: int = 0, column_offset: int = 0
191+
def _to_tokens(
192+
text: str, state: State, line_offset: int = 0, column_offset: int = 0
183193
) -> Iterable[tuple[KeyToken, ValueToken | None]]:
184194
"""Parse a directive option, and yield key/value token pairs.
185195
@@ -191,7 +201,7 @@ def to_tokens(
191201
"""
192202
key_token: KeyToken | None = None
193203
try:
194-
for token in tokenize(text):
204+
for token in _tokenize(text, state):
195205
if isinstance(token, KeyToken):
196206
if key_token is not None:
197207
yield key_token, None
@@ -207,12 +217,12 @@ def to_tokens(
207217
raise
208218

209219

210-
def tokenize(text: str) -> Iterable[Token]:
220+
def _tokenize(text: str, state: State) -> Iterable[Token]:
211221
"""Yield tokens from a directive option stream."""
212222
stream = StreamBuffer(text)
213223

214224
while True:
215-
_scan_to_next_token(stream)
225+
_scan_to_next_token(stream, state)
216226

217227
if stream.peek() == _CHARS_END:
218228
break
@@ -227,9 +237,9 @@ def tokenize(text: str) -> Iterable[Token]:
227237
if ch in ("'", '"'):
228238
yield _scan_flow_scalar(stream, cast(Literal['"', "'"], ch), is_key=True)
229239
else:
230-
yield _scan_plain_scalar(stream, is_key=True)
240+
yield _scan_plain_scalar(stream, state, is_key=True)
231241

232-
_scan_to_next_token(stream)
242+
_scan_to_next_token(stream, state)
233243

234244
# check next char is colon + space
235245
if stream.peek() != ":":
@@ -240,21 +250,21 @@ def tokenize(text: str) -> Iterable[Token]:
240250
end_mark = stream.get_position()
241251
yield ColonToken(start_mark, end_mark)
242252

243-
_scan_to_next_token(stream)
253+
_scan_to_next_token(stream, state)
244254

245255
# now find value
246256
ch = stream.peek()
247257
if stream.column == 0:
248258
pass
249259
elif ch in ("|", ">"):
250-
yield _scan_block_scalar(stream, cast(Literal["|", ">"], ch))
260+
yield _scan_block_scalar(stream, cast(Literal["|", ">"], ch), state)
251261
elif ch in ("'", '"'):
252262
yield _scan_flow_scalar(stream, cast(Literal['"', "'"], ch), is_key=False)
253263
else:
254-
yield _scan_plain_scalar(stream, is_key=False)
264+
yield _scan_plain_scalar(stream, state, is_key=False)
255265

256266

257-
def _scan_to_next_token(stream: StreamBuffer) -> None:
267+
def _scan_to_next_token(stream: StreamBuffer, state: State) -> None:
258268
"""Skip spaces, line breaks and comments.
259269
260270
The byte order mark is also stripped,
@@ -267,14 +277,15 @@ def _scan_to_next_token(stream: StreamBuffer) -> None:
267277
while stream.peek() == " ":
268278
stream.forward()
269279
if stream.peek() == "#":
280+
state.has_comments = True
270281
while stream.peek() not in _CHARS_END_NEWLINE:
271282
stream.forward()
272283
if not _scan_line_break(stream):
273284
found = True
274285

275286

276287
def _scan_plain_scalar(
277-
stream: StreamBuffer, is_key: bool = False
288+
stream: StreamBuffer, state: State, is_key: bool = False
278289
) -> KeyToken | ValueToken:
279290
chunks = []
280291
start_mark = stream.get_position()
@@ -284,6 +295,7 @@ def _scan_plain_scalar(
284295
while True:
285296
length = 0
286297
if stream.peek() == "#":
298+
state.has_comments = True
287299
break
288300
while True:
289301
ch = stream.peek(length)
@@ -302,6 +314,8 @@ def _scan_plain_scalar(
302314
end_mark = stream.get_position()
303315
spaces = _scan_plain_spaces(stream, allow_newline=(not is_key))
304316
if not spaces or stream.peek() == "#" or (stream.column < indent):
317+
if stream.peek() == "#":
318+
state.has_comments = True
305319
break
306320

307321
return (
@@ -472,7 +486,9 @@ def _scan_flow_scalar_breaks(stream: StreamBuffer) -> list[str]:
472486
return chunks
473487

474488

475-
def _scan_block_scalar(stream: StreamBuffer, style: Literal["|", ">"]) -> ValueToken:
489+
def _scan_block_scalar(
490+
stream: StreamBuffer, style: Literal["|", ">"], state: State
491+
) -> ValueToken:
476492
indent = 0
477493
folded = style == ">"
478494
chunks = []
@@ -481,7 +497,7 @@ def _scan_block_scalar(stream: StreamBuffer, style: Literal["|", ">"]) -> ValueT
481497
# Scan the header.
482498
stream.forward()
483499
chomping, increment = _scan_block_scalar_indicators(stream, start_mark)
484-
_scan_block_scalar_ignored_line(stream, start_mark)
500+
_scan_block_scalar_ignored_line(stream, start_mark, state)
485501

486502
# Determine the indentation level and go to the first non-empty line.
487503
min_indent = indent + 1
@@ -575,10 +591,13 @@ def _scan_block_scalar_indicators(
575591
return chomping, increment
576592

577593

578-
def _scan_block_scalar_ignored_line(stream: StreamBuffer, start_mark: Position) -> None:
594+
def _scan_block_scalar_ignored_line(
595+
stream: StreamBuffer, start_mark: Position, state: State
596+
) -> None:
579597
while stream.peek() == " ":
580598
stream.forward()
581599
if stream.peek() == "#":
600+
state.has_comments = True
582601
while stream.peek() not in _CHARS_END_NEWLINE:
583602
stream.forward()
584603
ch = stream.peek()

myst_parser/warnings_.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ class MystWarnings(Enum):
3434
"""Issue parsing directive."""
3535
DIRECTIVE_OPTION = "directive_option"
3636
"""Issue parsing directive options."""
37+
DIRECTIVE_OPTION_COMMENTS = "directive_comments"
38+
"""Directive options have # comments, which may not be supported in future versions."""
3739
DIRECTIVE_BODY = "directive_body"
3840
"""Issue parsing directive body."""
3941
UNKNOWN_DIRECTIVE = "directive_unknown"

tests/test_renderers/fixtures/directive_parsing.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,25 @@ options:
5757
warnings: []
5858
.
5959

60+
note: comment in option
61+
.
62+
```{note}
63+
:class: name # oops
64+
a
65+
```
66+
.
67+
arguments: []
68+
body:
69+
- a
70+
content_offset: 1
71+
options:
72+
class:
73+
- name
74+
warnings:
75+
- 'ParseWarnings(msg=''Directive options has # comments, which may not be supported
76+
in future versions.'', lineno=0, type=<MystWarnings.DIRECTIVE_OPTION_COMMENTS: ''directive_comments''>)'
77+
.
78+
6079
note: content after option with new line
6180
.
6281
```{note}

0 commit comments

Comments
 (0)