Skip to content

Commit dab73ea

Browse files
committed
Lower RepeatRange to railroads more elaborately
For `RepeatRange(e, a, b)`, we were rendering `e` on the main line then rendering under it a message about how many times it may or must repeat based on `a` and `b`. The trouble is that if we say that something "repeats once" on the recurrent edge -- after we've already consumed a thing -- that reads reasonably as though we're saying that two things can be consumed when that's not what we mean. Similarly, it's a bit odd to say, on the recurrent edge, that something must "repeat twice" when that edge (and presumably then that rule) may not be taken at all. Let's solve all this by doing the following: - For `e{1..1}`, simply render the node. - For `e{0..1}`, treat this as simply `e?`. - For `e{0..}`, treat this as simply `e*`. - For `e{1..}`, treat this as simply `e+`. - For `e{a..0}`, render an empty node. - For `e{0..b} b > 1`, treat this as `(e{1..b})?`. - For `e{1..b} b > 1`, render the node on the main line, then on the recurrent line render "at most {b - 1} more times". - For `e{a..b} a > 1`, make a sequence of length `a` where the final node repeats `{1..b - (a - 1)}` times (or `{1..}` times if `b` is unbounded). (We'll also add a check in parsing to ensure that for the range to be well formed `a <= b`.) As it turns out, the most straightforward way to implement this isn't by recursing. Doing that means we end up needing to take special care to handle the suffix and the footnote, we have to build up an extra `Expression` we don't need, and we have to `unwrap` the call. Instead, it works better to treat this lowering in the manner of a transitioning state machine in the spirit of `loop match` as proposed in RFC 3720.
1 parent 8a37649 commit dab73ea

File tree

3 files changed

+151
-107
lines changed

3 files changed

+151
-107
lines changed

Diff for: mdbook-spec/src/grammar.rs

+11-3
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ pub struct Production {
3131
is_root: bool,
3232
}
3333

34-
#[derive(Debug)]
34+
#[derive(Clone, Debug)]
3535
struct Expression {
3636
kind: ExpressionKind,
3737
/// Suffix is the `_foo_` part that is shown as a subscript.
@@ -40,7 +40,7 @@ struct Expression {
4040
footnote: Option<String>,
4141
}
4242

43-
#[derive(Debug)]
43+
#[derive(Clone, Debug)]
4444
enum ExpressionKind {
4545
/// `( A B C )`
4646
Grouped(Box<Expression>),
@@ -78,7 +78,7 @@ enum ExpressionKind {
7878
Unicode(String),
7979
}
8080

81-
#[derive(Debug)]
81+
#[derive(Clone, Debug)]
8282
enum Characters {
8383
/// `LF`
8484
Named(String),
@@ -97,6 +97,14 @@ impl Grammar {
9797
}
9898

9999
impl Expression {
100+
fn new_kind(kind: ExpressionKind) -> Self {
101+
Self {
102+
kind,
103+
suffix: None,
104+
footnote: None,
105+
}
106+
}
107+
100108
fn visit_nt(&self, callback: &mut dyn FnMut(&str)) {
101109
match &self.kind {
102110
ExpressionKind::Grouped(e)

Diff for: mdbook-spec/src/grammar/parser.rs

+4
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,10 @@ impl Parser<'_> {
263263
} else if let Some(cap) = self.take_re(&RANGE_RE) {
264264
let a = cap.get(1).map(|m| m.as_str().parse::<u32>().unwrap());
265265
let b = cap.get(2).map(|m| m.as_str().parse::<u32>().unwrap());
266+
match (a, b) {
267+
(Some(a), Some(b)) if b < a => bail!(self, "range {a}..{b} is malformed"),
268+
_ => {}
269+
}
266270
kind = ExpressionKind::RepeatRange(box_kind(kind), a, b);
267271
}
268272

Diff for: mdbook-spec/src/grammar/render_railroad.rs

+136-104
Original file line numberDiff line numberDiff line change
@@ -93,115 +93,147 @@ impl Expression {
9393
stack: bool,
9494
link_map: &HashMap<String, String>,
9595
) -> Option<Box<dyn Node>> {
96-
let n: Box<dyn Node> = match &self.kind {
97-
ExpressionKind::Grouped(e) => {
98-
// I don't think this needs anything special. The grouped
99-
// expression is usually an Alt or Optional or something like
100-
// that which ends up as a distinct railroad node. But I'm not
101-
// sure.
102-
e.render_railroad(stack, link_map)?
103-
}
104-
ExpressionKind::Alt(es) => {
105-
let choices: Vec<_> = es
106-
.iter()
107-
.map(|e| e.render_railroad(stack, link_map))
108-
.filter_map(|n| n)
109-
.collect();
110-
Box::new(Choice::<Box<dyn Node>>::new(choices))
111-
}
112-
ExpressionKind::Sequence(es) => {
113-
let es: Vec<_> = es.iter().collect();
114-
let make_seq = |es: &[&Expression]| {
115-
let seq: Vec<_> = es
116-
.iter()
117-
.map(|e| e.render_railroad(stack, link_map))
118-
.filter_map(|n| n)
119-
.collect();
120-
let seq: Sequence<Box<dyn Node>> = Sequence::new(seq);
121-
Box::new(seq)
122-
};
96+
let mut state;
97+
let mut state_ref = &self.kind;
98+
let n: Box<dyn Node> = 'l: loop {
99+
state_ref = 'cont: {
100+
break 'l match state_ref {
101+
// Render grouped nodes and `e{1..1}` repeats directly.
102+
ExpressionKind::Grouped(e)
103+
| ExpressionKind::RepeatRange(e, Some(1), Some(1)) => {
104+
e.render_railroad(stack, link_map)?
105+
}
106+
ExpressionKind::Alt(es) => {
107+
let choices: Vec<_> = es
108+
.iter()
109+
.map(|e| e.render_railroad(stack, link_map))
110+
.filter_map(|n| n)
111+
.collect();
112+
Box::new(Choice::<Box<dyn Node>>::new(choices))
113+
}
114+
ExpressionKind::Sequence(es) => {
115+
let es: Vec<_> = es.iter().collect();
116+
let make_seq = |es: &[&Expression]| {
117+
let seq: Vec<_> = es
118+
.iter()
119+
.map(|e| e.render_railroad(stack, link_map))
120+
.filter_map(|n| n)
121+
.collect();
122+
let seq: Sequence<Box<dyn Node>> = Sequence::new(seq);
123+
Box::new(seq)
124+
};
123125

124-
// If `stack` is true, split the sequence on Breaks and stack them vertically.
125-
if stack {
126-
// First, trim a Break from the front and back.
127-
let es = if matches!(
128-
es.first(),
129-
Some(e) if e.is_break()
130-
) {
131-
&es[1..]
132-
} else {
133-
&es[..]
134-
};
135-
let es = if matches!(
136-
es.last(),
137-
Some(e) if e.is_break()
138-
) {
139-
&es[..es.len() - 1]
140-
} else {
141-
&es[..]
142-
};
126+
// If `stack` is true, split the sequence on Breaks and
127+
// stack them vertically.
128+
if stack {
129+
// First, trim a Break from the front and back.
130+
let es = if matches!(
131+
es.first(),
132+
Some(e) if e.is_break()
133+
) {
134+
&es[1..]
135+
} else {
136+
&es[..]
137+
};
138+
let es = if matches!(
139+
es.last(),
140+
Some(e) if e.is_break()
141+
) {
142+
&es[..es.len() - 1]
143+
} else {
144+
&es[..]
145+
};
143146

144-
let mut breaks: Vec<_> =
145-
es.split(|e| e.is_break()).map(|es| make_seq(es)).collect();
146-
// If there aren't any breaks, don't bother stacking.
147-
if breaks.len() == 1 {
148-
breaks.pop().unwrap()
149-
} else {
150-
Box::new(Stack::new(breaks))
147+
let mut breaks: Vec<_> =
148+
es.split(|e| e.is_break()).map(|es| make_seq(es)).collect();
149+
// If there aren't any breaks, don't bother stacking.
150+
if breaks.len() == 1 {
151+
breaks.pop().unwrap()
152+
} else {
153+
Box::new(Stack::new(breaks))
154+
}
155+
} else {
156+
make_seq(&es)
157+
}
151158
}
152-
} else {
153-
make_seq(&es)
154-
}
155-
}
156-
ExpressionKind::Optional(e) => {
157-
let n = e.render_railroad(stack, link_map)?;
158-
Box::new(Optional::new(n))
159-
}
160-
ExpressionKind::Repeat(e) => {
161-
let n = e.render_railroad(stack, link_map)?;
162-
Box::new(Optional::new(Repeat::new(n, railroad::Empty)))
163-
}
164-
ExpressionKind::RepeatNonGreedy(e) => {
165-
let n = e.render_railroad(stack, link_map)?;
166-
let r = Box::new(Optional::new(Repeat::new(n, railroad::Empty)));
167-
let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string()));
168-
Box::new(lbox)
169-
}
170-
ExpressionKind::RepeatPlus(e) => {
171-
let n = e.render_railroad(stack, link_map)?;
172-
Box::new(Repeat::new(n, railroad::Empty))
173-
}
174-
ExpressionKind::RepeatPlusNonGreedy(e) => {
175-
let n = e.render_railroad(stack, link_map)?;
176-
let r = Repeat::new(n, railroad::Empty);
177-
let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string()));
178-
Box::new(lbox)
179-
}
180-
ExpressionKind::RepeatRange(e, a, b) => {
181-
let n = e.render_railroad(stack, link_map)?;
182-
let cmt = match (a, b) {
183-
(Some(a), Some(b)) => format!("repeat between {a} and {b} times"),
184-
(None, Some(b)) => format!("repeat at most {b} times"),
185-
(Some(a), None) => format!("repeat at least {a} times"),
186-
(None, None) => panic!("infinite repeat should use *"),
159+
// Treat `e?` and `e{..1}` / `e{0..1}` equally.
160+
ExpressionKind::Optional(e)
161+
| ExpressionKind::RepeatRange(e, None | Some(0), Some(1)) => {
162+
let n = e.render_railroad(stack, link_map)?;
163+
Box::new(Optional::new(n))
164+
}
165+
// Treat `e*` and `e{..}` / `e{0..}` equally.
166+
ExpressionKind::Repeat(e)
167+
| ExpressionKind::RepeatRange(e, None | Some(0), None) => {
168+
let n = e.render_railroad(stack, link_map)?;
169+
Box::new(Optional::new(Repeat::new(n, railroad::Empty)))
170+
}
171+
ExpressionKind::RepeatNonGreedy(e) => {
172+
let n = e.render_railroad(stack, link_map)?;
173+
let r = Box::new(Optional::new(Repeat::new(n, railroad::Empty)));
174+
let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string()));
175+
Box::new(lbox)
176+
}
177+
// Treat `e+` and `e{1..}` equally.
178+
ExpressionKind::RepeatPlus(e)
179+
| ExpressionKind::RepeatRange(e, Some(1), None) => {
180+
let n = e.render_railroad(stack, link_map)?;
181+
Box::new(Repeat::new(n, railroad::Empty))
182+
}
183+
ExpressionKind::RepeatPlusNonGreedy(e) => {
184+
let n = e.render_railroad(stack, link_map)?;
185+
let r = Repeat::new(n, railroad::Empty);
186+
let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string()));
187+
Box::new(lbox)
188+
}
189+
// For `e{a..0}` render an empty node.
190+
ExpressionKind::RepeatRange(_, _, Some(0)) => Box::new(railroad::Empty),
191+
// Treat `e{..b}` / `e{0..b}` as `(e{1..b})?`.
192+
ExpressionKind::RepeatRange(e, None | Some(0), Some(b @ 2..)) => {
193+
state = ExpressionKind::Optional(Box::new(Expression::new_kind(
194+
ExpressionKind::RepeatRange(e.clone(), Some(1), Some(*b)),
195+
)));
196+
break 'cont &state;
197+
}
198+
// Render `e{1..b}` directly.
199+
ExpressionKind::RepeatRange(e, Some(1), Some(b @ 2..)) => {
200+
let n = e.render_railroad(stack, link_map)?;
201+
let cmt = format!("at most {b} more times", b = b - 1);
202+
let r = Repeat::new(n, Comment::new(cmt));
203+
Box::new(r)
204+
}
205+
// Treat `e{a..}` as `e{a-1..a-1} e{1..}` and `e{a..b}` as
206+
// `e{a-1..a-1} e{1..b-(a-1)}`, and treat `e{x..x}` for some
207+
// `x` as a sequence of `e` nodes of length `x`.
208+
ExpressionKind::RepeatRange(e, Some(a @ 2..), b) => {
209+
let mut es = Vec::<Expression>::new();
210+
for _ in 0..(a - 1) {
211+
es.push(*e.clone());
212+
}
213+
es.push(Expression::new_kind(ExpressionKind::RepeatRange(
214+
e.clone(),
215+
Some(1),
216+
b.map(|x| x - (a - 1)),
217+
)));
218+
state = ExpressionKind::Sequence(es);
219+
break 'cont &state;
220+
}
221+
ExpressionKind::Nt(nt) => node_for_nt(link_map, nt),
222+
ExpressionKind::Terminal(t) => Box::new(Terminal::new(t.clone())),
223+
ExpressionKind::Prose(s) => Box::new(Terminal::new(s.clone())),
224+
ExpressionKind::Break(_) => return None,
225+
ExpressionKind::Charset(set) => {
226+
let ns: Vec<_> = set.iter().map(|c| c.render_railroad(link_map)).collect();
227+
Box::new(Choice::<Box<dyn Node>>::new(ns))
228+
}
229+
ExpressionKind::NegExpression(e) => {
230+
let n = e.render_railroad(stack, link_map)?;
231+
let ch = node_for_nt(link_map, "CHAR");
232+
Box::new(Except::new(Box::new(ch), n))
233+
}
234+
ExpressionKind::Unicode(s) => Box::new(Terminal::new(format!("U+{}", s))),
187235
};
188-
let r = Repeat::new(n, Comment::new(cmt));
189-
Box::new(r)
190-
}
191-
ExpressionKind::Nt(nt) => node_for_nt(link_map, nt),
192-
ExpressionKind::Terminal(t) => Box::new(Terminal::new(t.clone())),
193-
ExpressionKind::Prose(s) => Box::new(Terminal::new(s.clone())),
194-
ExpressionKind::Break(_) => return None,
195-
ExpressionKind::Charset(set) => {
196-
let ns: Vec<_> = set.iter().map(|c| c.render_railroad(link_map)).collect();
197-
Box::new(Choice::<Box<dyn Node>>::new(ns))
198-
}
199-
ExpressionKind::NegExpression(e) => {
200-
let n = e.render_railroad(stack, link_map)?;
201-
let ch = node_for_nt(link_map, "CHAR");
202-
Box::new(Except::new(Box::new(ch), n))
203236
}
204-
ExpressionKind::Unicode(s) => Box::new(Terminal::new(format!("U+{}", s))),
205237
};
206238
if let Some(suffix) = &self.suffix {
207239
let suffix = strip_markdown(suffix);

0 commit comments

Comments
 (0)