Skip to content

Commit 846ea58

Browse files
committed
Auto merge of #56884 - euclio:codeblock-diagnostics, r=QuietMisdreavus
rustdoc: overhaul code block lexing errors Fixes #53919. This PR moves the reporting of code block lexing errors from rendering time to an early pass, so we can use the compiler's error reporting mechanisms. This dramatically improves the diagnostics in this situation: we now de-emphasize the lexing errors as a note under a warning that has a span and suggestion instead of just emitting errors at the top level. Additionally, this PR generalizes the markdown -> source span calculation function, which should allow other rustdoc warnings to use better spans in the future. Last, the PR makes sure that the code block is always emitted in the docs, even if it fails to highlight correctly. Of note: - The new pass unfortunately adds another pass over the docs to gather the doc blocks for syntax-checking. I wonder if this could be combined with the pass that looks for testable blocks? I'm not familiar with that code, so I don't know how feasible that is. - `pulldown_cmark` doesn't make it easy to find the spans of the code blocks, so the code that calculates the spans is a little nasty. It works for all the test cases I threw at it, but I wouldn't be surprised if an edge case would break it. Should have a thorough review. - This PR worsens the state of #56885, since those certain fatal lexing errors are now emitted before docs get generated at all.
2 parents 794e228 + 8c93798 commit 846ea58

File tree

10 files changed

+562
-131
lines changed

10 files changed

+562
-131
lines changed

Diff for: src/librustdoc/html/highlight.rs

+55-37
Original file line numberDiff line numberDiff line change
@@ -25,40 +25,51 @@ pub fn render_with_highlighting(
2525
tooltip: Option<(&str, &str)>,
2626
) -> String {
2727
debug!("highlighting: ================\n{}\n==============", src);
28-
let sess = parse::ParseSess::new(FilePathMapping::empty());
29-
let fm = sess.source_map().new_source_file(FileName::Custom("stdin".to_string()),
30-
src.to_string());
31-
3228
let mut out = Vec::new();
3329
if let Some((tooltip, class)) = tooltip {
3430
write!(out, "<div class='information'><div class='tooltip {}'>ⓘ<span \
3531
class='tooltiptext'>{}</span></div></div>",
3632
class, tooltip).unwrap();
3733
}
38-
write_header(class, &mut out).unwrap();
39-
40-
let lexer = match lexer::StringReader::new_without_err(&sess, fm, None, "Output from rustc:") {
41-
Ok(l) => l,
42-
Err(_) => {
43-
let first_line = src.lines().next().unwrap_or_else(|| "");
44-
let mut err = sess.span_diagnostic
45-
.struct_warn(&format!("Invalid doc comment starting with: `{}`\n\
46-
(Ignoring this codeblock)",
47-
first_line));
48-
err.emit();
49-
return String::new();
34+
35+
let sess = parse::ParseSess::new(FilePathMapping::empty());
36+
let fm = sess.source_map().new_source_file(
37+
FileName::Custom(String::from("rustdoc-highlighting")),
38+
src.to_owned(),
39+
);
40+
let highlight_result =
41+
lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| {
42+
let mut classifier = Classifier::new(lexer, sess.source_map());
43+
44+
let mut highlighted_source = vec![];
45+
if classifier.write_source(&mut highlighted_source).is_err() {
46+
Err(classifier.lexer.buffer_fatal_errors())
47+
} else {
48+
Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
49+
}
50+
});
51+
52+
match highlight_result {
53+
Ok(highlighted_source) => {
54+
write_header(class, &mut out).unwrap();
55+
write!(out, "{}", highlighted_source).unwrap();
56+
if let Some(extension) = extension {
57+
write!(out, "{}", extension).unwrap();
58+
}
59+
write_footer(&mut out).unwrap();
5060
}
51-
};
52-
let mut classifier = Classifier::new(lexer, sess.source_map());
53-
if classifier.write_source(&mut out).is_err() {
54-
classifier.lexer.emit_fatal_errors();
55-
return format!("<pre>{}</pre>", src);
56-
}
61+
Err(errors) => {
62+
// If errors are encountered while trying to highlight, cancel the errors and just emit
63+
// the unhighlighted source. The errors will have already been reported in the
64+
// `check-code-block-syntax` pass.
65+
for mut error in errors {
66+
error.cancel();
67+
}
5768

58-
if let Some(extension) = extension {
59-
write!(out, "{}", extension).unwrap();
69+
write!(out, "<pre><code>{}</code></pre>", src).unwrap();
70+
}
6071
}
61-
write_footer(&mut out).unwrap();
72+
6273
String::from_utf8_lossy(&out[..]).into_owned()
6374
}
6475

@@ -151,6 +162,17 @@ impl<U: Write> Writer for U {
151162
}
152163
}
153164

165+
enum HighlightError {
166+
LexError,
167+
IoError(io::Error),
168+
}
169+
170+
impl From<io::Error> for HighlightError {
171+
fn from(err: io::Error) -> Self {
172+
HighlightError::IoError(err)
173+
}
174+
}
175+
154176
impl<'a> Classifier<'a> {
155177
fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
156178
Classifier {
@@ -162,17 +184,11 @@ impl<'a> Classifier<'a> {
162184
}
163185
}
164186

165-
/// Gets the next token out of the lexer, emitting fatal errors if lexing fails.
166-
fn try_next_token(&mut self) -> io::Result<TokenAndSpan> {
187+
/// Gets the next token out of the lexer.
188+
fn try_next_token(&mut self) -> Result<TokenAndSpan, HighlightError> {
167189
match self.lexer.try_next_token() {
168190
Ok(tas) => Ok(tas),
169-
Err(_) => {
170-
let mut err = self.lexer.sess.span_diagnostic
171-
.struct_warn("Backing out of syntax highlighting");
172-
err.note("You probably did not intend to render this as a rust code-block");
173-
err.emit();
174-
Err(io::Error::new(io::ErrorKind::Other, ""))
175-
}
191+
Err(_) => Err(HighlightError::LexError),
176192
}
177193
}
178194

@@ -185,7 +201,7 @@ impl<'a> Classifier<'a> {
185201
/// source.
186202
fn write_source<W: Writer>(&mut self,
187203
out: &mut W)
188-
-> io::Result<()> {
204+
-> Result<(), HighlightError> {
189205
loop {
190206
let next = self.try_next_token()?;
191207
if next.tok == token::Eof {
@@ -202,7 +218,7 @@ impl<'a> Classifier<'a> {
202218
fn write_token<W: Writer>(&mut self,
203219
out: &mut W,
204220
tas: TokenAndSpan)
205-
-> io::Result<()> {
221+
-> Result<(), HighlightError> {
206222
let klass = match tas.tok {
207223
token::Shebang(s) => {
208224
out.string(Escape(&s.as_str()), Class::None)?;
@@ -341,7 +357,9 @@ impl<'a> Classifier<'a> {
341357

342358
// Anything that didn't return above is the simple case where we the
343359
// class just spans a single token, so we can use the `string` method.
344-
out.string(Escape(&self.snip(tas.sp)), klass)
360+
out.string(Escape(&self.snip(tas.sp)), klass)?;
361+
362+
Ok(())
345363
}
346364

347365
// Helper function to get a snippet from the source_map.

Diff for: src/librustdoc/html/markdown.rs

+109
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,115 @@ pub fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> {
919919
links
920920
}
921921

922+
#[derive(Debug)]
923+
crate struct RustCodeBlock {
924+
/// The range in the markdown that the code block occupies. Note that this includes the fences
925+
/// for fenced code blocks.
926+
pub range: Range<usize>,
927+
/// The range in the markdown that the code within the code block occupies.
928+
pub code: Range<usize>,
929+
pub is_fenced: bool,
930+
pub syntax: Option<String>,
931+
}
932+
933+
/// Returns a range of bytes for each code block in the markdown that is tagged as `rust` or
934+
/// untagged (and assumed to be rust).
935+
crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {
936+
let mut code_blocks = vec![];
937+
938+
if md.is_empty() {
939+
return code_blocks;
940+
}
941+
942+
let mut opts = Options::empty();
943+
opts.insert(OPTION_ENABLE_TABLES);
944+
opts.insert(OPTION_ENABLE_FOOTNOTES);
945+
let mut p = Parser::new_ext(md, opts);
946+
947+
let mut code_block_start = 0;
948+
let mut code_start = 0;
949+
let mut is_fenced = false;
950+
let mut previous_offset = 0;
951+
let mut in_rust_code_block = false;
952+
while let Some(event) = p.next() {
953+
let offset = p.get_offset();
954+
955+
match event {
956+
Event::Start(Tag::CodeBlock(syntax)) => {
957+
let lang_string = if syntax.is_empty() {
958+
LangString::all_false()
959+
} else {
960+
LangString::parse(&*syntax, ErrorCodes::Yes)
961+
};
962+
963+
if lang_string.rust {
964+
in_rust_code_block = true;
965+
966+
code_start = offset;
967+
code_block_start = match md[previous_offset..offset].find("```") {
968+
Some(fence_idx) => {
969+
is_fenced = true;
970+
previous_offset + fence_idx
971+
}
972+
None => offset,
973+
};
974+
}
975+
}
976+
Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block => {
977+
in_rust_code_block = false;
978+
979+
let code_block_end = if is_fenced {
980+
let fence_str = &md[previous_offset..offset]
981+
.chars()
982+
.rev()
983+
.collect::<String>();
984+
fence_str
985+
.find("```")
986+
.map(|fence_idx| offset - fence_idx)
987+
.unwrap_or_else(|| offset)
988+
} else if md
989+
.as_bytes()
990+
.get(offset)
991+
.map(|b| *b == b'\n')
992+
.unwrap_or_default()
993+
{
994+
offset - 1
995+
} else {
996+
offset
997+
};
998+
999+
let code_end = if is_fenced {
1000+
previous_offset
1001+
} else {
1002+
code_block_end
1003+
};
1004+
1005+
code_blocks.push(RustCodeBlock {
1006+
is_fenced,
1007+
range: Range {
1008+
start: code_block_start,
1009+
end: code_block_end,
1010+
},
1011+
code: Range {
1012+
start: code_start,
1013+
end: code_end,
1014+
},
1015+
syntax: if !syntax.is_empty() {
1016+
Some(syntax.into_owned())
1017+
} else {
1018+
None
1019+
},
1020+
});
1021+
}
1022+
_ => (),
1023+
}
1024+
1025+
previous_offset = offset;
1026+
}
1027+
1028+
code_blocks
1029+
}
1030+
9221031
#[derive(Clone, Default, Debug)]
9231032
pub struct IdMap {
9241033
map: FxHashMap<String, usize>,

Diff for: src/librustdoc/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
html_root_url = "https://doc.rust-lang.org/nightly/",
44
html_playground_url = "https://play.rust-lang.org/")]
55

6+
#![feature(bind_by_move_pattern_guards)]
67
#![feature(rustc_private)]
78
#![feature(box_patterns)]
89
#![feature(box_syntax)]

Diff for: src/librustdoc/passes/check_code_block_syntax.rs

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
use errors::Applicability;
2+
use syntax::parse::lexer::{TokenAndSpan, StringReader as Lexer};
3+
use syntax::parse::{ParseSess, token};
4+
use syntax::source_map::FilePathMapping;
5+
use syntax_pos::FileName;
6+
7+
use clean;
8+
use core::DocContext;
9+
use fold::DocFolder;
10+
use html::markdown::{self, RustCodeBlock};
11+
use passes::Pass;
12+
13+
pub const CHECK_CODE_BLOCK_SYNTAX: Pass =
14+
Pass::early("check-code-block-syntax", check_code_block_syntax,
15+
"validates syntax inside Rust code blocks");
16+
17+
pub fn check_code_block_syntax(krate: clean::Crate, cx: &DocContext) -> clean::Crate {
18+
SyntaxChecker { cx }.fold_crate(krate)
19+
}
20+
21+
struct SyntaxChecker<'a, 'tcx: 'a, 'rcx: 'a> {
22+
cx: &'a DocContext<'a, 'tcx, 'rcx>,
23+
}
24+
25+
impl<'a, 'tcx, 'rcx> SyntaxChecker<'a, 'tcx, 'rcx> {
26+
fn check_rust_syntax(&self, item: &clean::Item, dox: &str, code_block: RustCodeBlock) {
27+
let sess = ParseSess::new(FilePathMapping::empty());
28+
let source_file = sess.source_map().new_source_file(
29+
FileName::Custom(String::from("doctest")),
30+
dox[code_block.code].to_owned(),
31+
);
32+
33+
let errors = Lexer::new_or_buffered_errs(&sess, source_file, None).and_then(|mut lexer| {
34+
while let Ok(TokenAndSpan { tok, .. }) = lexer.try_next_token() {
35+
if tok == token::Eof {
36+
break;
37+
}
38+
}
39+
40+
let errors = lexer.buffer_fatal_errors();
41+
42+
if !errors.is_empty() {
43+
Err(errors)
44+
} else {
45+
Ok(())
46+
}
47+
});
48+
49+
if let Err(errors) = errors {
50+
let mut diag = if let Some(sp) =
51+
super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs)
52+
{
53+
let mut diag = self
54+
.cx
55+
.sess()
56+
.struct_span_warn(sp, "could not parse code block as Rust code");
57+
58+
for mut err in errors {
59+
diag.note(&format!("error from rustc: {}", err.message()));
60+
err.cancel();
61+
}
62+
63+
if code_block.syntax.is_none() && code_block.is_fenced {
64+
let sp = sp.from_inner_byte_pos(0, 3);
65+
diag.span_suggestion_with_applicability(
66+
sp,
67+
"mark blocks that do not contain Rust code as text",
68+
String::from("```text"),
69+
Applicability::MachineApplicable,
70+
);
71+
}
72+
73+
diag
74+
} else {
75+
// We couldn't calculate the span of the markdown block that had the error, so our
76+
// diagnostics are going to be a bit lacking.
77+
let mut diag = self.cx.sess().struct_span_warn(
78+
super::span_of_attrs(&item.attrs),
79+
"doc comment contains an invalid Rust code block",
80+
);
81+
82+
for mut err in errors {
83+
// Don't bother reporting the error, because we can't show where it happened.
84+
err.cancel();
85+
}
86+
87+
if code_block.syntax.is_none() && code_block.is_fenced {
88+
diag.help("mark blocks that do not contain Rust code as text: ```text");
89+
}
90+
91+
diag
92+
};
93+
94+
diag.emit();
95+
}
96+
}
97+
}
98+
99+
impl<'a, 'tcx, 'rcx> DocFolder for SyntaxChecker<'a, 'tcx, 'rcx> {
100+
fn fold_item(&mut self, item: clean::Item) -> Option<clean::Item> {
101+
if let Some(dox) = &item.attrs.collapsed_doc_value() {
102+
for code_block in markdown::rust_code_blocks(&dox) {
103+
self.check_rust_syntax(&item, &dox, code_block);
104+
}
105+
}
106+
107+
self.fold_item_recur(item)
108+
}
109+
}

0 commit comments

Comments
 (0)