Skip to content

Commit 2cedb7c

Browse files
Add new tool to check HTML
1 parent 835150e commit 2cedb7c

File tree

7 files changed

+163
-1
lines changed

7 files changed

+163
-1
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1567,6 +1567,13 @@ dependencies = [
15671567
"winapi 0.3.9",
15681568
]
15691569

1570+
[[package]]
1571+
name = "html-checker"
1572+
version = "0.1.0"
1573+
dependencies = [
1574+
"walkdir",
1575+
]
1576+
15701577
[[package]]
15711578
name = "html5ever"
15721579
version = "0.25.1"

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ members = [
3434
"src/tools/unicode-table-generator",
3535
"src/tools/expand-yaml-anchors",
3636
"src/tools/jsondocck",
37+
"src/tools/html-checker",
3738
]
3839

3940
exclude = [

src/bootstrap/builder.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,7 @@ impl<'a> Builder<'a> {
432432
test::RustdocTheme,
433433
test::RustdocUi,
434434
test::RustdocJson,
435+
test::HtmlCheck,
435436
// Run bootstrap close to the end as it's unlikely to fail
436437
test::Bootstrap,
437438
// Run run-make last, since these won't pass without make on Windows

src/bootstrap/test.rs

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use std::fmt;
99
use std::fs;
1010
use std::iter;
1111
use std::path::{Path, PathBuf};
12-
use std::process::Command;
12+
use std::process::{Command, Stdio};
1313

1414
use build_helper::{self, output, t};
1515

@@ -144,6 +144,50 @@ You can skip linkcheck with --exclude src/tools/linkchecker"
144144
}
145145
}
146146

147+
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
148+
pub struct HtmlCheck {
149+
target: TargetSelection,
150+
}
151+
152+
impl Step for HtmlCheck {
153+
type Output = ();
154+
const DEFAULT: bool = true;
155+
const ONLY_HOSTS: bool = true;
156+
157+
fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
158+
run.path("src/tools/html-checker")
159+
}
160+
161+
fn make_run(run: RunConfig<'_>) {
162+
run.builder.ensure(HtmlCheck { target: run.target });
163+
}
164+
165+
fn run(self, builder: &Builder<'_>) {
166+
if !Command::new("tidy")
167+
.arg("--version")
168+
.stdout(Stdio::null())
169+
.status()
170+
.map_or(false, |status| status.success())
171+
{
172+
eprintln!("not running HTML-check tool because `tidy` is missing");
173+
eprintln!(
174+
"Note that `tidy` is not the in-tree `src/tools/tidy` but needs to be installed"
175+
);
176+
return;
177+
}
178+
// Ensure that a few different kinds of documentation are available.
179+
builder.default_doc(&[]);
180+
builder.ensure(crate::doc::Rustc { target: self.target, stage: builder.top_stage });
181+
182+
try_run(
183+
builder,
184+
builder
185+
.tool_cmd(Tool::HtmlChecker)
186+
.arg(builder.out.join(self.target.triple).join("doc")),
187+
);
188+
}
189+
}
190+
147191
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
148192
pub struct Cargotest {
149193
stage: u32,

src/bootstrap/tool.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,7 @@ bootstrap_tool!(
375375
ExpandYamlAnchors, "src/tools/expand-yaml-anchors", "expand-yaml-anchors";
376376
LintDocs, "src/tools/lint-docs", "lint-docs";
377377
JsonDocCk, "src/tools/jsondocck", "jsondocck";
378+
HtmlChecker, "src/tools/html-checker", "html-checker";
378379
);
379380

380381
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]

src/tools/html-checker/Cargo.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[package]
2+
name = "html-checker"
3+
version = "0.1.0"
4+
authors = ["Guillaume Gomez <[email protected]>"]
5+
edition = "2018"
6+
7+
[[bin]]
8+
name = "html-checker"
9+
path = "main.rs"
10+
11+
[dependencies]
12+
walkdir = "2"

src/tools/html-checker/main.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
use std::env;
2+
use std::path::Path;
3+
use std::process::{Command, Output};
4+
5+
/// Runs the external `tidy` program on `file` and returns the number of diagnostic lines
/// it reported.
///
/// Returns 0 when `tidy` exits successfully, and also when it exits unsuccessfully with an
/// empty stderr and an exit code other than 2. Otherwise the captured stderr is echoed to
/// this process's stderr under a header naming the file, and the number of lines in that
/// output is returned.
///
/// # Panics
///
/// Panics if the `tidy` command cannot be run.
fn check_html_file(file: &Path) -> usize {
    let to_mute = &[
        // "disabled" on <link> or "autocomplete" on <select> emit this warning
        "PROPRIETARY_ATTRIBUTE",
        // It complains when multiple items in the same page link to the same anchor for some
        // reason...
        "ANCHOR_NOT_UNIQUE",
        // If a <span> contains only HTML elements and no text, it complains about it.
        "TRIM_EMPTY_ELEMENT",
        // FIXME: the three next warnings are about <pre> elements which are not supposed to
        //        contain HTML. The solution here would be to replace them with a <div>.
        "MISSING_ENDTAG_BEFORE",
        "INSERTING_TAG",
        "DISCARDING_UNEXPECTED",
        // FIXME: mdbook repeats the name attribute on <input>. When the fix is merged upstream,
        //        this warning can be used again.
        "REPEATED_ATTRIBUTE",
        // FIXME: mdbook uses "align" attribute on <td>, which is not allowed.
        "MISMATCHED_ATTRIBUTE_WARN",
        // FIXME: mdbook doesn't add "alt" attribute on images.
        "MISSING_ATTRIBUTE",
        // FIXME: mdbook doesn't escape `&` (in "&String" for example).
        "UNKNOWN_ENTITY",
        // Compiler docs have some inlined <style> in the markdown.
        "MOVED_STYLE_TO_HEAD",
    ];
    let to_mute_s = to_mute.join(",");
    let mut command = Command::new("tidy");
    command
        .arg("-errors")
        .arg("-quiet")
        .arg("--mute-id") // this option is useful in case we want to mute more warnings
        .arg("yes")
        .arg("--mute")
        .arg(&to_mute_s)
        .arg(file);

    let Output { status, stderr, .. } = command.output().expect("failed to run tidy command");
    if status.success() {
        return 0;
    }

    // Decode lossily: `tidy` may echo bytes from a mis-encoded HTML file, and a checker
    // should still report (and count) those diagnostics instead of panicking on them.
    let stderr = String::from_utf8_lossy(&stderr);
    // A failing exit status with nothing on stderr is only treated as a problem when the
    // exit code is 2.
    if stderr.is_empty() && status.code() != Some(2) {
        0
    } else {
        eprintln!(
            "=> Errors for `{}` (error code: {}) <=",
            file.display(),
            status.code().unwrap_or(-1)
        );
        eprintln!("{}", stderr);
        stderr.lines().count()
    }
}
60+
61+
// Returns the number of files read and the number of errors.
62+
fn find_all_html_files(dir: &Path) -> (usize, usize) {
63+
let mut files_read = 0;
64+
let mut errors = 0;
65+
66+
for entry in walkdir::WalkDir::new(dir) {
67+
let entry = entry.expect("failed to read file");
68+
if !entry.file_type().is_file() {
69+
continue;
70+
}
71+
let entry = entry.path();
72+
if entry.extension().and_then(|s| s.to_str()) == Some("html") {
73+
errors += check_html_file(&entry);
74+
files_read += 1;
75+
}
76+
}
77+
(files_read, errors)
78+
}
79+
80+
fn main() -> Result<(), String> {
81+
let args = env::args().collect::<Vec<_>>();
82+
if args.len() != 2 {
83+
return Err(format!("Usage: {} <doc folder>", args[0]));
84+
}
85+
86+
println!("Running HTML checker...");
87+
88+
let (files_read, errors) = find_all_html_files(&Path::new(&args[1]));
89+
println!("Done! Read {} files...", files_read);
90+
if errors > 0 {
91+
Err(format!("HTML check failed: {} errors", errors))
92+
} else {
93+
println!("No error found!");
94+
Ok(())
95+
}
96+
}

0 commit comments

Comments
 (0)