Skip to content

Commit be82340

Browse files
committed
Auto merge of rust-lang#13139 - xFrednet:lintcheck-limit-summery-output, r=Alexendoo
Lintcheck: Rework and limit diff output for GH's CI ### Background While working on rust-lang/rust-clippy#13136 I found an amazing limitation of GH's CI. The summary can at most have be 1MB of text. Here is the warning message: > $GITHUB_STEP_SUMMARY upload aborted, supports content up to a size of 1024k, got 46731k. For more information see: https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-markdown-summary [The PR](rust-lang/rust-clippy#13136) produced a *casual* 61808 changes. Guess that's why those lints are not *warn-by-default* :P. ### Changes: This PR limits the lintcheck diff output in two ways. 1. The diff is limited to 200 messages per lint per section. Hidden messages are indicated by a message at the end of the section. 2. The output is first written to a file and only the first 1MB is written to ` >> $GITHUB_STEP_SUMMARY`. The entire file is also written to the normal CI log. This helps for cases where several lints change and the total size exceeds the 1MB limit. An example of these changes can be seen here: https://github.com/xFrednet/rust-clippy/actions/runs/10028799118?pr=4 --- changelog: none r? `@Alexendoo` Sorry for bombarding you with so many PR's lately 😅 Feel free to pass some of you reviews to me.
2 parents 5e6540f + bdf3e58 commit be82340

File tree

5 files changed

+167
-43
lines changed

5 files changed

+167
-43
lines changed

.github/workflows/lintcheck.yml

+17-1
Original file line numberDiff line numberDiff line change
@@ -115,4 +115,20 @@ jobs:
115115
uses: actions/download-artifact@v4
116116

117117
- name: Diff results
118-
run: ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> $GITHUB_STEP_SUMMARY
118+
# GH's summery has a maximum size of 1024k:
119+
# https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-markdown-summary
120+
# That's why we first log to file and then to the summary and logs
121+
run: |
122+
./target/debug/lintcheck diff {base,head}/ci_crates_logs.json --truncate >> truncated_diff.md
123+
head -c 1024000 truncated_diff.md >> $GITHUB_STEP_SUMMARY
124+
cat truncated_diff.md
125+
./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> full_diff.md
126+
127+
- name: Upload full diff
128+
uses: actions/upload-artifact@v4
129+
with:
130+
name: diff
131+
if-no-files-found: ignore
132+
path: |
133+
full_diff.md
134+
truncated_diff.md

lintcheck/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ clap = { version = "4.4", features = ["derive", "env"] }
1616
crossbeam-channel = "0.5.6"
1717
diff = "0.1.13"
1818
flate2 = "1.0"
19-
itertools = "0.12"
19+
itertools = "0.13"
2020
rayon = "1.5.1"
2121
serde = { version = "1.0", features = ["derive"] }
2222
serde_json = "1.0.85"

lintcheck/src/config.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,13 @@ pub(crate) struct LintcheckConfig {
5353
#[derive(Subcommand, Clone, Debug)]
5454
pub(crate) enum Commands {
5555
/// Display a markdown diff between two lintcheck log files in JSON format
56-
Diff { old: PathBuf, new: PathBuf },
56+
Diff {
57+
old: PathBuf,
58+
new: PathBuf,
59+
/// This will limit the number of warnings that will be printed for each lint
60+
#[clap(long)]
61+
truncate: bool,
62+
},
5763
/// Create a lintcheck crates TOML file containing the top N popular crates
5864
Popular {
5965
/// Output TOML file name

lintcheck/src/json.rs

+141-39
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
use std::fs;
22
use std::path::Path;
33

4-
use itertools::EitherOrBoth;
4+
use itertools::{EitherOrBoth, Itertools};
55
use serde::{Deserialize, Serialize};
66

77
use crate::ClippyWarning;
88

9-
#[derive(Deserialize, Serialize)]
9+
/// This is the total number. 300 warnings results in 100 messages per section.
10+
const DEFAULT_LIMIT_PER_LINT: usize = 300;
11+
const TRUNCATION_TOTAL_TARGET: usize = 1000;
12+
13+
#[derive(Debug, Deserialize, Serialize)]
1014
struct LintJson {
1115
lint: String,
1216
krate: String,
@@ -18,7 +22,7 @@ struct LintJson {
1822

1923
impl LintJson {
2024
fn key(&self) -> impl Ord + '_ {
21-
(self.file_name.as_str(), self.byte_pos, self.lint.as_str())
25+
(self.lint.as_str(), self.file_name.as_str(), self.byte_pos)
2226
}
2327

2428
fn info_text(&self, action: &str) -> String {
@@ -52,14 +56,117 @@ fn load_warnings(path: &Path) -> Vec<LintJson> {
5256
serde_json::from_slice(&file).unwrap_or_else(|e| panic!("failed to deserialize {}: {e}", path.display()))
5357
}
5458

55-
fn print_warnings(title: &str, warnings: &[LintJson]) {
59+
pub(crate) fn diff(old_path: &Path, new_path: &Path, truncate: bool) {
60+
let old_warnings = load_warnings(old_path);
61+
let new_warnings = load_warnings(new_path);
62+
63+
let mut lint_warnings = vec![];
64+
65+
for (name, changes) in &itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key()))
66+
.chunk_by(|change| change.as_ref().into_left().lint.to_string())
67+
{
68+
let mut added = Vec::new();
69+
let mut removed = Vec::new();
70+
let mut changed = Vec::new();
71+
for change in changes {
72+
match change {
73+
EitherOrBoth::Both(old, new) => {
74+
if old.rendered != new.rendered {
75+
changed.push((old, new));
76+
}
77+
},
78+
EitherOrBoth::Left(old) => removed.push(old),
79+
EitherOrBoth::Right(new) => added.push(new),
80+
}
81+
}
82+
83+
if !added.is_empty() || !removed.is_empty() || !changed.is_empty() {
84+
lint_warnings.push(LintWarnings {
85+
name,
86+
added,
87+
removed,
88+
changed,
89+
});
90+
}
91+
}
92+
93+
print_summary_table(&lint_warnings);
94+
println!();
95+
96+
if lint_warnings.is_empty() {
97+
return;
98+
}
99+
100+
let truncate_after = if truncate {
101+
// Max 15 ensures that we at least have five messages per lint
102+
DEFAULT_LIMIT_PER_LINT
103+
.min(TRUNCATION_TOTAL_TARGET / lint_warnings.len())
104+
.max(15)
105+
} else {
106+
// No lint should ever each this number of lint emissions, so this is equivialent to
107+
// No truncation
108+
usize::MAX
109+
};
110+
111+
for lint in lint_warnings {
112+
print_lint_warnings(&lint, truncate_after);
113+
}
114+
}
115+
116+
#[derive(Debug)]
117+
struct LintWarnings {
118+
name: String,
119+
added: Vec<LintJson>,
120+
removed: Vec<LintJson>,
121+
changed: Vec<(LintJson, LintJson)>,
122+
}
123+
124+
fn print_lint_warnings(lint: &LintWarnings, truncate_after: usize) {
125+
let name = &lint.name;
126+
let html_id = to_html_id(name);
127+
128+
// The additional anchor is added for non GH viewers that don't prefix ID's
129+
println!(r#"## `{name}` <a id="user-content-{html_id}"></a>"#);
130+
println!();
131+
132+
print!(
133+
r##"{}, {}, {}"##,
134+
count_string(name, "added", lint.added.len()),
135+
count_string(name, "removed", lint.removed.len()),
136+
count_string(name, "changed", lint.changed.len()),
137+
);
138+
println!();
139+
140+
print_warnings("Added", &lint.added, truncate_after / 3);
141+
print_warnings("Removed", &lint.removed, truncate_after / 3);
142+
print_changed_diff(&lint.changed, truncate_after / 3);
143+
}
144+
145+
fn print_summary_table(lints: &[LintWarnings]) {
146+
println!("| Lint | Added | Removed | Changed |");
147+
println!("| ------------------------------------------ | ------: | ------: | ------: |");
148+
149+
for lint in lints {
150+
println!(
151+
"| {:<62} | {:>7} | {:>7} | {:>7} |",
152+
format!("[`{}`](#user-content-{})", lint.name, to_html_id(&lint.name)),
153+
lint.added.len(),
154+
lint.removed.len(),
155+
lint.changed.len()
156+
);
157+
}
158+
}
159+
160+
fn print_warnings(title: &str, warnings: &[LintJson], truncate_after: usize) {
56161
if warnings.is_empty() {
57162
return;
58163
}
59164

60-
//We have to use HTML here to be able to manually add an id.
61-
println!(r#"<h3 id="{title}">{title}</h3>"#);
165+
print_h3(&warnings[0].lint, title);
62166
println!();
167+
168+
let warnings = truncate(warnings, truncate_after);
169+
63170
for warning in warnings {
64171
println!("{}", warning.info_text(title));
65172
println!();
@@ -70,14 +177,16 @@ fn print_warnings(title: &str, warnings: &[LintJson]) {
70177
}
71178
}
72179

73-
fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
180+
fn print_changed_diff(changed: &[(LintJson, LintJson)], truncate_after: usize) {
74181
if changed.is_empty() {
75182
return;
76183
}
77184

78-
//We have to use HTML here to be able to manually add an id.
79-
println!(r#"<h3 id="changed">Changed</h3>"#);
185+
print_h3(&changed[0].0.lint, "Changed");
80186
println!();
187+
188+
let changed = truncate(changed, truncate_after);
189+
81190
for (old, new) in changed {
82191
println!("{}", new.info_text("Changed"));
83192
println!();
@@ -101,51 +210,44 @@ fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
101210
}
102211
}
103212

104-
pub(crate) fn diff(old_path: &Path, new_path: &Path) {
105-
let old_warnings = load_warnings(old_path);
106-
let new_warnings = load_warnings(new_path);
213+
fn truncate<T>(list: &[T], truncate_after: usize) -> &[T] {
214+
if list.len() > truncate_after {
215+
println!(
216+
"{} warnings have been truncated for this summary.",
217+
list.len() - truncate_after
218+
);
219+
println!();
107220

108-
let mut added = Vec::new();
109-
let mut removed = Vec::new();
110-
let mut changed = Vec::new();
111-
112-
for change in itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key())) {
113-
match change {
114-
EitherOrBoth::Both(old, new) => {
115-
if old.rendered != new.rendered {
116-
changed.push((old, new));
117-
}
118-
},
119-
EitherOrBoth::Left(old) => removed.push(old),
120-
EitherOrBoth::Right(new) => added.push(new),
121-
}
221+
list.split_at(truncate_after).0
222+
} else {
223+
list
122224
}
225+
}
123226

124-
print!(
125-
r##"{}, {}, {}"##,
126-
count_string("added", added.len()),
127-
count_string("removed", removed.len()),
128-
count_string("changed", changed.len()),
129-
);
130-
println!();
131-
println!();
227+
fn print_h3(lint: &str, title: &str) {
228+
let html_id = to_html_id(lint);
229+
// We have to use HTML here to be able to manually add an id.
230+
println!(r#"### {title} <a id="user-content-{html_id}-{title}"></a>"#);
231+
}
132232

133-
print_warnings("Added", &added);
134-
print_warnings("Removed", &removed);
135-
print_changed_diff(&changed);
233+
/// GitHub's markdown parsers doesn't like IDs with `::` and `_`. This simplifies
234+
/// the lint name for the HTML ID.
235+
fn to_html_id(lint_name: &str) -> String {
236+
lint_name.replace("clippy::", "").replace('_', "-")
136237
}
137238

138239
/// This generates the `x added` string for the start of the job summery.
139240
/// It linkifies them if possible to jump to the respective heading.
140-
fn count_string(label: &str, count: usize) -> String {
241+
fn count_string(lint: &str, label: &str, count: usize) -> String {
141242
// Headlines are only added, if anything will be displayed under the headline.
142243
// We therefore only want to add links to them if they exist
143244
if count == 0 {
144245
format!("0 {label}")
145246
} else {
247+
let html_id = to_html_id(lint);
146248
// GitHub's job summaries don't add HTML ids to headings. That's why we
147249
// manually have to add them. GitHub prefixes these manual ids with
148250
// `user-content-` and that's how we end up with these awesome links :D
149-
format!("[{count} {label}](#user-content-{label})")
251+
format!("[{count} {label}](#user-content-{html_id}-{label})")
150252
}
151253
}

lintcheck/src/main.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ fn main() {
260260
let config = LintcheckConfig::new();
261261

262262
match config.subcommand {
263-
Some(Commands::Diff { old, new }) => json::diff(&old, &new),
263+
Some(Commands::Diff { old, new, truncate }) => json::diff(&old, &new, truncate),
264264
Some(Commands::Popular { output, number }) => popular_crates::fetch(output, number).unwrap(),
265265
None => lintcheck(config),
266266
}

0 commit comments

Comments
 (0)