Skip to content

Commit d5d633d

Browse files
committed
Group diffs by tests, rather than job groups
1 parent 5a7f227 commit d5d633d

File tree

1 file changed

+86
-76
lines changed

1 file changed

+86
-76
lines changed

src/ci/citool/src/merge_report.rs

+86-76
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use std::cmp::Reverse;
2-
use std::collections::HashMap;
1+
use std::collections::{HashMap, HashSet};
32
use std::path::PathBuf;
43

54
use anyhow::Context;
@@ -14,10 +13,10 @@ type JobName = String;
1413
/// Computes a post merge CI analysis report between the `parent` and `current` commits.
1514
pub fn post_merge_report(job_db: JobDatabase, parent: Sha, current: Sha) -> anyhow::Result<()> {
1615
let jobs = download_all_metrics(&job_db, &parent, &current)?;
17-
let diffs = aggregate_test_diffs(&jobs)?;
16+
let aggregated_test_diffs = aggregate_test_diffs(&jobs)?;
1817

1918
println!("Comparing {parent} (base) -> {current} (this PR)\n");
20-
report_test_changes(diffs);
19+
report_test_diffs(aggregated_test_diffs);
2120

2221
Ok(())
2322
}
@@ -95,81 +94,80 @@ fn get_metrics_url(job_name: &str, sha: &str) -> String {
9594
format!("https://ci-artifacts.rust-lang.org/rustc-builds{suffix}/{sha}/metrics-{job_name}.json")
9695
}
9796

97+
/// Represents a difference in the outcome of tests between a base and a current commit.
98+
/// Maps test diffs to jobs that contained them.
99+
#[derive(Debug)]
100+
struct AggregatedTestDiffs {
101+
diffs: HashMap<TestDiff, Vec<JobName>>,
102+
}
103+
98104
fn aggregate_test_diffs(
99105
jobs: &HashMap<JobName, JobMetrics>,
100-
) -> anyhow::Result<Vec<AggregatedTestDiffs>> {
101-
let mut job_diffs = vec![];
106+
) -> anyhow::Result<AggregatedTestDiffs> {
107+
let mut diffs: HashMap<TestDiff, Vec<JobName>> = HashMap::new();
102108

103109
// Aggregate test suites
104110
for (name, metrics) in jobs {
105111
if let Some(parent) = &metrics.parent {
106112
let tests_parent = aggregate_tests(parent);
107113
let tests_current = aggregate_tests(&metrics.current);
108-
let test_diffs = calculate_test_diffs(tests_parent, tests_current);
109-
if !test_diffs.is_empty() {
110-
job_diffs.push((name.clone(), test_diffs));
114+
for diff in calculate_test_diffs(tests_parent, tests_current) {
115+
diffs.entry(diff).or_default().push(name.to_string());
111116
}
112117
}
113118
}
114119

115-
// Aggregate jobs with the same diff, as often the same diff will appear in many jobs
116-
let job_diffs: HashMap<Vec<(Test, TestOutcomeDiff)>, Vec<String>> =
117-
job_diffs.into_iter().fold(HashMap::new(), |mut acc, (job, diffs)| {
118-
acc.entry(diffs).or_default().push(job);
119-
acc
120-
});
121-
122-
Ok(job_diffs
123-
.into_iter()
124-
.map(|(test_diffs, jobs)| AggregatedTestDiffs { jobs, test_diffs })
125-
.collect())
120+
Ok(AggregatedTestDiffs { diffs })
121+
}
122+
123+
#[derive(Eq, PartialEq, Hash, Debug)]
124+
enum TestOutcomeDiff {
125+
ChangeOutcome { before: TestOutcome, after: TestOutcome },
126+
Missing { before: TestOutcome },
127+
Added(TestOutcome),
128+
}
129+
130+
#[derive(Eq, PartialEq, Hash, Debug)]
131+
struct TestDiff {
132+
test: Test,
133+
diff: TestOutcomeDiff,
126134
}
127135

128-
fn calculate_test_diffs(
129-
reference: TestSuiteData,
130-
current: TestSuiteData,
131-
) -> Vec<(Test, TestOutcomeDiff)> {
132-
let mut diffs = vec![];
136+
fn calculate_test_diffs(parent: TestSuiteData, current: TestSuiteData) -> HashSet<TestDiff> {
137+
let mut diffs = HashSet::new();
133138
for (test, outcome) in &current.tests {
134-
match reference.tests.get(test) {
139+
match parent.tests.get(test) {
135140
Some(before) => {
136141
if before != outcome {
137-
diffs.push((
138-
test.clone(),
139-
TestOutcomeDiff::ChangeOutcome {
142+
diffs.insert(TestDiff {
143+
test: test.clone(),
144+
diff: TestOutcomeDiff::ChangeOutcome {
140145
before: before.clone(),
141146
after: outcome.clone(),
142147
},
143-
));
148+
});
144149
}
145150
}
146-
None => diffs.push((test.clone(), TestOutcomeDiff::Added(outcome.clone()))),
151+
None => {
152+
diffs.insert(TestDiff {
153+
test: test.clone(),
154+
diff: TestOutcomeDiff::Added(outcome.clone()),
155+
});
156+
}
147157
}
148158
}
149-
for (test, outcome) in &reference.tests {
159+
for (test, outcome) in &parent.tests {
150160
if !current.tests.contains_key(test) {
151-
diffs.push((test.clone(), TestOutcomeDiff::Missing { before: outcome.clone() }));
161+
diffs.insert(TestDiff {
162+
test: test.clone(),
163+
diff: TestOutcomeDiff::Missing { before: outcome.clone() },
164+
});
152165
}
153166
}
154167

155168
diffs
156169
}
157170

158-
/// Represents a difference in the outcome of tests between a base and a current commit.
159-
#[derive(Debug)]
160-
struct AggregatedTestDiffs {
161-
/// All jobs that had the exact same test diffs.
162-
jobs: Vec<String>,
163-
test_diffs: Vec<(Test, TestOutcomeDiff)>,
164-
}
165-
166-
#[derive(Eq, PartialEq, Hash, Debug)]
167-
enum TestOutcomeDiff {
168-
ChangeOutcome { before: TestOutcome, after: TestOutcome },
169-
Missing { before: TestOutcome },
170-
Added(TestOutcome),
171-
}
172-
173171
/// Aggregates test suite executions from all bootstrap invocations in a given CI job.
174172
#[derive(Default)]
175173
struct TestSuiteData {
@@ -200,16 +198,13 @@ fn normalize_test_name(name: &str) -> String {
200198
}
201199

202200
/// Prints test changes in Markdown format to stdout.
203-
fn report_test_changes(mut diffs: Vec<AggregatedTestDiffs>) {
201+
fn report_test_diffs(mut diff: AggregatedTestDiffs) {
204202
println!("## Test differences");
205-
if diffs.is_empty() {
203+
if diff.diffs.is_empty() {
206204
println!("No test diffs found");
207205
return;
208206
}
209207

210-
// Sort diffs in decreasing order by diff count
211-
diffs.sort_by_key(|entry| Reverse(entry.test_diffs.len()));
212-
213208
fn format_outcome(outcome: &TestOutcome) -> String {
214209
match outcome {
215210
TestOutcome::Passed => "pass".to_string(),
@@ -238,36 +233,51 @@ fn report_test_changes(mut diffs: Vec<AggregatedTestDiffs>) {
238233
}
239234
}
240235

241-
let max_diff_count = 10;
242-
let max_job_count = 5;
243-
let max_test_count = 10;
244-
245-
for diff in diffs.iter().take(max_diff_count) {
246-
let mut jobs = diff.jobs.clone();
236+
// It would be quite noisy to repeat the jobs that contained the test changes after/next to
237+
// every test diff. At the same time, grouping the test diffs by
238+
// [unique set of jobs that contained them] also doesn't work well, because the test diffs
239+
// would have to be duplicated several times.
240+
// Instead, we create a set of unique job groups, and then print a job group after each test.
241+
// We then print the job groups at the end, as a sort of index.
242+
let mut grouped_diffs: Vec<(&TestDiff, u64)> = vec![];
243+
let mut job_list_to_group: HashMap<&[JobName], u64> = HashMap::new();
244+
let mut job_index: Vec<&[JobName]> = vec![];
245+
246+
for (_, jobs) in diff.diffs.iter_mut() {
247247
jobs.sort();
248+
}
248249

249-
let jobs = jobs.iter().take(max_job_count).map(|j| format!("`{j}`")).collect::<Vec<_>>();
250-
251-
let extra_jobs = diff.jobs.len().saturating_sub(max_job_count);
252-
let suffix = if extra_jobs > 0 {
253-
format!(" (and {extra_jobs} {})", pluralize("other", extra_jobs))
254-
} else {
255-
String::new()
250+
let max_diff_count = 100;
251+
for (diff, jobs) in diff.diffs.iter().take(max_diff_count) {
252+
let jobs = &*jobs;
253+
let job_group = match job_list_to_group.get(jobs.as_slice()) {
254+
Some(id) => *id,
255+
None => {
256+
let id = job_index.len() as u64;
257+
job_index.push(jobs);
258+
job_list_to_group.insert(jobs, id);
259+
id
260+
}
256261
};
257-
println!("- {}{suffix}", jobs.join(","));
262+
grouped_diffs.push((diff, job_group));
263+
}
258264

259-
let extra_tests = diff.test_diffs.len().saturating_sub(max_test_count);
260-
for (test, outcome_diff) in diff.test_diffs.iter().take(max_test_count) {
261-
println!(" - {}: {}", test.name, format_diff(&outcome_diff));
262-
}
263-
if extra_tests > 0 {
264-
println!(" - (and {extra_tests} additional {})", pluralize("test", extra_tests));
265-
}
265+
// Sort diffs by job group and test name
266+
grouped_diffs.sort_by(|(d1, g1), (d2, g2)| g1.cmp(&g2).then(d1.test.name.cmp(&d2.test.name)));
267+
268+
for (diff, job_group) in grouped_diffs {
269+
println!("- `{}`: {} (*J{job_group}*)", diff.test.name, format_diff(&diff.diff));
266270
}
267271

268-
let extra_diffs = diffs.len().saturating_sub(max_diff_count);
272+
let extra_diffs = diff.diffs.len().saturating_sub(max_diff_count);
269273
if extra_diffs > 0 {
270-
println!("\n(and {extra_diffs} additional {})", pluralize("diff", extra_diffs));
274+
println!("\n(and {extra_diffs} additional {})", pluralize("test diff", extra_diffs));
275+
}
276+
277+
// Now print the job index
278+
println!("\n**Job index**\n");
279+
for (group, jobs) in job_index.into_iter().enumerate() {
280+
println!("- J{group}: `{}`", jobs.join(", "));
271281
}
272282
}
273283

0 commit comments

Comments
 (0)