Skip to content

Commit bba6f0a

Browse files
authored
Merge pull request rust-lang#4104 from RalfJung/bench
Provide a way to compare benchmark results with baseline
2 parents 58ad698 + 41f3edc commit bba6f0a

File tree

5 files changed

+111
-10
lines changed

5 files changed

+111
-10
lines changed

Diff for: src/tools/miri/CONTRIBUTING.md

+9
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,15 @@ Miri comes with a few benchmarks; you can run `./miri bench` to run them with th
212212
Miri. Note: this will run `./miri install` as a side-effect. Also requires `hyperfine` to be
213213
installed (`cargo install hyperfine`).
214214

215+
To compare the benchmark results with a baseline, do the following:
216+
- Before applying your changes, run `./miri bench --save-baseline=baseline.json`.
217+
- Then make your changes.
218+
- Then run `./miri bench --load-baseline=baseline.json`; the results will include
219+
a comparison with the baseline.
220+
221+
You can run only some of the benchmarks by listing them, e.g. `./miri bench mse`.
222+
The names refer to the folders in `bench-cargo-miri`.
223+
215224
## Configuring `rust-analyzer`
216225

217226
To configure `rust-analyzer` and the IDE for working on Miri, copy one of the provided

Diff for: src/tools/miri/miri-script/Cargo.lock

+2
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,8 @@ dependencies = [
250250
"itertools",
251251
"path_macro",
252252
"rustc_version",
253+
"serde",
254+
"serde_derive",
253255
"serde_json",
254256
"shell-words",
255257
"tempfile",

Diff for: src/tools/miri/miri-script/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ xshell = "0.2.6"
2323
rustc_version = "0.4"
2424
dunce = "1.0.4"
2525
directories = "5"
26+
serde = "1"
2627
serde_json = "1"
28+
serde_derive = "1"
2729
tempfile = "3.13.0"
2830
clap = { version = "4.5.21", features = ["derive"] }

Diff for: src/tools/miri/miri-script/src/commands.rs

+89-9
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
1+
use std::collections::HashMap;
12
use std::ffi::{OsStr, OsString};
2-
use std::io::Write;
3+
use std::fs::File;
4+
use std::io::{BufReader, BufWriter, Write};
35
use std::ops::{Not, Range};
46
use std::path::PathBuf;
57
use std::time::Duration;
68
use std::{env, net, process};
79

810
use anyhow::{Context, Result, anyhow, bail};
911
use path_macro::path;
12+
use serde_derive::{Deserialize, Serialize};
13+
use tempfile::TempDir;
1014
use walkdir::WalkDir;
1115
use xshell::{Shell, cmd};
1216

@@ -179,8 +183,8 @@ impl Command {
179183
Command::Doc { flags } => Self::doc(flags),
180184
Command::Fmt { flags } => Self::fmt(flags),
181185
Command::Clippy { flags } => Self::clippy(flags),
182-
Command::Bench { target, no_install, benches } =>
183-
Self::bench(target, no_install, benches),
186+
Command::Bench { target, no_install, save_baseline, load_baseline, benches } =>
187+
Self::bench(target, no_install, save_baseline, load_baseline, benches),
184188
Command::Toolchain { flags } => Self::toolchain(flags),
185189
Command::RustcPull { commit } => Self::rustc_pull(commit.clone()),
186190
Command::RustcPush { github_user, branch } => Self::rustc_push(github_user, branch),
@@ -379,27 +383,44 @@ impl Command {
379383
Ok(())
380384
}
381385

382-
fn bench(target: Option<String>, no_install: bool, benches: Vec<String>) -> Result<()> {
386+
fn bench(
387+
target: Option<String>,
388+
no_install: bool,
389+
save_baseline: Option<String>,
390+
load_baseline: Option<String>,
391+
benches: Vec<String>,
392+
) -> Result<()> {
393+
if save_baseline.is_some() && load_baseline.is_some() {
394+
bail!("Only one of `--save-baseline` and `--load-baseline` can be set");
395+
}
396+
383397
// The hyperfine to use
384398
let hyperfine = env::var("HYPERFINE");
385399
let hyperfine = hyperfine.as_deref().unwrap_or("hyperfine -w 1 -m 5 --shell=none");
386400
let hyperfine = shell_words::split(hyperfine)?;
387401
let Some((program_name, args)) = hyperfine.split_first() else {
388402
bail!("expected HYPERFINE environment variable to be non-empty");
389403
};
404+
390405
if !no_install {
391406
// Make sure we have an up-to-date Miri installed and selected the right toolchain.
392407
Self::install(vec![])?;
393408
}
409+
let results_json_dir = if save_baseline.is_some() || load_baseline.is_some() {
410+
Some(TempDir::new()?)
411+
} else {
412+
None
413+
};
394414

415+
let miri_dir = miri_dir()?;
395416
let sh = Shell::new()?;
396-
sh.change_dir(miri_dir()?);
417+
sh.change_dir(&miri_dir);
397418
let benches_dir = "bench-cargo-miri";
398-
let benches: Vec<OsString> = if benches.is_empty() {
419+
let benches: Vec<String> = if benches.is_empty() {
399420
sh.read_dir(benches_dir)?
400421
.into_iter()
401422
.filter(|path| path.is_dir())
402-
.map(Into::into)
423+
.map(|path| path.into_os_string().into_string().unwrap())
403424
.collect()
404425
} else {
405426
benches.into_iter().map(Into::into).collect()
@@ -414,16 +435,75 @@ impl Command {
414435
let target_flag = &target_flag;
415436
let toolchain = active_toolchain()?;
416437
// Run the requested benchmarks
417-
for bench in benches {
438+
for bench in &benches {
418439
let current_bench = path!(benches_dir / bench / "Cargo.toml");
440+
let mut export_json = None;
441+
if let Some(baseline_temp_dir) = &results_json_dir {
442+
export_json = Some(format!(
443+
"--export-json={}",
444+
path!(baseline_temp_dir / format!("{bench}.bench.json")).display()
445+
));
446+
}
419447
// We don't attempt to escape `current_bench`, but we wrap it in quotes.
420448
// That seems to make Windows CI happy.
421449
cmd!(
422450
sh,
423-
"{program_name} {args...} 'cargo +'{toolchain}' miri run '{target_flag}' --manifest-path \"'{current_bench}'\"'"
451+
"{program_name} {args...} {export_json...} 'cargo +'{toolchain}' miri run '{target_flag}' --manifest-path \"'{current_bench}'\"'"
424452
)
425453
.run()?;
426454
}
455+
456+
// Gather/load results for baseline saving.
457+
458+
#[derive(Serialize, Deserialize)]
459+
struct BenchResult {
460+
mean: f64,
461+
stddev: f64,
462+
}
463+
464+
let gather_results = || -> Result<HashMap<&str, BenchResult>> {
465+
let baseline_temp_dir = results_json_dir.unwrap();
466+
let mut results = HashMap::new();
467+
for bench in &benches {
468+
let result = File::open(path!(baseline_temp_dir / format!("{bench}.bench.json")))?;
469+
let mut result: serde_json::Value =
470+
serde_json::from_reader(BufReader::new(result))?;
471+
let result: BenchResult = serde_json::from_value(result["results"][0].take())?;
472+
results.insert(bench as &str, result);
473+
}
474+
Ok(results)
475+
};
476+
477+
if let Some(baseline_file) = save_baseline {
478+
let results = gather_results()?;
479+
let baseline = File::create(baseline_file)?;
480+
serde_json::to_writer_pretty(BufWriter::new(baseline), &results)?;
481+
} else if let Some(baseline_file) = load_baseline {
482+
let new_results = gather_results()?;
483+
let baseline_results: HashMap<String, BenchResult> = {
484+
let f = File::open(baseline_file)?;
485+
serde_json::from_reader(BufReader::new(f))?
486+
};
487+
println!(
488+
"Comparison with baseline (relative speed, lower is better for the new results):"
489+
);
490+
for (bench, new_result) in new_results.iter() {
491+
let Some(baseline_result) = baseline_results.get(*bench) else { continue };
492+
493+
// Compare results (inspired by hyperfine)
494+
let ratio = new_result.mean / baseline_result.mean;
495+
// https://en.wikipedia.org/wiki/Propagation_of_uncertainty#Example_formulae
496+
// Covariance assumed to be 0, i.e. variables are assumed to be independent
497+
let ratio_stddev = ratio
498+
* f64::sqrt(
499+
(new_result.stddev / new_result.mean).powi(2)
500+
+ (baseline_result.stddev / baseline_result.mean).powi(2),
501+
);
502+
503+
println!(" {bench}: {ratio:.2} ± {ratio_stddev:.2}");
504+
}
505+
}
506+
427507
Ok(())
428508
}
429509

Diff for: src/tools/miri/miri-script/src/main.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#![allow(clippy::needless_question_mark)]
1+
#![allow(clippy::needless_question_mark, rustc::internal)]
22

33
mod commands;
44
mod coverage;
@@ -117,6 +117,14 @@ pub enum Command {
117117
/// When `true`, skip the `./miri install` step.
118118
#[arg(long)]
119119
no_install: bool,
120+
/// Store the benchmark result in the given file, so it can be used
121+
/// as the baseline for a future run.
122+
#[arg(long)]
123+
save_baseline: Option<String>,
124+
/// Load previously stored benchmark results as baseline, and print an analysis of how the
125+
/// current run compares.
126+
#[arg(long)]
127+
load_baseline: Option<String>,
120128
/// List of benchmarks to run (default: run all benchmarks).
121129
benches: Vec<String>,
122130
},

0 commit comments

Comments
 (0)