Merge pull request rust-lang#4104 from RalfJung/bench

RalfJung · web-flow · commit bba6f0a6b2cb · 2024-12-22T20:13:50.000Z
Provide a way to compare benchmark results with baseline
diff --git a/src/tools/miri/CONTRIBUTING.md b/src/tools/miri/CONTRIBUTING.md
@@ -212,6 +212,15 @@ Miri comes with a few benchmarks; you can run `./miri bench` to run them with th
 Miri. Note: this will run `./miri install` as a side-effect. Also requires `hyperfine` to be
 installed (`cargo install hyperfine`).
 
+To compare the benchmark results with a baseline, do the following:
+- Before applying your changes, run `./miri bench --save-baseline=baseline.json`.
+- Then do your changes.
+- Then run `./miri bench --load-baseline=baseline.json`; the results will include
+  a comparison with the baseline.
+
+You can run only some of the benchmarks by listing them, e.g. `./miri bench mse`.
+The names refer to the folders in `bench-cargo-miri`.
+
 ## Configuring `rust-analyzer`
 
 To configure `rust-analyzer` and the IDE for working on Miri, copy one of the provided
diff --git a/src/tools/miri/miri-script/Cargo.lock b/src/tools/miri/miri-script/Cargo.lock
@@ -250,6 +250,8 @@ dependencies = [
  "itertools",
  "path_macro",
  "rustc_version",
+ "serde",
+ "serde_derive",
  "serde_json",
  "shell-words",
  "tempfile",
diff --git a/src/tools/miri/miri-script/Cargo.toml b/src/tools/miri/miri-script/Cargo.toml
@@ -23,6 +23,8 @@ xshell = "0.2.6"
 rustc_version = "0.4"
 dunce = "1.0.4"
 directories = "5"
+serde = "1"
 serde_json = "1"
+serde_derive = "1"
 tempfile = "3.13.0"
 clap = { version = "4.5.21", features = ["derive"] }
diff --git a/src/tools/miri/miri-script/src/commands.rs b/src/tools/miri/miri-script/src/commands.rs
@@ -1,12 +1,16 @@
+use std::collections::HashMap;
 use std::ffi::{OsStr, OsString};
-use std::io::Write;
+use std::fs::File;
+use std::io::{BufReader, BufWriter, Write};
 use std::ops::{Not, Range};
 use std::path::PathBuf;
 use std::time::Duration;
 use std::{env, net, process};
 
 use anyhow::{Context, Result, anyhow, bail};
 use path_macro::path;
+use serde_derive::{Deserialize, Serialize};
+use tempfile::TempDir;
 use walkdir::WalkDir;
 use xshell::{Shell, cmd};
 
@@ -179,8 +183,8 @@ impl Command {
             Command::Doc { flags } => Self::doc(flags),
             Command::Fmt { flags } => Self::fmt(flags),
             Command::Clippy { flags } => Self::clippy(flags),
-            Command::Bench { target, no_install, benches } =>
-                Self::bench(target, no_install, benches),
+            Command::Bench { target, no_install, save_baseline, load_baseline, benches } =>
+                Self::bench(target, no_install, save_baseline, load_baseline, benches),
             Command::Toolchain { flags } => Self::toolchain(flags),
             Command::RustcPull { commit } => Self::rustc_pull(commit.clone()),
             Command::RustcPush { github_user, branch } => Self::rustc_push(github_user, branch),
@@ -379,27 +383,44 @@ impl Command {
         Ok(())
     }
 
-    fn bench(target: Option<String>, no_install: bool, benches: Vec<String>) -> Result<()> {
+    fn bench(
+        target: Option<String>,
+        no_install: bool,
+        save_baseline: Option<String>,
+        load_baseline: Option<String>,
+        benches: Vec<String>,
+    ) -> Result<()> {
+        if save_baseline.is_some() && load_baseline.is_some() {
+            bail!("Only one of `--save-baseline` and `--load-baseline` can be set");
+        }
+
         // The hyperfine to use
         let hyperfine = env::var("HYPERFINE");
         let hyperfine = hyperfine.as_deref().unwrap_or("hyperfine -w 1 -m 5 --shell=none");
         let hyperfine = shell_words::split(hyperfine)?;
         let Some((program_name, args)) = hyperfine.split_first() else {
             bail!("expected HYPERFINE environment variable to be non-empty");
         };
+
         if !no_install {
             // Make sure we have an up-to-date Miri installed and selected the right toolchain.
             Self::install(vec![])?;
         }
+        let results_json_dir = if save_baseline.is_some() || load_baseline.is_some() {
+            Some(TempDir::new()?)
+        } else {
+            None
+        };
 
+        let miri_dir = miri_dir()?;
         let sh = Shell::new()?;
-        sh.change_dir(miri_dir()?);
+        sh.change_dir(&miri_dir);
         let benches_dir = "bench-cargo-miri";
-        let benches: Vec<OsString> = if benches.is_empty() {
+        let benches: Vec<String> = if benches.is_empty() {
             sh.read_dir(benches_dir)?
                 .into_iter()
                 .filter(|path| path.is_dir())
-                .map(Into::into)
+                .map(|path| path.into_os_string().into_string().unwrap())
                 .collect()
         } else {
             benches.into_iter().map(Into::into).collect()
@@ -414,16 +435,75 @@ impl Command {
         let target_flag = &target_flag;
         let toolchain = active_toolchain()?;
         // Run the requested benchmarks
-        for bench in benches {
+        for bench in &benches {
             let current_bench = path!(benches_dir / bench / "Cargo.toml");
+            let mut export_json = None;
+            if let Some(baseline_temp_dir) = &results_json_dir {
+                export_json = Some(format!(
+                    "--export-json={}",
+                    path!(baseline_temp_dir / format!("{bench}.bench.json")).display()
+                ));
+            }
             // We don't attempt to escape `current_bench`, but we wrap it in quotes.
             // That seems to make Windows CI happy.
             cmd!(
                 sh,
-                "{program_name} {args...} 'cargo +'{toolchain}' miri run '{target_flag}' --manifest-path \"'{current_bench}'\"'"
+                "{program_name} {args...} {export_json...} 'cargo +'{toolchain}' miri run '{target_flag}' --manifest-path \"'{current_bench}'\"'"
             )
             .run()?;
         }
+
+        // Gather/load results for baseline saving.
+
+        #[derive(Serialize, Deserialize)]
+        struct BenchResult {
+            mean: f64,
+            stddev: f64,
+        }
+
+        let gather_results = || -> Result<HashMap<&str, BenchResult>> {
+            let baseline_temp_dir = results_json_dir.unwrap();
+            let mut results = HashMap::new();
+            for bench in &benches {
+                let result = File::open(path!(baseline_temp_dir / format!("{bench}.bench.json")))?;
+                let mut result: serde_json::Value =
+                    serde_json::from_reader(BufReader::new(result))?;
+                let result: BenchResult = serde_json::from_value(result["results"][0].take())?;
+                results.insert(bench as &str, result);
+            }
+            Ok(results)
+        };
+
+        if let Some(baseline_file) = save_baseline {
+            let results = gather_results()?;
+            let baseline = File::create(baseline_file)?;
+            serde_json::to_writer_pretty(BufWriter::new(baseline), &results)?;
+        } else if let Some(baseline_file) = load_baseline {
+            let new_results = gather_results()?;
+            let baseline_results: HashMap<String, BenchResult> = {
+                let f = File::open(baseline_file)?;
+                serde_json::from_reader(BufReader::new(f))?
+            };
+            println!(
+                "Comparison with baseline (relative speed, lower is better for the new results):"
+            );
+            for (bench, new_result) in new_results.iter() {
+                let Some(baseline_result) = baseline_results.get(*bench) else { continue };
+
+                // Compare results (inspired by hyperfine)
+                let ratio = new_result.mean / baseline_result.mean;
+                // https://en.wikipedia.org/wiki/Propagation_of_uncertainty#Example_formulae
+                // Covariance asssumed to be 0, i.e. variables are assumed to be independent
+                let ratio_stddev = ratio
+                    * f64::sqrt(
+                        (new_result.stddev / new_result.mean).powi(2)
+                            + (baseline_result.stddev / baseline_result.mean).powi(2),
+                    );
+
+                println!("  {bench}: {ratio:.2} ± {ratio_stddev:.2}");
+            }
+        }
+
         Ok(())
     }
 
diff --git a/src/tools/miri/miri-script/src/main.rs b/src/tools/miri/miri-script/src/main.rs
@@ -1,4 +1,4 @@
-#![allow(clippy::needless_question_mark)]
+#![allow(clippy::needless_question_mark, rustc::internal)]
 
 mod commands;
 mod coverage;
@@ -117,6 +117,14 @@ pub enum Command {
         /// When `true`, skip the `./miri install` step.
         #[arg(long)]
         no_install: bool,
+        /// Store the benchmark result in the given file, so it can be used
+        /// as the baseline for a future run.
+        #[arg(long)]
+        save_baseline: Option<String>,
+        /// Load previous stored benchmark results as baseline, and print an analysis of how the
+        /// current run compares.
+        #[arg(long)]
+        load_baseline: Option<String>,
         /// List of benchmarks to run (default: run all benchmarks).
         benches: Vec<String>,
     },