Add performance regression comparison in CI (rust-lang#2370)

karkhaz · web-flow · commit 00c19e2b0295 · 2023-04-17T23:43:14.000+01:00
This commit adds a CI job that runs the benchcomp tool on the performance regression suite, comparing the HEAD of the pull request to the branch that the PR targets. The CI job fails if any performance benchmark regresses when run using the HEAD version of Kani with respect to the 'base' branch. A benchmark is considered to have regressed if its symex or solver time increases by more than 10% (checked only when that time is at least 2s with the HEAD version), or if it fails with the HEAD version while passing with the base. This fixes rust-lang#2277.
diff --git a/.github/workflows/kani.yml b/.github/workflows/kani.yml
@@ -226,3 +226,61 @@ jobs:
           if-no-files-found: error
           # Aggressively short retention: we don't really need these
           retention-days: 3
+
+  perf-benchcomp:  # build an 'old' and a 'new' Kani checkout, then run benchcomp on them
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Save push event HEAD and HEAD~ to environment variables
+        if: ${{ github.event_name == 'push' }}
+        run: |
+          echo "NEW_REF=${{ github.event.after}}" | tee -a "$GITHUB_ENV"
+          # We want to compare with $NEW_REF~. But we can't know what
+          # that ref actually is until we clone the repository, so for
+          # now make it equal to $NEW_REF
+          echo "OLD_REF=${{ github.event.after }}" | tee -a "$GITHUB_ENV"
+
+      - name: Save pull request HEAD and target to environment variables
+        if: ${{ github.event_name == 'pull_request' }}
+        run: |
+          echo "OLD_REF=${{ github.event.pull_request.base.sha }}" | tee -a "$GITHUB_ENV"
+          echo "NEW_REF=${{ github.event.pull_request.head.sha }}" | tee -a "$GITHUB_ENV"
+
+      - name: Check out Kani (old variant)
+        uses: actions/checkout@v3
+        with:
+          path: ./old  # baseline checkout
+          ref: ${{ env.OLD_REF }}
+          fetch-depth: 2  # also fetch the parent commit; the push-event step below checks it out
+
+      - name: Check out HEAD~ of push event as 'old' variant
+        if: ${{ github.event_name == 'push' }}
+        run: pushd old && git checkout "${NEW_REF}^"
+
+      - name: Check out Kani (new variant)
+        uses: actions/checkout@v3
+        with:
+          path: ./new  # checkout under test
+          ref: ${{ env.NEW_REF }}
+
+      - name: Set up Kani Dependencies (old variant)
+        uses: ./old/.github/actions/setup  # setup action taken from the old checkout itself
+        with:
+          os: ubuntu-20.04
+          kani_dir: old
+
+      - name: Set up Kani Dependencies (new variant)
+        uses: ./new/.github/actions/setup  # setup action taken from the new checkout itself
+        with:
+          os: ubuntu-20.04
+          kani_dir: new
+
+      - name: Build Kani (new variant)
+        run: pushd new && cargo build-dev
+
+      - name: Build Kani (old variant)
+        run: pushd old && cargo build-dev
+
+      - name: Run benchcomp
+        run: |
+          new/tools/benchcomp/bin/benchcomp \
+            --config new/tools/benchcomp/configs/perf-regression.yaml
diff --git a/tools/benchcomp/benchcomp/entry/run.py b/tools/benchcomp/benchcomp/entry/run.py
@@ -57,7 +57,8 @@ def __call__(self):
 
         if self.copy_benchmarks_dir:
             shutil.copytree(
-                self.directory, self.working_copy, ignore_dangling_symlinks=True)
+                self.directory, self.working_copy,
+                ignore_dangling_symlinks=True, symlinks=True)
 
         try:
             subprocess.run(
diff --git a/tools/benchcomp/benchcomp/visualizers/utils.py b/tools/benchcomp/benchcomp/visualizers/utils.py
@@ -3,6 +3,7 @@
 
 
 import dataclasses
+import logging
 import typing
 
 import benchcomp.visualizers
@@ -82,7 +83,7 @@ def __call__(self, results):
                 new = bench["variants"][new_variant]["metrics"][self.metric]
 
                 if has_regressed(old, new):
-                    logging.warining(
+                    logging.warning(
                         "Benchmark '%s' regressed on metric '%s' (%s -> %s)",
                         bench_name, self.metric, old, new)
                     ret = True
diff --git a/tools/benchcomp/configs/README.md b/tools/benchcomp/configs/README.md
@@ -0,0 +1,4 @@
+Example Benchcomp Configurations
+================================
+
+The files in this directory can be passed to benchcomp's -c/--config flag.
diff --git a/tools/benchcomp/configs/perf-regression.yaml b/tools/benchcomp/configs/perf-regression.yaml
@@ -0,0 +1,41 @@
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+#
+# Run the Kani perf suite twice, erroring out on regression. This config
+# file is primarily intended to be used in CI, because it assumes that
+# there are two Kani checkouts in the 'old' and 'new' directories;
+# benchcomp compares the performance of these two checkouts.
+
+variants:  # one entry per Kani checkout being compared
+  kani_new:  # uses the checkout in the 'new' directory
+    config:
+      directory: new
+      command_line: PATH=$(realpath new/scripts):$PATH scripts/kani-perf.sh
+      env:
+        RUST_TEST_THREADS: "1"  # NOTE(review): presumably serializes tests for stable timings - confirm
+  kani_old:  # uses the checkout in the 'old' directory
+    config:
+      directory: old
+      command_line: PATH=$(realpath old/scripts):$PATH scripts/kani-perf.sh
+      env:
+        RUST_TEST_THREADS: "1"  # same setting as kani_new
+
+run:
+  suites:
+    kani_perf:  # the only suite defined in this config
+      parser:
+        module: kani_perf  # NOTE(review): presumably names a benchcomp parser module - confirm
+      variants: [kani_old, kani_new]  # both variants defined under 'variants' above
+
+visualize:
+  - type: error_on_regression
+    variant_pairs: [[kani_old, kani_new]]
+    checks:
+      - metric: success
+        # Each test is a lambda over the old and new values of the metric;
+        # the benchmark has regressed if the lambda returns true.
+        test: "lambda old, new: False if not old else not new"
+      - metric: solver_runtime
+        test: "lambda old, new: False if new < 2 else new > 1.1 * old"  # no division: old may be 0
+      - metric: symex_runtime
+        test: "lambda old, new: False if new < 2 else new > 1.1 * old"  # no division: old may be 0