Allow executable parsers for benchcomp (rust-lang#2521)

karkhaz · web-flow · commit d8a75729e246 · 2023-06-14T13:28:42.000Z
Users can now specify a command that will be run to parse the result of
a single suite x variant run, as an alternative to specifying a python
module that is checked into the Kani codebase. This allows for parsers
to be maintained outside the Kani codebase.
diff --git a/docs/src/benchcomp-parse.md b/docs/src/benchcomp-parse.md
@@ -0,0 +1,61 @@
+# Custom parsers
+
+Benchcomp ships with built-in *parsers* that retrieve the results of a benchmark suite after the run has completed.
+You can also create your own parser, either to run locally or to check into the Kani codebase.
+
+## Built-in parsers
+
+You specify which parser should run for each benchmark suite in `benchcomp.yaml`.
+For example, if you're running the Kani performance suite, you would use the built-in `kani_perf` parser to parse the results:
+
+```yaml
+suites:
+    my_benchmark_suite:
+      variants: [variant_1, variant_2]
+      parser:
+        module: kani_perf
+```
+
+## Custom parsers
+
+A parser is a program that benchcomp runs inside the root directory of a benchmark suite, after the suite run has completed.
+The parser should retrieve the results of the run (by parsing output files etc.) and print the results out as a YAML document.
+You can use your executable parser by specifying the `command` key rather than the `module` key in your `benchcomp.yaml` file:
+
+```yaml
+suites:
+    my_benchmark_suite:
+      variants: [variant_1, variant_2]
+      parser:
+        command: ./my-cool-parser.sh
+```
+
+The `kani_perf` parser mentioned above, in `tools/benchcomp/benchcomp/parsers/kani_perf.py`, is a good starting point for writing a custom parser, as it also works as a standalone executable.
+Here is an example output from an executable parser:
+
+```yaml
+metrics:
+    runtime: {}
+    success: {}
+    errors: {}
+benchmarks:
+    bench_1:
+        metrics:
+            runtime: 32
+            success: true
+            errors: []
+    bench_2:
+        metrics:
+            runtime: 0
+            success: false
+            errors: ["compilation failed"]
+```
+
+The above format is different from the final `result.yaml` file that benchcomp writes, because the above output represents the result of running a single benchmark suite using a single variant.
+Your parser will run once for each variant, and benchcomp combines the dictionaries into the final `result.yaml` file.
+
+
+## Contributing custom parsers to Kani
+
+To turn your executable parser into one that benchcomp can invoke as a module, ensure that it has a `main(working_directory)` function that returns a dict (the same dict that it would print out as a YAML document to stdout).
+Save the file in `tools/benchcomp/benchcomp/parsers` using Python module naming conventions (the filename should be a valid identifier and end in `.py`).
diff --git a/tools/benchcomp/benchcomp/entry/run.py b/tools/benchcomp/benchcomp/entry/run.py
@@ -10,17 +10,18 @@
 
 
 import dataclasses
-import importlib
 import logging
 import os
 import pathlib
 import shutil
 import subprocess
+import typing
 import uuid
 
 import yaml
 
 import benchcomp
+import benchcomp.parsers
 
 
 @dataclasses.dataclass
@@ -30,7 +31,7 @@ class _SingleInvocation:
     suite_id: str
     variant_id: str
 
-    parser: str
+    parse: typing.Any
 
     suite_yaml_out_dir: pathlib.Path
     copy_benchmarks_dir: bool
@@ -73,9 +74,7 @@ def __call__(self):
                 "Invocation of suite %s with variant %s failed", self.suite_id,
                 self.variant_id)
 
-        parser_mod_name = f"benchcomp.parsers.{self.parser}"
-        parser = importlib.import_module(parser_mod_name)
-        suite = parser.main(self.working_copy)
+        suite = self.parse(self.working_copy)
 
         suite["suite_id"] = self.suite_id
         suite["variant_id"] = self.variant_id
@@ -103,13 +102,13 @@ def __call__(self):
         out_path.mkdir(parents=True)
 
         for suite_id, suite in self.config["run"]["suites"].items():
+            parse = benchcomp.parsers.get_parser(suite["parser"])
             for variant_id in suite["variants"]:
                 variant = self.config["variants"][variant_id]
                 config = dict(variant).pop("config")
                 invoke = _SingleInvocation(
                     suite_id, variant_id,
-                    suite["parser"]["module"],
-                    suite_yaml_out_dir=out_path,
+                    parse, suite_yaml_out_dir=out_path,
                     copy_benchmarks_dir=self.copy_benchmarks_dir,
                     **config)
                 invoke()
diff --git a/tools/benchcomp/benchcomp/parsers/__init__.py b/tools/benchcomp/benchcomp/parsers/__init__.py
@@ -1,2 +1,98 @@
 # Copyright Kani Contributors
 # SPDX-License-Identifier: Apache-2.0 OR MIT
+#
+# Each *Parser class here specifies a different way that a parser can be
+# invoked: as an executable (for parsers that users write on their local
+# machine) or as a Python module (that is checked into the Kani codebase).
+
+# Each class has a __call__ method that takes a directory. The directory should
+# be a benchmark suite that has completed a run. The __call__ method parses and
+# returns the result of the run (by parsing output files in the directory etc).
+
+
+import dataclasses
+import subprocess
+import logging
+import importlib
+import sys
+
+import yaml
+
+
+def get_parser(parser_config):
+    """Return the parser callable selected by a suite's parser configuration.
+
+    parser_config is the dict given under a suite's `parser` key. A `module`
+    key selects a python module under benchcomp.parsers; a `command` key
+    selects a shell command. `module` wins if both keys are present.
+
+    The returned object is called with a suite directory and returns the
+    parse result. If neither key is present, an error is logged and the
+    process exits with status 1.
+    """
+    if "module" in parser_config:
+        return _ModuleParser(parser_config["module"])
+    if "command" in parser_config:
+        return _CommandParser(parser_config["command"])
+
+    # Adjacent string literals are joined verbatim, so the first fragment
+    # needs its own trailing space.
+    logging.error(
+        "Parser dict should contain either a "
+        "'module' or 'command' key: '%s'", str(parser_config))
+    sys.exit(1)
+
+
+
+class _ModuleParser:
+    """A parser implemented as a module under benchcomp.parsers
+
+    The module is imported once, when the object is constructed. Calling the
+    object runs the module's main() on a suite directory.
+    """
+
+    def __init__(self, mod):
+        """Import benchcomp.parsers.<mod>.
+
+        Logs an error and exits the process with status 1 if the import fails.
+        """
+        self.parser_mod_name = f"benchcomp.parsers.{mod}"
+        try:
+            self.parser = importlib.import_module(self.parser_mod_name)
+        except (Exception, SystemExit) as exe:
+            # SystemExit is caught so that a module calling sys.exit() at
+            # import time is still reported; KeyboardInterrupt propagates so
+            # that the user can abort.
+            logging.error(
+                "Failed to load parser module %s: %s",
+                self.parser_mod_name, str(exe))
+            sys.exit(1)
+
+    def __call__(self, root_directory):
+        """Return the result of the module's main(root_directory).
+
+        If main() raises an exception or calls sys.exit(), the failure is
+        logged and an empty parse result is returned instead. A
+        KeyboardInterrupt is not intercepted.
+        """
+        try:
+            return self.parser.main(root_directory)
+        except (Exception, SystemExit) as exe:
+            logging.error(
+                "Parser '%s' in directory %s failed: %s",
+                self.parser_mod_name, str(root_directory), str(exe))
+            return get_empty_parser_result()
+
+
+
+@dataclasses.dataclass
+class _CommandParser:
+    """A parser that is a command that prints the parse result to stdout
+
+    Calling the object runs the command through the shell inside a suite
+    directory and loads the command's stdout as a YAML document.
+    """
+
+    # Command line taken from the suite's parser config. It is interpreted by
+    # the shell, so it must come from a trusted configuration file.
+    shell_cmd: str
+
+    def __call__(self, root_directory):
+        """Run the command in root_directory and return the parsed output.
+
+        An empty parse result is returned, after logging, if the command
+        cannot be started, runs for more than 120 seconds, exits with a
+        non-zero code, prints invalid YAML, or prints a YAML document that is
+        not a mapping.
+        """
+        try:
+            # subprocess.run() kills the child when the timeout expires and,
+            # with check=True, raises CalledProcessError on a non-zero exit.
+            # A bare Popen.communicate() does neither.
+            proc = subprocess.run(
+                self.shell_cmd, shell=True, text=True, check=True,
+                stdout=subprocess.PIPE, cwd=root_directory, timeout=120)
+        except subprocess.CalledProcessError as exc:
+            logging.warning(
+                "Invocation of parser '%s' in directory %s exited with code %d",
+                self.shell_cmd, str(root_directory), exc.returncode)
+            return get_empty_parser_result()
+        except (OSError, subprocess.SubprocessError) as exe:
+            logging.error(
+                "Invocation of parser '%s' in directory %s failed: %s",
+                self.shell_cmd, str(root_directory), str(exe))
+            return get_empty_parser_result()
+
+        out = proc.stdout
+        try:
+            result = yaml.safe_load(out)
+        except yaml.YAMLError:
+            logging.error(
+                "Parser '%s' in directory %s printed invalid YAML:<%s>",
+                self.shell_cmd, str(root_directory), out)
+            return get_empty_parser_result()
+
+        # Empty output loads as None and a bare scalar loads as str/int;
+        # callers add keys to the result, so only a mapping is usable.
+        if not isinstance(result, dict):
+            logging.error(
+                "Parser '%s' in directory %s did not print a YAML mapping:<%s>",
+                self.shell_cmd, str(root_directory), out)
+            return get_empty_parser_result()
+
+        return result
+
+
+
+def get_empty_parser_result():
+    """Return a new parse result that has no benchmarks and no metrics."""
+    # A fresh dict (with fresh inner dicts) is built on every call, so callers
+    # may mutate the result freely.
+    return {section: {} for section in ("benchmarks", "metrics")}
diff --git a/tools/benchcomp/benchcomp/parsers/kani_perf.py b/tools/benchcomp/benchcomp/parsers/kani_perf.py
@@ -2,9 +2,14 @@
 # SPDX-License-Identifier: Apache-2.0 OR MIT
 
 
+import os
 import pathlib
-import textwrap
 import re
+import textwrap
+
+import yaml
+
+import benchcomp.parsers
 
 
 def get_description():
@@ -107,3 +112,13 @@ def main(root_dir):
         "metrics": get_metrics(),
         "benchmarks": benchmarks,
     }
+
+
+if __name__ == "__main__":
+    # Standalone mode: parse the suite in the current directory and print the
+    # result to stdout as a YAML document. On failure, an empty parse result
+    # is printed instead so that stdout is always a valid document.
+    try:
+        document = yaml.dump(main(os.getcwd()), default_flow_style=False)
+    except Exception:
+        # Catch Exception rather than BaseException so that Ctrl-C and
+        # sys.exit() still terminate the script. The traceback goes to stderr,
+        # which keeps stdout parseable.
+        import traceback
+        traceback.print_exc()
+        document = yaml.dump(
+            benchcomp.parsers.get_empty_parser_result(),
+            default_flow_style=False)
+    print(document)
diff --git a/tools/benchcomp/test/test_regression.py b/tools/benchcomp/test/test_regression.py
@@ -690,3 +690,50 @@ def test_env(self):
                 result["benchmarks"]["suite_1"]["variants"][
                     "env_unset"]["metrics"]["foos"], 0,
                 msg=yaml.dump(result, default_flow_style=False))
+
+
+    def test_command_parser(self):
+        """Ensure that a suite whose parser is a command produces a result"""
+
+        # Shell snippet that prints a parse result with no benchmarks or
+        # metrics.
+        parser_command = """
+                                    echo '{
+                                        "benchmarks": {},
+                                        "metrics": {}
+                                    }'
+                                """
+
+        with tempfile.TemporaryDirectory() as tmp:
+            # Both variants run the no-op `true` command in the same directory.
+            variants = {
+                name: {"config": {"command_line": "true", "directory": tmp}}
+                for name in ("v1", "v2")
+            }
+            suite = {
+                "parser": {"command": parser_command},
+                "variants": ["v2", "v1"],
+            }
+            run_bc = Benchcomp({
+                "variants": variants,
+                "run": {"suites": {"suite_1": suite}},
+                "visualize": [],
+            })
+            run_bc()
+            self.assertEqual(
+                run_bc.proc.returncode, 0, msg=run_bc.stderr)
+
+            result_path = run_bc.working_directory / "result.yaml"
+            with open(result_path) as handle:
+                result = yaml.safe_load(handle)
+
+            for key in ("benchmarks", "metrics"):
+                self.assertIn(key, result)