|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +"""Parallelise checking of all properties in a GOTO binary using CBMC |
| 4 | +
|
| 5 | +This script invokes a chosen number of parallel CBMC processes to check all the |
| 6 | +properties contained in a given GOTO binary. Each of these processes will check |
| 7 | +just a single property, possibly permitting slicing away considerable parts of |
| 8 | +the program. The expected benefit is that the results for some of the |
| 9 | +properties are reported very quickly, and that checking scales with the number |
| 10 | +of available processing cores. On the downside, however, multiple invocations |
| 11 | +of CBMC come with overhead, such as repeated symbolic execution. |
| 12 | +
|
| 13 | +Before running this script, the following must be true: |
| 14 | +
|
| 15 | + 1. "cbmc" must be in your system PATH |
| 16 | + 2. A GOTO binary has been built from source (using goto-cc) |
| 17 | + 3. The GOTO binary has been instrumented with all the required checks |
| 18 | + (using goto-instrument) |
| 19 | +
|
| 20 | +A typical usage of this script will be: |
| 21 | +
|
| 22 | + scripts/parallel-properties.py -J 32 binary.gb --timeout 600 |
| 23 | +
|
| 24 | +See --help for the list of available command-line options. |
| 25 | +""" |
| 26 | + |
| 27 | +import argparse |
| 28 | +import colorama |
| 29 | +from colorama import Fore, Style |
| 30 | +import concurrent.futures |
| 31 | +import json |
| 32 | +import logging |
| 33 | +import multiprocessing |
| 34 | +import re |
| 35 | +import subprocess |
| 36 | +import sys |
| 37 | +import threading |
| 38 | +import time |
| 39 | + |
| 40 | + |
| 41 | +def get_logger(verbose): |
| 42 | + # workaround for https://github.com/joblib/joblib/issues/692 |
| 43 | + LOGGER_NAME = 'parallel-cbmc' |
| 44 | + logger = logging.getLogger(LOGGER_NAME) |
| 45 | + logging.basicConfig() |
| 46 | + if verbose: |
| 47 | + logger.setLevel('DEBUG') |
| 48 | + else: |
| 49 | + logger.setLevel('INFO') |
| 50 | + return logger |
| 51 | + |
| 52 | + |
| 53 | +def add_to_stats(stats, key, value): |
| 54 | + key_path = key.split('/') |
| 55 | + k = key_path[0] |
| 56 | + |
| 57 | + if len(key_path) > 1: |
| 58 | + if not stats.get(k): |
| 59 | + stats[k] = {} |
| 60 | + add_to_stats(stats[k], '/'.join(key_path[1:]), value) |
| 61 | + else: |
| 62 | + if not stats.get(k): |
| 63 | + stats[k] = 0 |
| 64 | + stats[k] += value |
| 65 | + |
| 66 | + |
| 67 | +def merge_stats_rec(src, prefix, dest): |
| 68 | + if isinstance(src, dict): |
| 69 | + for k in src: |
| 70 | + p = prefix + '/' + k |
| 71 | + merge_stats_rec(src[k], p, dest) |
| 72 | + else: |
| 73 | + add_to_stats(dest, prefix[1:], src) |
| 74 | + |
| 75 | + |
| 76 | +def merge_stats(src, dest): |
| 77 | + merge_stats_rec(src, '', dest) |
| 78 | + |
| 79 | + |
| 80 | +def print_stats(stats): |
| 81 | + for cat in sorted(stats): |
| 82 | + print(cat + ':') |
| 83 | + csv = dict(stats[cat]) |
| 84 | + columns = set() |
| 85 | + for s in csv: |
| 86 | + if isinstance(csv[s], dict): |
| 87 | + columns |= set(csv[s].keys()) |
| 88 | + if not columns: |
| 89 | + print(cat + ',count') |
| 90 | + for s in sorted(csv): |
| 91 | + print('{},{}'.format(s, csv[s])) |
| 92 | + else: |
| 93 | + print(cat + ',' + ','.join(sorted(columns))) |
| 94 | + for s in sorted(csv): |
| 95 | + values = [s] |
| 96 | + for c in sorted(columns): |
| 97 | + values.append(str(csv[s].get(c, 0))) |
| 98 | + print(','.join(values)) |
| 99 | + |
| 100 | + |
| 101 | +def run_cmd_checked(cmd, verbose): |
| 102 | + """ |
| 103 | + Execute `cmd` in a subprocess and check the return code. Raise an exception |
| 104 | + if the return code is not 0. |
| 105 | + """ |
| 106 | + proc = subprocess.Popen( |
| 107 | + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, |
| 108 | + universal_newlines=True) |
| 109 | + (out, err) = proc.communicate() |
| 110 | + logger = get_logger(verbose) |
| 111 | + if logger.getEffectiveLevel() <= logging.DEBUG: |
| 112 | + with lock: |
| 113 | + logger.debug("BEGIN " + str(cmd)) |
| 114 | + logger.debug(out) |
| 115 | + logger.debug(err) |
| 116 | + logger.debug("END " + str(cmd)) |
| 117 | + if proc.returncode != 0: |
| 118 | + e = subprocess.CalledProcessError(proc.return_code, cmd, output=out) |
| 119 | + e.stdout = out |
| 120 | + e.stderr = err |
| 121 | + raise e |
| 122 | + return None |
| 123 | + |
| 124 | + |
| 125 | +def run_cmd_with_timeout(cmd, timeout_sec, verbose): |
| 126 | + # http://www.ostricher.com/2015/01/python-subprocess-with-timeout/ |
| 127 | + """ |
| 128 | + Execute `cmd` in a subprocess and enforce timeout `timeout_sec` seconds. |
| 129 | + Return subprocess exit code on natural completion of the subprocess. |
| 130 | + Raise an exception if timeout expires before subprocess completes. |
| 131 | + """ |
| 132 | + t1 = time.time() |
| 133 | + proc = subprocess.Popen( |
| 134 | + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, |
| 135 | + universal_newlines=True) |
| 136 | + timer = threading.Timer(timeout_sec, proc.kill) |
| 137 | + out = None |
| 138 | + err = None |
| 139 | + try: |
| 140 | + timer.start() |
| 141 | + (out, err) = proc.communicate() |
| 142 | + finally: |
| 143 | + timer.cancel() |
| 144 | + t2 = time.time() |
| 145 | + cnf_vars = cnf_clauses = None |
| 146 | + m = re.search(r'(\d+) variables, (\d+) clauses', out) |
| 147 | + if m: |
| 148 | + cnf_vars = m[1] |
| 149 | + cnf_clauses = m[2] |
| 150 | + logger = get_logger(verbose) |
| 151 | + if logger.getEffectiveLevel() <= logging.DEBUG: |
| 152 | + with lock: |
| 153 | + logger.debug("BEGIN " + str(cmd)) |
| 154 | + logger.debug(out) |
| 155 | + logger.debug("END " + str(cmd)) |
| 156 | + return (proc.returncode, err, t2 - t1, cnf_vars, cnf_clauses) |
| 157 | + |
| 158 | + |
| 159 | +C_ERROR = 'ERROR' |
| 160 | +C_TIMEOUT = 'TIMEOUT' |
| 161 | +C_TRUE = 'SUCCESS' |
| 162 | +C_FALSE = 'FAILURE' |
| 163 | + |
| 164 | + |
| 165 | +def source_location_as_string(loc): |
| 166 | + result = '' |
| 167 | + if loc.get('file'): |
| 168 | + result = 'file ' + loc['file'] |
| 169 | + if loc.get('function'): |
| 170 | + result += ' function ' + loc['function'] |
| 171 | + if loc.get('line'): |
| 172 | + result += ' line ' + loc['line'] |
| 173 | + return result.lstrip() |
| 174 | + |
| 175 | + |
| 176 | +def output_result(result_entry, stats, status, verbose): |
| 177 | + prop = result_entry['property'] |
| 178 | + loc = "{}:{}".format("missing file", "missing line") |
| 179 | + |
| 180 | + result = { |
| 181 | + 'property': prop, |
| 182 | + 'sourceLocation': loc, |
| 183 | + 'status': status, |
| 184 | + 'time': result_entry['time'] |
| 185 | + } |
| 186 | + |
| 187 | + """ |
| 188 | + add_to_stats(stats, 'warning type/{}/total'.format(prop), 1) |
| 189 | + add_to_stats(stats, 'warning type/{}/{}'.format(prop, status.lower()), 1) |
| 190 | + add_to_stats(stats, 'total time'.format(analysis), result_entry['time']) |
| 191 | + """ |
| 192 | + |
| 193 | + logger = get_logger(verbose) |
| 194 | + logger.debug('p:{}, r:{}'.format(prop['name'], status)) |
| 195 | + if status == C_TRUE: |
| 196 | + c_status = Fore.GREEN + status + Style.RESET_ALL |
| 197 | + elif status == C_FALSE: |
| 198 | + c_status = Fore.RED + status + Style.RESET_ALL |
| 199 | + elif status == C_ERROR: |
| 200 | + c_status = Fore.YELLOW + status + Style.RESET_ALL |
| 201 | + else: |
| 202 | + c_status = Fore.BLUE + status + Style.RESET_ALL |
| 203 | + result_str = '[{}] {}: {}: {}'.format( |
| 204 | + prop['name'], source_location_as_string(prop['sourceLocation']), |
| 205 | + prop['description'], c_status) |
| 206 | + if result_entry['variables']: |
| 207 | + result_str += ' [{}/{}]'.format(result_entry['variables'], |
| 208 | + result_entry['clauses']) |
| 209 | + if status == C_ERROR: |
| 210 | + result_str += ' stderr: ' + result_entry['stderr'] |
| 211 | + with lock: |
| 212 | + print(result_str) |
| 213 | + return (result, result_str) |
| 214 | + |
| 215 | + |
| 216 | +def verify_one(goto_binary, unwind, unwindset, unwinding_assertions, depth, |
| 217 | + object_bits, prop, verbose, timeout, stats): |
| 218 | + # run CBMC with extended statistics |
| 219 | + cbmc_cmd = ['cbmc', '--verbosity', '8', goto_binary, |
| 220 | + '--property', prop['name'], '--reachability-slice', |
| 221 | + # '--full-slice', |
| 222 | + '--slice-formula'] |
| 223 | + if unwind: |
| 224 | + cbmc_cmd.extend(['--unwind', unwind]) |
| 225 | + if unwindset: |
| 226 | + cbmc_cmd.extend(['--unwindset', unwindset]) |
| 227 | + if unwinding_assertions: |
| 228 | + cbmc_cmd.extend(['--unwinding-assertions']) |
| 229 | + if depth: |
| 230 | + cbmc_cmd.extend(['--depth', str(depth)]) |
| 231 | + if object_bits: |
| 232 | + cbmc_cmd.extend(['--object-bits', str(object_bits)]) |
| 233 | + (cbmc_ret, stderr_output, t, cnf_vars, cnf_clauses) = run_cmd_with_timeout( |
| 234 | + cbmc_cmd, timeout, verbose) |
| 235 | + result_entry = {'time': t, 'property': prop, 'variables': cnf_vars, |
| 236 | + 'clauses': cnf_clauses, 'stderr': stderr_output} |
| 237 | + if cbmc_ret == 0: |
| 238 | + return output_result(result_entry, stats, C_TRUE, verbose) |
| 239 | + elif cbmc_ret == 10: |
| 240 | + return output_result(result_entry, stats, C_FALSE, verbose) |
| 241 | + elif cbmc_ret < 0: |
| 242 | + return output_result(result_entry, stats, C_TIMEOUT, verbose) |
| 243 | + else: |
| 244 | + return output_result(result_entry, stats, C_ERROR, verbose) |
| 245 | + |
| 246 | + |
| 247 | +def verify(goto_binary, unwind, unwindset, unwinding_assertions, depth, |
| 248 | + object_bits, verbose, timeout, n_jobs, stats): |
| 249 | + # find names of desired properties |
| 250 | + show_prop_cmd = ['goto-instrument', '--verbosity', '4', '--json-ui', |
| 251 | + '--show-properties', goto_binary] |
| 252 | + props = subprocess.check_output(show_prop_cmd, universal_newlines=True) |
| 253 | + json_props = json.loads(props) |
| 254 | + if not json_props[1].get('properties'): |
| 255 | + return {} |
| 256 | + |
| 257 | + results = {} |
| 258 | + global lock |
| 259 | + lock = multiprocessing.Lock() |
| 260 | + |
| 261 | + with concurrent.futures.ThreadPoolExecutor(max_workers=n_jobs) as e: |
| 262 | + future_to_result = {e.submit(verify_one, goto_binary, unwind, |
| 263 | + unwindset, unwinding_assertions, depth, |
| 264 | + object_bits, prop, verbose, timeout, |
| 265 | + stats): prop |
| 266 | + for prop in json_props[1]['properties']} |
| 267 | + for future in concurrent.futures.as_completed(future_to_result): |
| 268 | + prop = future_to_result[future] |
| 269 | + (result_json, result_str) = future.result() |
| 270 | + results[prop['name']] = result_json |
| 271 | + |
| 272 | + return results |
| 273 | + |
| 274 | + |
| 275 | +def main(): |
| 276 | + parser = argparse.ArgumentParser(description=__doc__) |
| 277 | + parser.add_argument( |
| 278 | + '-j', '--json', |
| 279 | + type=str, |
| 280 | + help='write results to chosen file with JSON formatting'), |
| 281 | + parser.add_argument( |
| 282 | + '-s', '--statistics', |
| 283 | + action='store_true', |
| 284 | + help='output overall statistics') |
| 285 | + parser.add_argument( |
| 286 | + '-t', '--timeout', |
| 287 | + type=int, default=10, |
| 288 | + help='timeout (seconds) per analysis-tool invocation ' + |
| 289 | + '(default: 10 seconds; use more time in case of timeouts)') |
| 290 | + parser.add_argument( |
| 291 | + '-J', '--parallel', |
| 292 | + type=int, default=11, |
| 293 | + help='run a specified number of a analyses in parallel') |
| 294 | + parser.add_argument( |
| 295 | + '-v', '--verbose', |
| 296 | + action='store_true', |
| 297 | + help='enable verbose output') |
| 298 | + parser.add_argument( |
| 299 | + '--unwind', |
| 300 | + type=str, |
| 301 | + help='loop unwinding, forwarded to CBMC'), |
| 302 | + parser.add_argument( |
| 303 | + '--unwindset', |
| 304 | + type=str, |
| 305 | + help='loop unwinding, forwarded to CBMC'), |
| 306 | + parser.add_argument( |
| 307 | + '--unwinding-assertions', |
| 308 | + action='store_true', |
| 309 | + help='enable unwinding assertions, forwarded to CBMC'), |
| 310 | + parser.add_argument( |
| 311 | + '--depth', |
| 312 | + type=int, |
| 313 | + help='symex depth, forwarded to CBMC'), |
| 314 | + parser.add_argument( |
| 315 | + '--object-bits', |
| 316 | + type=int, |
| 317 | + help='object bits, forwarded to CBMC'), |
| 318 | + parser.add_argument( |
| 319 | + 'goto_binary', |
| 320 | + help='instrumented goto binary to verify') |
| 321 | + args = parser.parse_args() |
| 322 | + |
| 323 | + logger = get_logger(args.verbose) |
| 324 | + |
| 325 | + stats = {} |
| 326 | + results = {} |
| 327 | + results['results'] = verify( |
| 328 | + args.goto_binary, args.unwind, args.unwindset, |
| 329 | + args.unwinding_assertions, |
| 330 | + args.depth, args.object_bits, args.verbose, |
| 331 | + args.timeout, args.parallel, stats) |
| 332 | + |
| 333 | + if args.statistics: |
| 334 | + results['statistics'] = stats |
| 335 | + print_stats(stats) |
| 336 | + |
| 337 | + if args.json is not None: |
| 338 | + with open(args.json, 'w') as output_file: |
| 339 | + json.dump(results, output_file) |
| 340 | + |
| 341 | + |
| 342 | +if __name__ == "__main__": |
| 343 | + colorama.init() |
| 344 | + rc = main() |
| 345 | + sys.exit(rc) |
0 commit comments