diff --git a/.github/workflows/csmith.yaml b/.github/workflows/csmith.yaml new file mode 100644 index 00000000000..2aedd9582eb --- /dev/null +++ b/.github/workflows/csmith.yaml @@ -0,0 +1,45 @@ +name: Run CSmith + +on: + pull_request: + branches: [ develop ] + +jobs: + CompileXen: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - name: Fetch dependencies + env: + # This is needed in addition to -yq to prevent apt-get from asking for + # user input + DEBIAN_FRONTEND: noninteractive + run: | + sudo apt-get install --no-install-recommends -y build-essential flex bison maven ccache + sudo apt-get install --no-install-recommends -y csmith libcsmith-dev + make -C src minisat2-download + - name: Prepare ccache + uses: actions/cache@v2 + with: + path: .ccache + key: ${{ runner.os }}-20.04-make-${{ github.ref }}-${{ github.sha }}-CSMITH + restore-keys: | + ${{ runner.os }}-20.04-make-${{ github.ref }} + ${{ runner.os }}-20.04-make + - name: ccache environment + run: | + echo "CCACHE_BASEDIR=$PWD" >> $GITHUB_ENV + echo "CCACHE_DIR=$PWD/.ccache" >> $GITHUB_ENV + - name: Zero ccache stats and limit in size + run: ccache -z --max-size=500M + - name: Build with make + run: | + make -C src CXX='ccache /usr/bin/g++' cbmc.dir goto-cc.dir goto-instrument.dir -j2 + - name: Print ccache stats + run: ccache -s + - name: Run 10 randomly-generated CSmith tests + run: | + export PATH=$PWD/src/cbmc:$PWD/src/goto-cc:$PWD/src/goto-instrument:$PATH + scripts/csmith.sh 10 diff --git a/scripts/csmith.sh b/scripts/csmith.sh new file mode 100755 index 00000000000..1ee73f0a5b2 --- /dev/null +++ b/scripts/csmith.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# +# This script runs CSmith to generate N examples, and for each of them: +# 1. Compiles the code with the system compiler. +# 2. Runs the compiled code to obtain the checksum that the CSmith test +# computes. +# 3. Takes that checksum to generate an assertion. +# 4. Runs CBMC (for up to 90 seconds and with a loop unwinding of 257) to verify +# the generated assertion. If CBMC were to come up with a counterexample, then +# CBMC's symbolic execution would differ from the concrete execution of what +# ought to be a fully deterministic test. Such a failing test would therefore +# indicate a bug in CBMC. For such a failing case, the source code of the test +# is emitted. +# 5. If CBMC's execution completed successfully or timed out, the test is next +# compiled using goto-cc and dumped back to C code using goto-instrument. +# 6. The dumped C code is compiled using GCC and executed again, computing a +# fresh checksum. +# 7. This checksum is compared to the previous one to ensure that the program is +# equivalent to the original test generated by CSmith. +# +# The number "N" of such tests run can be specified as a command-line +# argument to this script. If no command-line arguments are provided, only a +# single test is run. +# +# As a second command-line argument, a random seed can be provided. This is +# primarily useful to reproduce earlier failures. When a random seed is +# provided, N is forced to 1. +# +# The script assumes that csmith (including its development headers) is +# installed (or otherwise found on the PATH). cbmc, goto-cc, and goto-instrument +# must also be found via the system PATH. + +set -e + +workdir=$(mktemp -d csmith.XXX) +print_dir() { + echo "CSmith working directory: $workdir" +} + +# Run a single (previously generated) test. +csmith_test() { + trap print_dir ERR + + local f=$1 + local r=$(cbmc --version) + local bn=`basename $f` + bn=`basename $bn .c` + + echo "Trying $f with build $r" + + # Compile and run (for up to 30 seconds) the CSmith-generated test. + gcc -I/usr/include/csmith -w $f -o $bn + check="`timeout 30 ./$bn | sed 's/checksum = /0x/'`" + if [ -z "$check" ] ; then + echo "TIMEOUT while executing CSmith test" + return 0 + fi + + # Prepare the test CBMC by injecting the checksum as an assertion. + gcc -E -I/usr/include/csmith -D__FRAMAC \ + -D'Frama_C_dump_assert_each()'="assert($check==(crc))" $f -o $bn.i + # Run the test using CBMC for up to 90 seconds, unwinding loops up to 257 + # times. + ec=0 + /usr/bin/time -v timeout -k15 90 cbmc \ + --unwind 257 --no-unwinding-assertions --object-bits 13 \ + $bn.i || ec=$? + if [ $ec -eq 124 ] || [ $ec -eq 137 ] ; then + echo "TIMEOUT in CBMC" + elif [ $ec -eq 6 ] ; then + echo "Limitation in CBMC" + elif [ $ec -ne 0 ] ; then + cat $f + return 1 + fi + + # Compile the test using goto-cc and dump it back to C code using + # goto-instrument. + goto-cc -I /usr/include/csmith $f -o $bn + goto-instrument --dump-c $bn ${bn}_dumped.c + # Re-compile the dumped test and execute again to confirm the checksum hasn't + # changed. + gcc ${bn}_dumped.c -D__CPROVER_bool=_Bool -w -o $bn + check_d="`timeout 30 ./$bn | sed 's/checksum = /0x/'`" + if [ -z "$check_d" ] ; then + echo "TIMEOUT while executing dumped CSmith test" + return 0 + fi + if [ "$check" != "$check_d" ] ; then + echo "Checksum mismatch: GCC: $check dump-c: $check_d" + cat $f + return 1 + fi + + echo "OK: $f" + echo +} + +# Generate and run a number of tests. +csmith_random_test() { + trap print_dir ERR + + local fixed_seed=$2 + + for i in `seq 1 $1`; do + local seed=`date +%s` + if [ "x$fixed_seed" != x ] ; then + seed=$fixed_seed + fi + echo "Random seed being used: $seed" + csmith --seed $seed > t_$i.c + if ! csmith_test t_$i.c ; then + echo "Failed test obtained with random seed $seed" + return 1 + fi + done +} + +# Determine the number of tests to run from the command line; the default is to +# run a single test. +N=$1 +if [ -z $N ] ; then + N=1 +fi + +# A random seed may be provided on the command line. Really only makes sense +# when N is set to 1 +SEED=$2 +if [ $N != 1 ] ; then + echo "Only running a single test with fixed random seed" + N=1 +fi + +basedir=$PWD +cd $workdir +trap print_dir ERR + +csmith_random_test $N $SEED + +cd $basedir +rm -rf $workdir