From 6827ce54dba53917de9e5da26d44039f8d00b551 Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Sun, 18 Dec 2022 14:06:22 -0600 Subject: [PATCH] Add enumerative loop invariant synthesizer Implement the functionality described below. Motivation --- This loop invariant synthesizer use the idea counter-example-guided synthesis (CEGIS) to synthesize loop invariants for programs with only checks instrumented by `goto-instrument` with flag `--pointer-check`. This PR contain the driver of the synthesizer and the verifier we use to check invariant candidates. Verifier --- The verifier take as input a goto program with pointer checks and a map from loop id to loop invariant candidates. It first annotate and apply the loop invariant candidates into the goto model; and then simulate the CBMC api to verify the instrumented goto program. If there are some violations---loop invariants are not inductive, or some pointer checks fail---, it record valuation from trace generated by the back end to construct a formatted counterexample `cext`. Counterexample --- A counterexample `cext` record valuations of variables in the trace, and other information about the violation. The valuation we record including 1. set of live variables upon the entry of the loop. 2. the havoced value of all primitive-typed variables; 3. the havoced offset and the object size of all pointer-typed variables; 4. history values of 2 and 3. The valuations will be used as true positive (history values) and true negative (havcoed valuation) to filter out bad invariant clause with the idea of the Daikon invariant detector in a following PR. However, in this PR we only construct the valuation but not actually use them. Synthesizer --- Loop invariants we synthesize are of the form `` (in_clause || !guard) && (!guard -> pos_clause)`` where `in_clause` and `out_clause` are predicates we store in two different map, and `guard` is the loop guard. The idea is that we separately synthesize the condition `in_clause` that should hold before the execution of the loop body, and condition `pos_clause` that should hold as post-condition of the loop. When the violation happen in the loop, we update `in_clause`. When the violation happen after the loop, we update `pos_clause`. When the invariant candidate it not inductive, we enumerate strengthening clause to make it inductive. To be more efficient, we choose different synthesis strategy for different type of violation * For out-of-boundary violation, we choose to use the violated predicate as the new clause, which is the WLP of the violation if the violation is only dependent on the havocing instruction. **TODO**: to make it more complete, we need to implement a WLP algorithm * For null-pointer violation, we choose `__CPROVER_same_object(ptr, __CPROVER_loop_entry(ptr))` as the new clause. That is, the havoced pointer should points to the same object at the start of every iteration of the loop. It is a heuristic choice. This can be extended with the idea of alias analysis if needed. * For invariant-not-preserved violation, we enumerate strengthening clauses and check that if the invariant will be inductive after strengthening (disjunction with the new clause). The synthesizer works as follow 1. initialize the two invariant maps, 2. verify the current candidates built from the two maps, _a. return the candidate if there is no violation _b. synthesize a new clause to resolve the **first** violation and add it to the correct map, 3. repeat 2. --- regression/goto-synthesizer/CMakeLists.txt | 3 +- regression/goto-synthesizer/Makefile | 6 +- regression/goto-synthesizer/chain.sh | 18 +- .../loop_contracts_synthesis_01/test.desc | 2 +- .../loop_contracts_synthesis_02/main.c | 17 + .../loop_contracts_synthesis_02/test.desc | 11 + .../loop_contracts_synthesis_03/main.c | 16 + .../loop_contracts_synthesis_03/test.desc | 12 + .../loop_contracts_synthesis_04/main.c | 17 + .../loop_contracts_synthesis_04/test.desc | 11 + src/goto-instrument/contracts/contracts.cpp | 96 ++- src/goto-instrument/contracts/contracts.h | 23 + src/goto-instrument/contracts/utils.cpp | 46 ++ src/goto-instrument/contracts/utils.h | 17 + src/goto-programs/loop_ids.h | 7 + src/goto-synthesizer/Makefile | 24 +- src/goto-synthesizer/cegis_verifier.cpp | 589 ++++++++++++++++++ src/goto-synthesizer/cegis_verifier.h | 159 +++++ ...enumerative_loop_contracts_synthesizer.cpp | 332 +++++++++- .../enumerative_loop_contracts_synthesizer.h | 35 +- src/goto-synthesizer/expr_enumerator.cpp | 42 +- .../goto_synthesizer_parse_options.cpp | 2 +- .../loop_contracts_synthesizer_base.h | 9 +- src/goto-synthesizer/module_dependencies.txt | 3 + src/goto-synthesizer/synthesizer_utils.cpp | 29 +- src/goto-synthesizer/synthesizer_utils.h | 13 +- 26 files changed, 1490 insertions(+), 49 deletions(-) create mode 100644 regression/goto-synthesizer/loop_contracts_synthesis_02/main.c create mode 100644 regression/goto-synthesizer/loop_contracts_synthesis_02/test.desc create mode 100644 regression/goto-synthesizer/loop_contracts_synthesis_03/main.c create mode 100644 regression/goto-synthesizer/loop_contracts_synthesis_03/test.desc create mode 100644 regression/goto-synthesizer/loop_contracts_synthesis_04/main.c create mode 100644 regression/goto-synthesizer/loop_contracts_synthesis_04/test.desc create mode 100644 src/goto-synthesizer/cegis_verifier.cpp create mode 100644 src/goto-synthesizer/cegis_verifier.h diff --git a/regression/goto-synthesizer/CMakeLists.txt b/regression/goto-synthesizer/CMakeLists.txt index 0bae5992db2..f05af9306c2 100644 --- a/regression/goto-synthesizer/CMakeLists.txt +++ b/regression/goto-synthesizer/CMakeLists.txt @@ -12,9 +12,8 @@ else() set(gcc_only_string "") endif() - add_test_pl_tests( - "${CMAKE_CURRENT_SOURCE_DIR}/chain.sh $ $ $ ${is_windows}" + "${CMAKE_CURRENT_SOURCE_DIR}/chain.sh $ $ $ $ ${is_windows}" ) ## Enabling these causes a very significant increase in the time taken to run the regressions diff --git a/regression/goto-synthesizer/Makefile b/regression/goto-synthesizer/Makefile index ae4556c8ea7..68f8d95d5c8 100644 --- a/regression/goto-synthesizer/Makefile +++ b/regression/goto-synthesizer/Makefile @@ -14,16 +14,16 @@ else endif test: - @../test.pl -e -p -c '../chain.sh $(exe) ../../../src/goto-synthesizer/goto-synthesizer ../../../src/cbmc/cbmc $(is_windows)' -X smt-backend $(GCC_ONLY) + @../test.pl -e -p -c '../chain.sh $(exe) ../../../src/goto-instrument/goto-instrument ../../../src/goto-synthesizer/goto-synthesizer ../../../src/cbmc/cbmc $(is_windows)' -X smt-backend $(GCC_ONLY) test-cprover-smt2: - @../test.pl -e -p -c '../chain.sh $(exe) ../../../src/goto-synthesizer/goto-synthesizer "../../../src/cbmc/cbmc --cprover-smt2" $(is_windows)' \ + @../test.pl -e -p -c '../chain.sh $(exe) ../../../src/goto-instrument/goto-instrument ../../../src/goto-synthesizer/goto-synthesizer "../../../src/cbmc/cbmc --cprover-smt2" $(is_windows)' \ -X broken-smt-backend -X thorough-smt-backend \ -X broken-cprover-smt-backend -X thorough-cprover-smt-backend \ -s cprover-smt2 $(GCC_ONLY) test-z3: - @../test.pl -e -p -c '../chain.sh $(exe) ../../../src/goto-synthesizer/goto-synthesizer "../../../src/cbmc/cbmc --z3" $(is_windows)' \ + @../test.pl -e -p -c '../chain.sh $(exe) ../../../src/goto-instrument/goto-instrument ../../../src/goto-synthesizer/goto-synthesizer "../../../src/cbmc/cbmc --z3" $(is_windows)' \ -X broken-smt-backend -X thorough-smt-backend \ -X broken-z3-smt-backend -X thorough-z3-smt-backend \ -s z3 $(GCC_ONLY) diff --git a/regression/goto-synthesizer/chain.sh b/regression/goto-synthesizer/chain.sh index 3751b4b8403..49e9c85f91d 100755 --- a/regression/goto-synthesizer/chain.sh +++ b/regression/goto-synthesizer/chain.sh @@ -3,14 +3,15 @@ set -e goto_cc=$1 -goto_synthesizer=$2 -cbmc=$3 -is_windows=$4 +goto_instrument=$2 +goto_synthesizer=$3 +cbmc=$4 +is_windows=$5 name=${*:$#} name=${name%.c} -args=${*:5:$#-5} +args=${*:6:$#-6} if [[ "$args" != *" _ "* ]] then args_inst=$args @@ -27,7 +28,9 @@ else fi rm -f "${name}-mod.gb" -$goto_synthesizer ${args_inst} "${name}.gb" "${name}-mod.gb" +rm -f "${name}-mod-2.gb" +echo "Running goto-instrument: " +$goto_instrument ${args_inst} "${name}.gb" "${name}-mod.gb" if [ ! -e "${name}-mod.gb" ] ; then cp "$name.gb" "${name}-mod.gb" elif echo $args_inst | grep -q -- "--dump-c" ; then @@ -41,4 +44,7 @@ elif echo $args_inst | grep -q -- "--dump-c" ; then rm "${name}-mod.c" fi -$cbmc "${name}-mod.gb" ${args_cbmc} +echo "Running goto-synthesizer: " +$goto_synthesizer "${name}-mod.gb" "${name}-mod-2.gb" +echo "Running CBMC: " +$cbmc "${name}-mod-2.gb" ${args_cbmc} diff --git a/regression/goto-synthesizer/loop_contracts_synthesis_01/test.desc b/regression/goto-synthesizer/loop_contracts_synthesis_01/test.desc index 8b97754a993..405303402c7 100644 --- a/regression/goto-synthesizer/loop_contracts_synthesis_01/test.desc +++ b/regression/goto-synthesizer/loop_contracts_synthesis_01/test.desc @@ -1,6 +1,6 @@ CORE main.c - +--pointer-check ^EXIT=0$ ^SIGNAL=0$ ^\[main\.\d+\] line 10 Check loop invariant before entry: SUCCESS$ diff --git a/regression/goto-synthesizer/loop_contracts_synthesis_02/main.c b/regression/goto-synthesizer/loop_contracts_synthesis_02/main.c new file mode 100644 index 00000000000..09a782991fd --- /dev/null +++ b/regression/goto-synthesizer/loop_contracts_synthesis_02/main.c @@ -0,0 +1,17 @@ +#define SIZE 80 + +void main() +{ + unsigned len; + __CPROVER_assume(len <= SIZE); + __CPROVER_assume(len >= 8); + char *array = malloc(len); + unsigned s = 0; + + for(unsigned i = 0; i < SIZE; ++i) + { + if(i == len - 1) + break; + s += array[i]; + } +} diff --git a/regression/goto-synthesizer/loop_contracts_synthesis_02/test.desc b/regression/goto-synthesizer/loop_contracts_synthesis_02/test.desc new file mode 100644 index 00000000000..71ecf710669 --- /dev/null +++ b/regression/goto-synthesizer/loop_contracts_synthesis_02/test.desc @@ -0,0 +1,11 @@ +CORE +main.c +--pointer-check +^EXIT=0$ +^SIGNAL=0$ +^\[main.pointer\_dereference.\d+\] .* SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test shows that loop invariants using range predicates can be correctly +synthesized for programs with only pointer checks but no other assertions. diff --git a/regression/goto-synthesizer/loop_contracts_synthesis_03/main.c b/regression/goto-synthesizer/loop_contracts_synthesis_03/main.c new file mode 100644 index 00000000000..be272d35f2c --- /dev/null +++ b/regression/goto-synthesizer/loop_contracts_synthesis_03/main.c @@ -0,0 +1,16 @@ +#define SIZE 80 + +void main() +{ + unsigned long len; + __CPROVER_assume(len <= SIZE); + __CPROVER_assume(len >= 8); + char *array = malloc(len); + const char *end = array + len; + unsigned s = 0; + + while(array != end) + { + s += *array++; + } +} diff --git a/regression/goto-synthesizer/loop_contracts_synthesis_03/test.desc b/regression/goto-synthesizer/loop_contracts_synthesis_03/test.desc new file mode 100644 index 00000000000..70c75536ac2 --- /dev/null +++ b/regression/goto-synthesizer/loop_contracts_synthesis_03/test.desc @@ -0,0 +1,12 @@ +CORE +main.c +--pointer-check +^EXIT=0$ +^SIGNAL=0$ +^\[main.pointer\_dereference.\d+\] .* SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test shows that loop invariants using range predicates and same-object +predicates can be correctly synthesized for programs with only pointer +checks but no other assertions. diff --git a/regression/goto-synthesizer/loop_contracts_synthesis_04/main.c b/regression/goto-synthesizer/loop_contracts_synthesis_04/main.c new file mode 100644 index 00000000000..8e75063dc4a --- /dev/null +++ b/regression/goto-synthesizer/loop_contracts_synthesis_04/main.c @@ -0,0 +1,17 @@ +#define SIZE 80 + +void main() +{ + unsigned long len; + __CPROVER_assume(len <= SIZE); + __CPROVER_assume(len >= 8); + char *array = malloc(len); + unsigned long s = 0; + + unsigned long j = 0; + for(unsigned long i = 0; i < len; i++) + { + s += array[j]; + j++; + } +} diff --git a/regression/goto-synthesizer/loop_contracts_synthesis_04/test.desc b/regression/goto-synthesizer/loop_contracts_synthesis_04/test.desc new file mode 100644 index 00000000000..847f74c0ae1 --- /dev/null +++ b/regression/goto-synthesizer/loop_contracts_synthesis_04/test.desc @@ -0,0 +1,11 @@ +CORE +main.c +--pointer-check +^EXIT=0$ +^SIGNAL=0$ +^\[main.pointer\_dereference.\d+\] .* SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test shows that the loop-invariant synthesizer can enumerate +strengthening clauses for invariant-not-preserved violation. diff --git a/src/goto-instrument/contracts/contracts.cpp b/src/goto-instrument/contracts/contracts.cpp index 784333dfa4a..6caea9b9955 100644 --- a/src/goto-instrument/contracts/contracts.cpp +++ b/src/goto-instrument/contracts/contracts.cpp @@ -59,6 +59,7 @@ void code_contractst::check_apply_loop_contracts( const irep_idt &mode) { const auto loop_head_location = loop_head->source_location(); + const auto loop_number = loop_end->loop_number; // Vector representing a (possibly multidimensional) decreases clause const auto &decreases_clause_exprs = decreases_clause.operands(); @@ -95,10 +96,12 @@ void code_contractst::check_apply_loop_contracts( // | STEP: // --. | assert (initial_invariant_val); // loop assigns check | | in_base_case = false; - // - not applicable >======= havoc (assigns_set); - // func assigns check | | assume (invariant_expr); - // + deferred | `- old_variant_val = decreases_clause_expr; - // --' * HEAD: + // - not applicable >======= in_loop_havoc_block = true; + // func assigns check | | havoc (assigns_set); + // + deferred | | in_loop_havoc_block = false; + // --' | assume (invariant_expr); + // `- old_variant_val = decreases_clause_expr; + // * HEAD: // loop assigns check ,- ... eval guard ... // + assertions added | if (!guard) // func assigns check | goto EXIT; @@ -142,7 +145,11 @@ void code_contractst::check_apply_loop_contracts( // i.e., the loop guard was satisfied. const auto entered_loop = new_tmp_symbol( - bool_typet(), loop_head_location, mode, symbol_table, "__entered_loop") + bool_typet(), + loop_head_location, + mode, + symbol_table, + std::string(ENTERED_LOOP) + "__" + std::to_string(loop_number)) .symbol_expr(); pre_loop_head_instrs.add( goto_programt::make_decl(entered_loop, loop_head_location)); @@ -153,7 +160,7 @@ void code_contractst::check_apply_loop_contracts( // if the loop is not vacuous and must be abstracted with contracts. const auto initial_invariant_val = new_tmp_symbol( - bool_typet(), loop_head_location, mode, symbol_table, "__init_invariant") + bool_typet(), loop_head_location, mode, symbol_table, INIT_INVARIANT) .symbol_expr(); pre_loop_head_instrs.add( goto_programt::make_decl(initial_invariant_val, loop_head_location)); @@ -292,8 +299,25 @@ void code_contractst::check_apply_loop_contracts( loop_head, add_pragma_disable_assigns_check(pre_loop_head_instrs)); // Generate havocing code for assignment targets. + // ASSIGN in_loop_havoc_block = true; + // havoc (assigns_set); + // ASSIGN in_loop_havoc_block = false; + const auto in_loop_havoc_block = + new_tmp_symbol( + bool_typet(), + loop_head_location, + mode, + symbol_table, + std::string(IN_LOOP_HAVOC_BLOCK) + +"__" + std::to_string(loop_number)) + .symbol_expr(); + pre_loop_head_instrs.add( + goto_programt::make_decl(in_loop_havoc_block, loop_head_location)); + pre_loop_head_instrs.add( + goto_programt::make_assignment(in_loop_havoc_block, true_exprt{})); havoc_assigns_targetst havoc_gen(to_havoc, ns); havoc_gen.append_full_havoc_code(loop_head_location, pre_loop_head_instrs); + pre_loop_head_instrs.add( + goto_programt::make_assignment(in_loop_havoc_block, false_exprt{})); // Insert the second block of pre_loop_head_instrs: the havocing code. // We do not `add_pragma_disable_assigns_check`, @@ -1414,6 +1438,66 @@ void code_contractst::apply_loop_contracts( unwindset.parse_unwindset(loop_names, log.get_message_handler()); goto_unwindt goto_unwind; goto_unwind(goto_model, unwindset, goto_unwindt::unwind_strategyt::ASSUME); + + remove_skip(goto_model); + + // Record original loop number for some instrumented instructions. + for(auto &goto_function_entry : goto_functions.function_map) + { + auto &goto_function = goto_function_entry.second; + bool is_in_loop_havoc_block = false; + + unsigned loop_number_of_loop_havoc = 0; + for(goto_programt::const_targett it_instr = + goto_function.body.instructions.begin(); + it_instr != goto_function.body.instructions.end(); + it_instr++) + { + // Don't override original loop numbers. + if(original_loop_number_map.count(it_instr) != 0) + continue; + + // Store loop number for + // ASSIGN ENTERED_LOOP = TRUE + if( + is_assignment_to_instrumented_variable(it_instr, ENTERED_LOOP) && + it_instr->assign_rhs() == true_exprt()) + { + const auto &assign_lhs = + expr_try_dynamic_cast(it_instr->assign_lhs()); + original_loop_number_map[it_instr] = get_suffix_unsigned( + id2string(assign_lhs->get_identifier()), + std::string(ENTERED_LOOP) + "__"); + continue; + } + + // Loop havocs are assignments between + // ASSIGN IN_LOOP_HAVOC_BLOCK = true + // and + // ASSIGN IN_LOOP_HAVOC_BLOCK = false + + // Entering the loop-havoc block. + if(is_assignment_to_instrumented_variable(it_instr, IN_LOOP_HAVOC_BLOCK)) + { + is_in_loop_havoc_block = it_instr->assign_rhs() == true_exprt(); + const auto &assign_lhs = + expr_try_dynamic_cast(it_instr->assign_lhs()); + loop_number_of_loop_havoc = get_suffix_unsigned( + id2string(assign_lhs->get_identifier()), + std::string(IN_LOOP_HAVOC_BLOCK) + "__"); + continue; + } + + // Assignments in loop-havoc block are loop havocs. + if(is_in_loop_havoc_block && it_instr->is_assign()) + { + loop_havoc_set.emplace(it_instr); + + // Store loop number for loop havoc. + original_loop_number_map[it_instr] = loop_number_of_loop_havoc; + } + } + } } void code_contractst::enforce_contracts( diff --git a/src/goto-instrument/contracts/contracts.h b/src/goto-instrument/contracts/contracts.h index a6024782fc5..208c548a87f 100644 --- a/src/goto-instrument/contracts/contracts.h +++ b/src/goto-instrument/contracts/contracts.h @@ -122,6 +122,18 @@ class code_contractst symbol_tablet &get_symbol_table(); goto_functionst &get_goto_functions(); + std::unordered_map + get_original_loop_number_map() const + { + return original_loop_number_map; + } + + std::unordered_set + get_loop_havoc_set() const + { + return loop_havoc_set; + } + namespacet ns; protected: @@ -137,6 +149,17 @@ class code_contractst /// Name of loops we are going to unwind. std::list loop_names; + /// Store the map from instrumented instructions for loop contracts to their + /// original loop numbers. Following instrumented instructions are stored. + /// 1. loop-havoc --- begin of transformed loops + /// 2. ASSIGN ENTERED_LOOP = TRUE --- end of transformed loops + std::unordered_map + original_loop_number_map; + + /// Loop havoc instructions instrumneted during applying loop contracts. + std::unordered_set + loop_havoc_set; + public: /// \brief Enforce contract of a single function void enforce_contract(const irep_idt &function); diff --git a/src/goto-instrument/contracts/utils.cpp b/src/goto-instrument/contracts/utils.cpp index 2c2ab147534..0d8f222b66b 100644 --- a/src/goto-instrument/contracts/utils.cpp +++ b/src/goto-instrument/contracts/utils.cpp @@ -447,3 +447,49 @@ void generate_history_variables_initialization( // Add all the history variable initialization instructions program.destructive_append(history); } + +bool is_transformed_loop_end(const goto_programt::const_targett &target) +{ + // The end of the loop end of transformed loop is + // ASSIGN entered_loop = true + if(!is_assignment_to_instrumented_variable(target, ENTERED_LOOP)) + return false; + + return target->assign_rhs() == true_exprt(); +} + +bool is_assignment_to_instrumented_variable( + const goto_programt::const_targett &target, + std::string var_name) +{ + INVARIANT( + var_name == IN_BASE_CASE || var_name == ENTERED_LOOP || + var_name == IN_LOOP_HAVOC_BLOCK, + "var_name is not of instrumented variables."); + + if(!target->is_assign()) + return false; + + if(can_cast_expr(target->assign_lhs())) + { + const auto &lhs = to_symbol_expr(target->assign_lhs()); + return id2string(lhs.get_identifier()).find("::" + var_name) != + std::string::npos; + } + + return false; +} + +unsigned get_suffix_unsigned(const std::string &str, const std::string &prefix) +{ + // first_index is the end of the `prefix`. + auto first_index = str.find(prefix); + INVARIANT( + first_index != std::string::npos, "Prefix not found in the given string"); + first_index += prefix.length(); + + // last_index is the index of not-digit. + auto last_index = str.find_first_not_of("0123456789", first_index); + std::string result = str.substr(first_index, last_index - first_index); + return std::stol(result); +} diff --git a/src/goto-instrument/contracts/utils.h b/src/goto-instrument/contracts/utils.h index ff4ced40065..0741b9a2dae 100644 --- a/src/goto-instrument/contracts/utils.h +++ b/src/goto-instrument/contracts/utils.h @@ -17,6 +17,11 @@ Date: September 2021 #include +#define IN_BASE_CASE "__in_base_case" +#define ENTERED_LOOP "__entered_loop" +#define IN_LOOP_HAVOC_BLOCK "__in_loop_havoc_block" +#define INIT_INVARIANT "__init_invariant" + /// \brief A class that overrides the low-level havocing functions in the base /// utility class, to havoc only when expressions point to valid memory, /// i.e. if all dereferences within an expression are valid @@ -205,4 +210,16 @@ void generate_history_variables_initialization( const irep_idt &mode, goto_programt &program); +/// Return true if `target` is the loop end of some transformed code. +bool is_transformed_loop_end(const goto_programt::const_targett &target); + +/// Return true if `target` is an assignment to an instrumented variable with +/// name `var_name`. +bool is_assignment_to_instrumented_variable( + const goto_programt::const_targett &target, + std::string var_name); + +/// Convert the suffix digits right after `prefix` of `str` into unsigned. +unsigned get_suffix_unsigned(const std::string &str, const std::string &prefix); + #endif // CPROVER_GOTO_INSTRUMENT_CONTRACTS_UTILS_H diff --git a/src/goto-programs/loop_ids.h b/src/goto-programs/loop_ids.h index 69f77e00834..dc8860fabb5 100644 --- a/src/goto-programs/loop_ids.h +++ b/src/goto-programs/loop_ids.h @@ -31,6 +31,8 @@ struct loop_idt { } + loop_idt(const loop_idt &other) = default; + irep_idt function_id; unsigned int loop_number; @@ -39,6 +41,11 @@ struct loop_idt return function_id == o.function_id && loop_number == o.loop_number; } + bool operator!=(const loop_idt &o) const + { + return !operator==(o); + } + bool operator<(const loop_idt &o) const { return function_id < o.function_id || diff --git a/src/goto-synthesizer/Makefile b/src/goto-synthesizer/Makefile index 729d94d747a..4893078ffdf 100644 --- a/src/goto-synthesizer/Makefile +++ b/src/goto-synthesizer/Makefile @@ -1,4 +1,5 @@ -SRC = enumerative_loop_contracts_synthesizer.cpp \ +SRC = cegis_verifier.cpp \ + enumerative_loop_contracts_synthesizer.cpp \ expr_enumerator.cpp \ goto_synthesizer_languages.cpp \ goto_synthesizer_main.cpp \ @@ -8,25 +9,30 @@ SRC = enumerative_loop_contracts_synthesizer.cpp \ OBJ += ../ansi-c/ansi-c$(LIBEXT) \ ../analyses/analyses$(LIBEXT) \ - ../cpp/cpp$(LIBEXT) \ - ../linking/linking$(LIBEXT) \ + ../assembler/assembler$(LIBEXT) \ ../big-int/big-int$(LIBEXT) \ + ../cpp/cpp$(LIBEXT) \ ../goto-checker/goto-checker$(LIBEXT) \ ../goto-programs/goto-programs$(LIBEXT) \ ../goto-instrument/contracts/contracts$(OBJEXT) \ - ../goto-instrument/contracts/utils$(OBJEXT) \ + ../goto-instrument/contracts/havoc_assigns_clause_targets$(OBJEXT) \ + ../goto-instrument/contracts/inlining_decorator$(OBJEXT) \ ../goto-instrument/contracts/instrument_spec_assigns$(OBJEXT) \ ../goto-instrument/contracts/memory_predicates$(OBJEXT) \ - ../goto-instrument/contracts/inlining_decorator$(OBJEXT) \ - ../goto-instrument/contracts/havoc_assigns_clause_targets$(OBJEXT) \ + ../goto-instrument/contracts/utils$(OBJEXT) \ ../goto-instrument/havoc_utils$(OBJEXT) \ ../goto-instrument/loop_utils$(OBJEXT) \ - ../goto-instrument/unwindset$(OBJEXT) \ - ../goto-instrument/unwind$(OBJEXT) \ ../goto-instrument/nondet_static$(OBJEXT) \ + ../goto-instrument/unwind$(OBJEXT) \ + ../goto-instrument/unwindset$(OBJEXT) \ + ../goto-symex/goto-symex$(LIBEXT) \ + ../json/json$(LIBEXT) \ ../langapi/langapi$(LIBEXT) \ + ../linking/linking$(LIBEXT) \ + ../pointer-analysis/pointer-analysis$(LIBEXT) \ + ../solvers/solvers$(LIBEXT) \ ../util/util$(LIBEXT) \ - ../json/json$(LIBEXT) \ + ../xmllang/xmllang$(LIBEXT) \ # Empty last line INCLUDES= -I .. diff --git a/src/goto-synthesizer/cegis_verifier.cpp b/src/goto-synthesizer/cegis_verifier.cpp new file mode 100644 index 00000000000..4ede29ac02d --- /dev/null +++ b/src/goto-synthesizer/cegis_verifier.cpp @@ -0,0 +1,589 @@ +/*******************************************************************\ + +Module: Verifier for Counterexample-Guided Synthesis + +Author: Qinheping Hu + +\*******************************************************************/ + +/// \file +/// Verifier for Counterexample-Guided Synthesis + +#include "cegis_verifier.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool contains_symbol_prefix(const exprt &expr, const std::string &prefix) +{ + for(auto it = expr.depth_begin(), itend = expr.depth_end(); it != itend; ++it) + { + if( + expr.id() == ID_symbol && + has_prefix(id2string(to_symbol_expr(expr).get_identifier()), prefix)) + { + return true; + } + } + return false; +} + +optionst cegis_verifiert::get_options() +{ + optionst options; + + // Default options + // These options are the same as we run CBMC without any set flag. + options.set_option("built-in-assertions", true); + options.set_option("propagation", true); + options.set_option("simple-slice", true); + options.set_option("simplify", true); + options.set_option("show-goto-symex-steps", false); + options.set_option("show-points-to-sets", false); + options.set_option("show-array-constraints", false); + options.set_option("built-in-assertions", true); + options.set_option("assertions", true); + options.set_option("assumptions", true); + options.set_option("arrays-uf", "auto"); + options.set_option("depth", UINT32_MAX); + options.set_option("exploration-strategy", "lifo"); + options.set_option("symex-cache-dereferences", false); + options.set_option("rewrite-union", true); + options.set_option("self-loops-to-assumptions", true); + + // Generating trace for counterexamples. + options.set_option("trace", true); + + // Preprocess `goto_model`. Copied from `cbmc_parse_options.cpp`. + remove_asm(goto_model); + link_to_library( + goto_model, log.get_message_handler(), cprover_cpp_library_factory); + link_to_library( + goto_model, log.get_message_handler(), cprover_c_library_factory); + process_goto_program(goto_model, options, log); + + add_failed_symbols(goto_model.symbol_table); + + remove_skip(goto_model); + label_properties(goto_model); + return options; +} + +optionalt cegis_verifiert::get_cause_loop_id( + const goto_tracet &goto_trace, + const goto_programt::const_targett violation) +{ + optionalt result; + + // build the dependence graph + const namespacet ns(goto_model.symbol_table); + dependence_grapht dependence_graph(ns); + dependence_graph(goto_model); + + // Checking if `to` is dependent on `from`. + // `from` : loop havocing + // `to` : violation + + // Get `to`---the instruction where the violation happens + irep_idt to_fun_name = goto_trace.get_last_step().function_id; + const goto_functionst::goto_functiont &to_function = + goto_model.get_goto_function(to_fun_name); + goto_programt::const_targett to = to_function.body.instructions.end(); + for(goto_programt::const_targett it = to_function.body.instructions.begin(); + it != to_function.body.instructions.end(); + ++it) + { + if(it->location_number == violation->location_number) + { + to = it; + } + } + + INVARIANT( + to != to_function.body.instructions.end(), + "There must be a violation in a trace."); + + // Compute the backward reachable set. + const auto reachable_vector = + dependence_graph.get_reachable(dependence_graph[to].get_node_id(), false); + const std::set reachable_set = + std::set(reachable_vector.begin(), reachable_vector.end()); + + // A loop is the cause loop if the violation is dependent on the write set of + // the loop. + for(const auto &step : goto_trace.steps) + { + // Being dependent on a write set is equivalent to being dependent on one + // of the loop havocing instruction. + if(loop_havoc_set.count(step.pc)) + { + // Get `from`---a loop havoc instruction. + irep_idt from_fun_name = step.function_id; + const goto_functionst::goto_functiont &from_function = + goto_model.get_goto_function(from_fun_name); + goto_programt::const_targett from = from_function.body.instructions.end(); + for(goto_programt::const_targett it = + from_function.body.instructions.begin(); + it != from_function.body.instructions.end(); + ++it) + { + if(it->location_number == step.pc->location_number) + { + from = it; + } + } + + INVARIANT( + from != from_function.body.instructions.end(), + "Failed to find the location number of the loop havoc."); + + // The violation is caused by the loop havoc + // if it is dependent on the loop havoc. + if(reachable_set.count(dependence_graph[from].get_node_id())) + { + result = loop_idt(step.function_id, original_loop_number_map[step.pc]); + return result; + } + } + } + return result; +} + +bool cegis_verifiert::is_instruction_in_transfomed_loop( + const loop_idt &loop_id, + const goto_functiont &function, + unsigned location_number_of_target) +{ + // The loop body after transformation are instructions from + // loop havocing instructions + // to + // loop end of transformed code. + unsigned location_number_of_havocing = 0; + + for(goto_programt::const_targett it = function.body.instructions.begin(); + it != function.body.instructions.end(); + ++it) + { + // Record the location number of the begin of a transformed loop. + if( + loop_havoc_set.count(it) && + original_loop_number_map[it] == loop_id.loop_number) + { + location_number_of_havocing = it->location_number; + } + + // Reach the end of a transformed loop. + if( + is_transformed_loop_end(it) && + original_loop_number_map[it] == loop_id.loop_number) + { + INVARIANT( + location_number_of_havocing != 0, + "We must have entered the transformed loop before reaching the end"); + + // Check if `location_number_of_target` is between the begin and the end + // of the transformed loop. + if((location_number_of_havocing < location_number_of_target && + location_number_of_target < it->location_number)) + { + return true; + } + } + } + + return false; +} + +cext cegis_verifiert::build_cex( + const goto_tracet &goto_trace, + const source_locationt &loop_entry_loc) +{ + const namespacet ns(goto_model.symbol_table); + + // Valuations of havoced variables right after havoc instructions. + std::unordered_map object_sizes; + std::unordered_map havoced_values; + std::unordered_map havoced_pointer_offsets; + + // Loop-entry valuations of havoced variables. + std::unordered_map loop_entry_values; + std::unordered_map loop_entry_offsets; + + // Live variables upon the loop head. + std::set live_variables; + + bool entered_loop = false; + + // Scan the trace step by step to store needed valuations. + for(const auto &step : goto_trace.steps) + { + switch(step.type) + { + case goto_trace_stept::typet::DECL: + case goto_trace_stept::typet::ASSIGNMENT: + { + if(!step.full_lhs_value.is_nil()) + { + // Entered loop? + if(is_assignment_to_instrumented_variable(step.pc, ENTERED_LOOP)) + entered_loop = step.full_lhs_value == true_exprt(); + + // skip hidden steps + if(step.hidden) + break; + + // Live variables + // 1. must be in the same function as the target loop; + // 2. alive before entering the target loop; + // 3. a pointer or a primitive-typed variable; + // TODO: add support for union pointer + if( + step.pc->source_location().get_function() == + loop_entry_loc.get_function() && + !entered_loop && + (step.full_lhs.type().id() == ID_unsignedbv || + step.full_lhs.type().id() == ID_signedbv || + step.full_lhs.type().id() == ID_pointer) && + step.full_lhs.id() == ID_symbol) + { + const auto &symbol = + expr_try_dynamic_cast(step.full_lhs); + + // malloc_size is not-hidden tmp variable. + if(id2string(symbol->get_identifier()) != "malloc::malloc_size") + { + live_variables.emplace(step.full_lhs); + } + } + + // Record valuation of primitive-typed variable. + if( + step.full_lhs.type().id() == ID_unsignedbv || + step.full_lhs.type().id() == ID_signedbv) + { + bool is_signed = step.full_lhs.type().id() == ID_signedbv; + const auto &bv_type = + type_try_dynamic_cast(step.full_lhs.type()); + const auto width = bv_type->get_width(); + // Store the value into the map for loop_entry value if we haven't + // entered the loop. + if(!entered_loop) + { + loop_entry_values[step.full_lhs] = bvrep2integer( + step.full_lhs_value.get_string(ID_value), width, is_signed); + } + + // Store the value into the the map for havoced value if this step + // is a loop havocing instruction. + if(loop_havoc_set.count(step.pc)) + { + havoced_values[step.full_lhs] = bvrep2integer( + step.full_lhs_value.get_string(ID_value), width, is_signed); + } + } + + // Record object_size, pointer_offset, and loop_entry(pointer_offset). + if( + can_cast_expr( + step.full_lhs_value) && + contains_symbol_prefix( + step.full_lhs_value, SYMEX_DYNAMIC_PREFIX "::dynamic_object")) + { + const auto &pointer_constant_expr = + expr_try_dynamic_cast( + step.full_lhs_value); + + pointer_arithmetict pointer_arithmetic( + pointer_constant_expr->symbolic_pointer()); + if(pointer_constant_expr->symbolic_pointer().id() == ID_typecast) + { + pointer_arithmetic = pointer_arithmetict( + pointer_constant_expr->symbolic_pointer().operands()[0]); + } + + // Extract object size. + exprt &underlying_array = pointer_arithmetic.pointer; + // Object sizes are stored in the type of underlying arrays. + while(!can_cast_type(underlying_array.type())) + { + if( + underlying_array.id() == ID_address_of || + underlying_array.id() == ID_index) + { + underlying_array = underlying_array.operands()[0]; + continue; + } + UNREACHABLE; + } + mp_integer object_size = + pointer_offset_size(to_array_type(underlying_array.type()), ns) + .value(); + object_sizes[step.full_lhs] = object_size; + + // Extract offsets. + mp_integer offset = 0; + if(pointer_arithmetic.offset.is_not_nil()) + { + constant_exprt offset_expr = + expr_dynamic_cast(pointer_arithmetic.offset); + offset = bvrep2integer( + offset_expr.get_value(), size_type().get_width(), false); + } + + // Store the offset into the map for loop_entry if we haven't + // entered the loop. + if(!entered_loop) + { + loop_entry_offsets[step.full_lhs] = offset; + } + + // Store the offset into the the map for havoced offset if this step + // is a loop havocing instruction. + if(loop_havoc_set.count(step.pc)) + { + havoced_pointer_offsets[step.full_lhs] = offset; + } + } + } + } + + case goto_trace_stept::typet::ASSERT: + case goto_trace_stept::typet::FUNCTION_CALL: + case goto_trace_stept::typet::FUNCTION_RETURN: + case goto_trace_stept::typet::ASSUME: + case goto_trace_stept::typet::LOCATION: + case goto_trace_stept::typet::GOTO: + case goto_trace_stept::typet::OUTPUT: + case goto_trace_stept::typet::INPUT: + case goto_trace_stept::typet::SPAWN: + case goto_trace_stept::typet::MEMORY_BARRIER: + case goto_trace_stept::typet::ATOMIC_BEGIN: + case goto_trace_stept::typet::ATOMIC_END: + case goto_trace_stept::typet::DEAD: + case goto_trace_stept::typet::CONSTRAINT: + case goto_trace_stept::typet::SHARED_READ: + case goto_trace_stept::typet::SHARED_WRITE: + break; + + case goto_trace_stept::typet::NONE: + UNREACHABLE; + } + } + + return cext( + object_sizes, + havoced_values, + havoced_pointer_offsets, + loop_entry_values, + loop_entry_offsets, + live_variables); +} + +void cegis_verifiert::restore_functions() +{ + for(const auto &fun_entry : goto_model.goto_functions.function_map) + { + irep_idt fun_name = fun_entry.first; + goto_model.goto_functions.function_map[fun_name].body.swap( + original_functions[fun_name]); + } +} + +optionalt cegis_verifiert::verify() +{ + // This method does the following three things to verify the `goto_model` and + // return a formatted counterexample if there is any violated property. + // + // 1. annotate and apply the loop contracts stored in `invariant_candidates`. + // + // 2. run the CBMC API to verify the intrumented goto model. As the API is not + // merged yet, we preprocess the goto model and run the symex checker on it + // to simulate CBMC API. + // TODO: ^^^ replace the symex checker once the real API is merged. + // + // 3. construct the formatted counterexample from the violated property and + // its trace. + + // Store the original functions. We will restore them after the verification. + for(const auto &fun_entry : goto_model.goto_functions.function_map) + { + original_functions[fun_entry.first].copy_from(fun_entry.second.body); + } + + // Annotate the candidates tot the goto_model for checking. + annotate_invariants(invariant_candidates, goto_model); + + // Control verbosity. + // We allow non-error output message only when verbosity is set to at least 9. + const unsigned original_verbosity = log.get_message_handler().get_verbosity(); + if(original_verbosity < 9) + log.get_message_handler().set_verbosity(1); + + // Apply loop contracts we annotated. + code_contractst cont(goto_model, log); + cont.apply_loop_contracts(); + original_loop_number_map = cont.get_original_loop_number_map(); + loop_havoc_set = cont.get_loop_havoc_set(); + + // Get the checker same as CBMC api without any flag. + // TODO: replace the checker with CBMC api once it is implemented. + ui_message_handlert ui_message_handler(log.get_message_handler()); + const auto options = get_options(); + std::unique_ptr< + all_properties_verifier_with_trace_storaget> + checker = util_make_unique< + all_properties_verifier_with_trace_storaget>( + options, ui_message_handler, goto_model); + + goto_convert( + goto_model.symbol_table, + goto_model.goto_functions, + log.get_message_handler()); + + // Run the checker to get the result. + const resultt result = (*checker)(); + + if(original_verbosity >= 9) + checker->report(); + + // Restore the verbosity. + log.get_message_handler().set_verbosity(original_verbosity); + + // + // Start to construct the counterexample. + // + + if(result == resultt::PASS) + { + restore_functions(); + return optionalt(); + } + + if(result == resultt::ERROR || result == resultt::UNKNOWN) + { + INVARIANT(false, "Verification failed during loop contract synthesis."); + } + + properties = checker->get_properties(); + // Find the violation and construct conterexample from its trace. + for(const auto &property_it : properties) + { + if(property_it.second.status != property_statust::FAIL) + continue; + + first_violation = property_it.first; + exprt violated_predicate = property_it.second.pc->condition(); + + // The pointer checked in the null-pointer-check violation. + exprt checked_pointer = true_exprt(); + + // Type of the violation + cext::violation_typet violation_type = cext::violation_typet::cex_other; + + // The violation is a pointer OOB check. + if((property_it.second.description.find( + "dereference failure: pointer outside object bounds in") != + std::string::npos)) + { + violation_type = cext::violation_typet::cex_out_of_boundary; + } + + // The violation is a null pointer check. + if(property_it.second.description.find("pointer NULL") != std::string::npos) + { + violation_type = cext::violation_typet::cex_null_pointer; + checked_pointer = property_it.second.pc->condition() + .operands()[0] + .operands()[1] + .operands()[0]; + INVARIANT(checked_pointer.id() == ID_symbol, "Checking pointer symbol"); + } + + // The violation is a loop-invariant-preservation check. + if(property_it.second.description.find("preserved") != std::string::npos) + { + violation_type = cext::violation_typet::cex_not_preserved; + } + + // The violation is a loop-invariant-preservation check. + if( + property_it.second.description.find("invariant before entry") != + std::string::npos) + { + violation_type = cext::violation_typet::cex_not_hold_upon_entry; + } + + // The loop which could be the cause of the violation. + // We say a loop is the cause loop if the violated predicate is dependent + // upon the write set of the loop. + optionalt cause_loop_id = get_cause_loop_id( + checker->get_traces()[property_it.first], property_it.second.pc); + + if(!cause_loop_id.has_value()) + { + log.debug() << "No cause loop found!\n"; + restore_functions(); + + return cext(violation_type); + } + + log.debug() << "Found cause loop with function id: " + << cause_loop_id.value().function_id + << ", and loop number: " << cause_loop_id.value().loop_number + << "\n"; + + // Decide whether the violation is in the cause loop. + bool violation_in_loop = is_instruction_in_transfomed_loop( + cause_loop_id.value(), + goto_model.get_goto_function(cause_loop_id.value().function_id), + property_it.second.pc->location_number); + + // We always strengthen in_clause if the violation is + // invariant-not-preserved. + if(violation_type == cext::violation_typet::cex_not_preserved) + violation_in_loop = true; + + restore_functions(); + + auto return_cex = build_cex( + checker->get_traces()[property_it.first], + get_loop_head( + cause_loop_id.value().loop_number, + goto_model.goto_functions + .function_map[cause_loop_id.value().function_id]) + ->source_location()); + return_cex.violated_predicate = violated_predicate; + return_cex.cause_loop_id = cause_loop_id; + return_cex.checked_pointer = checked_pointer; + return_cex.is_violation_in_loop = violation_in_loop; + return_cex.violation_type = violation_type; + + return return_cex; + } + + UNREACHABLE; +} diff --git a/src/goto-synthesizer/cegis_verifier.h b/src/goto-synthesizer/cegis_verifier.h new file mode 100644 index 00000000000..e0c53b46047 --- /dev/null +++ b/src/goto-synthesizer/cegis_verifier.h @@ -0,0 +1,159 @@ +/*******************************************************************\ + +Module: Verifier for Counterexample-Guided Synthesis + +Author: Qinheping Hu + +\*******************************************************************/ + +/// \file +/// Verifier for Counterexample-Guided Synthesis + +#ifndef CPROVER_GOTO_SYNTHESIZER_CEGIS_VERIFIER_H +#define CPROVER_GOTO_SYNTHESIZER_CEGIS_VERIFIER_H + +#include +#include + +#include + +#include "synthesizer_utils.h" + +class messaget; + +/// Formatted counterexample. +class cext +{ +public: + enum class violation_typet + { + cex_out_of_boundary, + cex_null_pointer, + cex_not_preserved, + cex_not_hold_upon_entry, + cex_other + }; + + cext( + const std::unordered_map &object_sizes, + const std::unordered_map &havoced_values, + const std::unordered_map + &havoced_pointer_offsets, + const std::unordered_map &loop_entry_values, + const std::unordered_map &loop_entry_offsets, + const std::set &live_variables) + : object_sizes(object_sizes), + havoced_values(havoced_values), + havoced_pointer_offsets(havoced_pointer_offsets), + loop_entry_values(loop_entry_values), + loop_entry_offsets(loop_entry_offsets), + live_variables(live_variables) + { + } + + explicit cext(const violation_typet &violation_type) + : violation_type(violation_type) + { + } + + // pointer that failed the null pointer check + exprt checked_pointer; + exprt violated_predicate; + + // true if the violation happens in the cause loop + // false if the violation happens after the cause loop + bool is_violation_in_loop = true; + + // We collect havoced evaluations of havoced variables and their object sizes + // and pointer offsets. + + // __CPROVER_OBJECT_SIZE + std::unordered_map object_sizes; + // all the valuation of havoced variables with primitived type. + std::unordered_map havoced_values; + // __CPROVER_POINTER_OFFSET + std::unordered_map havoced_pointer_offsets; + + // We also collect loop-entry evaluations of havoced variables. + // __CPROVER_loop_entry + std::unordered_map loop_entry_values; + // __CPROVER_POINTER_OFFSET(__CPROVER_loop_entry( )) + std::unordered_map loop_entry_offsets; + + // Set of live variables upon loop head. + std::set live_variables; + + violation_typet violation_type; + optionalt cause_loop_id; +}; + +/// Verifier that take a goto program as input, and ouptut formatted +/// counterexamples for counterexample-guided-synthesis. +class cegis_verifiert +{ +public: + cegis_verifiert( + const invariant_mapt &invariant_candidates, + goto_modelt &goto_model, + messaget &log) + : invariant_candidates(invariant_candidates), + goto_model(goto_model), + log(log) + { + } + + /// Verify `goto_model`. Return an empty `optionalt if there is no violation. + /// Otherwise, return the formatted counterexample. + optionalt verify(); + + /// Result counterexample. + propertiest properties; + irep_idt first_violation; + +protected: + // Get the options same as using CBMC api without any flag, and + // preprocess `goto_model`. + // TODO: replace the checker with CBMC api once it is implemented. + optionst get_options(); + + // Compute the cause loop of `violation`. + // We say a loop is the cause loop if the violated predicate is dependent + // upon the write set of the loop. + optionalt get_cause_loop_id( + const goto_tracet &goto_trace, + const goto_programt::const_targett violation); + + /// Restore transformed functions to original functions. + void restore_functions(); + + // Build counterexample from trace, and store it in `return_cex`. + cext build_cex( + const goto_tracet &goto_trace, + const source_locationt &loop_entry_loc); + + /// Decide whether the target instruction is in the body of the transformed + /// loop specified by `loop_id`. + bool is_instruction_in_transfomed_loop( + const loop_idt &loop_id, + const goto_functiont &function, + unsigned location_number_of_target); + + const invariant_mapt &invariant_candidates; + goto_modelt &goto_model; + messaget log; + + /// Map from function names to original functions. It is used to + /// restore functions with annotated loops to original functions. + std::unordered_map original_functions; + + /// Map from instrumented instructions for loop contracts to their + /// original loop numbers. Returned by `code_contractst` + std::unordered_map + original_loop_number_map; + + /// Loop havoc instructions instrumneted during applying loop contracts. + std::unordered_set + loop_havoc_set; +}; + +#endif // CPROVER_GOTO_SYNTHESIZER_CEGIS_VERIFIER_H diff --git a/src/goto-synthesizer/enumerative_loop_contracts_synthesizer.cpp b/src/goto-synthesizer/enumerative_loop_contracts_synthesizer.cpp index a7e60f0fa5c..fb857e6f670 100644 --- a/src/goto-synthesizer/enumerative_loop_contracts_synthesizer.cpp +++ b/src/goto-synthesizer/enumerative_loop_contracts_synthesizer.cpp @@ -11,8 +11,71 @@ Author: Qinheping Hu #include "enumerative_loop_contracts_synthesizer.h" +#include +#include +#include +#include +#include +#include + #include +#include "cegis_verifier.h" +#include "expr_enumerator.h" + +// substitute all tmp_post variables with their origins in `expr` +void replace_tmp_post( + exprt &dest, + const std::unordered_map &tmp_post_map) +{ + replace_symbolt r; + for(const auto &tmp_post_entry : tmp_post_map) + { + INVARIANT( + can_cast_expr(tmp_post_entry.first), + "tmp_post variables must be symbol expression."); + const auto &tmp_post_symbol = + expr_dynamic_cast(tmp_post_entry.first); + r.insert(tmp_post_symbol, tmp_post_entry.second); + } + r.replace(dest); +} + +std::vector construct_terminals(const std::set &symbols) +{ + std::vector result; + for(const auto &e : symbols) + { + if(e.type().id() == ID_unsignedbv) + { + // For a variable v with primitive type, we add + // v, __CPROVER_loop_entry(v) + // into the result. + result.push_back(typecast_exprt(e, size_type())); + result.push_back( + typecast_exprt(unary_exprt(ID_loop_entry, e, e.type()), size_type())); + } + if((e.type().id() == ID_signedbv)) + { + result.push_back(e); + result.push_back(unary_exprt(ID_loop_entry, e, e.type())); + } + if((e.type().id() == ID_pointer)) + { + // For a variable v with pointer type, we add + // __CPROVER_pointer_offset(v), + // __CPROVER_pointer_offset(__CPROVER_loop_entry(v)) + // into the result. + result.push_back(pointer_offset_exprt(e, size_type())); + result.push_back(pointer_offset_exprt( + unary_exprt(ID_loop_entry, e, e.type()), size_type())); + } + } + result.push_back(from_integer(1, unsigned_int_type())); + result.push_back(from_integer(1, unsigned_long_int_type())); + return result; +} + void enumerative_loop_contracts_synthesizert::init_candidates() { for(auto &function_p : goto_model.goto_functions.function_map) @@ -32,19 +95,280 @@ void enumerative_loop_contracts_synthesizert::init_candidates() // we only synthesize invariants for unannotated loops if(loop_end->condition().find(ID_C_spec_loop_invariant).is_nil()) { - invariant_candiate_map[new_id] = true_exprt(); + // Store the loop guard. + exprt guard = + get_loop_head( + loop_end->loop_number, + goto_model.goto_functions.function_map[function_p.first]) + ->condition(); + neg_guards[new_id] = guard; + + // Initialize invariant clauses as `true`. + in_invariant_clause_map[new_id] = true_exprt(); + pos_invariant_clause_map[new_id] = true_exprt(); + } + } + } +} + +void enumerative_loop_contracts_synthesizert::build_tmp_post_map() +{ + for(auto &goto_function : goto_model.goto_functions.function_map) + { + for(const auto &instruction : goto_function.second.body.instructions) + { + // tmp_post variables are symbol lhs of ASSIGN. + if(!instruction.is_assign() || instruction.assign_lhs().id() != ID_symbol) + continue; + + const auto symbol_lhs = + expr_try_dynamic_cast(instruction.assign_lhs()); + + // tmp_post variables have identifiers with the prefix tmp::tmp_post. + if( + id2string(symbol_lhs->get_identifier()).find("tmp::tmp_post") != + std::string::npos) + { + tmp_post_map[instruction.assign_lhs()] = instruction.assign_rhs(); } } } } +std::set +enumerative_loop_contracts_synthesizert::compute_dependent_symbols( + const loop_idt &cause_loop_id, + const exprt &new_clause, + const std::set &live_vars) +{ + // We overapproximate dependent symbols as all symbols in live variables. + // TODO: using flow-dependency analysis to rule out not dependent symbols. + + std::set result; + for(const auto &e : live_vars) + find_symbols(e, result); + + return result; +} + +exprt enumerative_loop_contracts_synthesizert::synthesize_range_predicate( + const exprt &violated_predicate) +{ + // For the case where the violated predicate is dependent on no instruction + // other than loop havocing, the violated_predicate is + // WLP(loop_body_before_violation, violated_predicate). + // TODO: implement a more complete WLP algorithm. + return violated_predicate; +} + +exprt enumerative_loop_contracts_synthesizert::synthesize_same_object_predicate( + const exprt &checked_pointer) +{ + // The same object predicate says that the checked pointer points to the + // same object as it pointed before entering the loop. + // It works for the array-manuplating loops where only offset of pointer + // are modified but not the object pointers point to. + return same_object( + checked_pointer, unary_exprt(ID_loop_entry, checked_pointer)); +} + +exprt enumerative_loop_contracts_synthesizert::synthesize_strengthening_clause( + const std::vector terminal_symbols, + const loop_idt &cause_loop_id, + const irep_idt &violation_id) +{ + // Synthesis of strengthning clauses is a enumerate-and-check proecess. + // We first construct the enumerator for the following grammar. + // And then enumerate clause and check that if it can make the invariant + // inductive. + + // Initialize factory representing grammar + // StartBool -> StartBool && StartBool | Start == Start + // | StartBool <= StartBool | StartBool < StartBool + // Start -> Start + Start | terminal_symbols + // where a0, and a1 are symbol expressions. + namespacet ns(goto_model.symbol_table); + enumerator_factoryt factory = enumerator_factoryt(ns); + recursive_enumerator_placeholdert start_bool_ph(factory, "StartBool", ns); + recursive_enumerator_placeholdert start_ph(factory, "Start", ns); + + // terminals + expr_sett leafexprs(terminal_symbols.begin(), terminal_symbols.end()); + + // rules for Start + enumeratorst start_args; + // Start -> terminals + leaf_enumeratort leaf_g(leafexprs, ns); + start_args.push_back(&leaf_g); + + // Start -> Start + Start + binary_functional_enumeratort plus_g( + ID_plus, + start_ph, + start_ph, + [](const partitiont &partition) { + if(partition.size() <= 1) + return true; + return partition.front() == 1; + }, + ns); + start_args.push_back(&plus_g); + + // rules for StartBool + enumeratorst start_bool_args; + // StartBool -> StartBool && StartBool + binary_functional_enumeratort and_g(ID_and, start_bool_ph, start_bool_ph, ns); + start_bool_args.push_back(&and_g); + // StartBool -> Start == Start + binary_functional_enumeratort equal_g(ID_equal, start_ph, start_ph, ns); + start_bool_args.push_back(&equal_g); + // StartBool -> Start <= Start + binary_functional_enumeratort le_g(ID_le, start_ph, start_ph, ns); + start_bool_args.push_back(&le_g); + // StartBool -> Start < Start + binary_functional_enumeratort lt_g(ID_lt, start_ph, start_ph, ns); + start_bool_args.push_back(<_g); + + // add the two nonterminals to the factory + factory.attach_productions("Start", start_args); + factory.attach_productions("StartBool", start_bool_args); + + // size of candidates we are searching now, + // starting from 0 + size_t size_bound = 0; + + // numbers of candidates we have seen, + // used for quantitative analysis + size_t seen_terms = 0; + + // Start to enumerate and check. + while(true) + { + size_bound++; + + // generate candidate and verify + for(auto strengthening_candidate : start_bool_ph.enumerate(size_bound)) + { + seen_terms++; + invariant_mapt new_in_clauses = invariant_mapt(in_invariant_clause_map); + new_in_clauses[cause_loop_id] = + and_exprt(new_in_clauses[cause_loop_id], strengthening_candidate); + const auto &combined_invariant = combine_in_and_post_invariant_clauses( + new_in_clauses, pos_invariant_clause_map, neg_guards); + + // The verifier we use to check current invariant candidates. + cegis_verifiert verifier(combined_invariant, goto_model, log); + + // A good strengthening clause if + // 1. all checks pass, or + // 2. the loop invariant is inductive and hold upon the entry. + const auto &return_cex = verifier.verify(); + if( + !return_cex.has_value() || + (verifier.properties.at(violation_id).status != + property_statust::FAIL && + return_cex->violation_type != + cext::violation_typet::cex_not_hold_upon_entry)) + { + return strengthening_candidate; + } + } + } + UNREACHABLE; +} + invariant_mapt enumerative_loop_contracts_synthesizert::synthesize_all() { init_candidates(); + build_tmp_post_map(); + + // The invariants we are going to synthesize and verify are the combined + // invariants from in- and post- invariant clauses. + auto combined_invariant = combine_in_and_post_invariant_clauses( + in_invariant_clause_map, pos_invariant_clause_map, neg_guards); + + // The verifier we use to check current invariant candidates. + cegis_verifiert verifier(combined_invariant, goto_model, log); + + // Set of symbols the violation may be dependent on. + // We enumerate strenghening clauses built from symbols from the set. + std::set dependent_symbols; + // Set of symbols we used to enumerate strenghening clauses. + std::vector terminal_symbols; + + auto return_cex = verifier.verify(); + + while(return_cex.has_value()) + { + exprt new_invariant_clause = true_exprt(); + + // Synthsize the new_clause + // We use difference strategies for different type of violations. + switch(return_cex->violation_type) + { + case cext::violation_typet::cex_out_of_boundary: + new_invariant_clause = + synthesize_range_predicate(return_cex->violated_predicate); + break; + + case cext::violation_typet ::cex_null_pointer: + new_invariant_clause = + synthesize_same_object_predicate(return_cex->checked_pointer); + break; + + case cext::violation_typet::cex_not_preserved: + terminal_symbols = construct_terminals(dependent_symbols); + new_invariant_clause = synthesize_strengthening_clause( + terminal_symbols, + return_cex->cause_loop_id.value(), + verifier.first_violation); + break; + + case cext::violation_typet::cex_not_hold_upon_entry: + case cext::violation_typet::cex_other: + INVARIANT(false, "unsupported violation type"); + break; + } + + INVARIANT(return_cex->cause_loop_id.has_value(), "No cause loop found!"); + + INVARIANT( + new_invariant_clause != true_exprt(), + "failed to synthesized meaningful clause"); + + // There could be tmp_post varialbes in the synthesized clause. + // We substitute them with their original variables. + replace_tmp_post(new_invariant_clause, tmp_post_map); + + const auto &cause_loop_id = return_cex->cause_loop_id.value(); + // Update the dependent symbols. + dependent_symbols = compute_dependent_symbols( + cause_loop_id, new_invariant_clause, return_cex->live_variables); + + // add the new cluase to the candidate invariants. + if(return_cex->is_violation_in_loop) + { + in_invariant_clause_map[cause_loop_id] = + and_exprt(in_invariant_clause_map[cause_loop_id], new_invariant_clause); + } + else + { + // violation happens post-loop. + pos_invariant_clause_map[cause_loop_id] = and_exprt( + pos_invariant_clause_map[cause_loop_id], new_invariant_clause); + } + + // Re-combine invariant clauses and update the candidate map. + combined_invariant = combine_in_and_post_invariant_clauses( + in_invariant_clause_map, pos_invariant_clause_map, neg_guards); + + return_cex = verifier.verify(); + } + + log.result() << "result : " << log.green << "PASS" << messaget::eom + << log.reset; - // Now this method only generate true for all unnotated loops. - // The implementation will be added later. - return invariant_candiate_map; + return combined_invariant; } exprt enumerative_loop_contracts_synthesizert::synthesize(loop_idt loop_id) diff --git a/src/goto-synthesizer/enumerative_loop_contracts_synthesizer.h b/src/goto-synthesizer/enumerative_loop_contracts_synthesizer.h index ba9b538e9a2..ee14c8c6da7 100644 --- a/src/goto-synthesizer/enumerative_loop_contracts_synthesizer.h +++ b/src/goto-synthesizer/enumerative_loop_contracts_synthesizer.h @@ -29,7 +29,7 @@ class enumerative_loop_contracts_synthesizert { public: enumerative_loop_contracts_synthesizert( - const goto_modelt &goto_model, + goto_modelt &goto_model, messaget &log) : loop_contracts_synthesizer_baset(goto_model, log) { @@ -44,7 +44,38 @@ class enumerative_loop_contracts_synthesizert /// Initialize invariants as true for all unannotated loops. void init_candidates(); - invariant_mapt invariant_candiate_map; + /// Scan all ASSIGN instructions to build the map from tmp_post variables + /// to their original variables. + void build_tmp_post_map(); + + /// Compute the depedent symbols for a loop with invariant-not-preserved + /// violation which happen after `new_clause` was added. + std::set compute_dependent_symbols( + const loop_idt &cause_loop_id, + const exprt &new_clause, + const std::set &live_vars); + + /// Synthesize range predicate for OOB violation with `violated_predicate`. + exprt synthesize_range_predicate(const exprt &violated_predicate); + + /// Synthesize same object predicate for null-pointer violation for + /// `checked_pointer`. + exprt synthesize_same_object_predicate(const exprt &checked_pointer); + + /// Synthesize strengthening clause for invariant-not-preserved violation. + exprt synthesize_strengthening_clause( + const std::vector terminal_symbols, + const loop_idt &cause_loop_id, + const irep_idt &violation_id); + + /// Synthesize invariant of form + /// (in_inv || !guard) && (!guard -> pos_inv) + invariant_mapt in_invariant_clause_map; + invariant_mapt pos_invariant_clause_map; + invariant_mapt neg_guards; + + /// Map from tmp_post variables to their original variables. + std::unordered_map tmp_post_map; }; // NOLINTNEXTLINE(whitespace/line_length) diff --git a/src/goto-synthesizer/expr_enumerator.cpp b/src/goto-synthesizer/expr_enumerator.cpp index 617a7aed25d..a86efc3add0 100644 --- a/src/goto-synthesizer/expr_enumerator.cpp +++ b/src/goto-synthesizer/expr_enumerator.cpp @@ -316,7 +316,47 @@ exprt binary_functional_enumeratort::instantiate(const expr_listt &exprs) const exprs.size() == 2, "number of arguments should be 2: " + integer2string(exprs.size())); if(op_id == ID_equal) + { + auto &lhs = exprs.front(); + auto &rhs = exprs.back(); + + // Widening conversion. + if( + lhs.type() != rhs.type() && + (lhs.type().id() == ID_unsignedbv || lhs.type().id() == ID_signedbv) && + (rhs.type().id() == ID_unsignedbv || rhs.type().id() == ID_signedbv)) + { + const auto &lhs_type = type_try_dynamic_cast(lhs.type()); + const auto &rhs_type = type_try_dynamic_cast(rhs.type()); + if(lhs_type->get_width() >= rhs_type->get_width()) + return equal_exprt(lhs, typecast_exprt(rhs, lhs.type())); + else + return equal_exprt(typecast_exprt(lhs, rhs.type()), rhs); + } + return equal_exprt(exprs.front(), exprs.back()); + } + if(op_id == ID_notequal) + { + auto &lhs = exprs.front(); + auto &rhs = exprs.back(); + + // Widening conversion. + if( + lhs.type() != rhs.type() && + (lhs.type().id() == ID_unsignedbv || lhs.type().id() == ID_signedbv) && + (rhs.type().id() == ID_unsignedbv || rhs.type().id() == ID_signedbv)) + { + const auto &lhs_type = type_try_dynamic_cast(lhs.type()); + const auto &rhs_type = type_try_dynamic_cast(rhs.type()); + if(lhs_type->get_width() >= rhs_type->get_width()) + return notequal_exprt(lhs, typecast_exprt(rhs, lhs.type())); + else + return notequal_exprt(typecast_exprt(lhs, rhs.type()), rhs); + } + + return notequal_exprt(exprs.front(), exprs.back()); + } if(op_id == ID_le) return less_than_or_equal_exprt(exprs.front(), exprs.back()); if(op_id == ID_lt) @@ -333,8 +373,6 @@ exprt binary_functional_enumeratort::instantiate(const expr_listt &exprs) const return plus_exprt(exprs.front(), exprs.back()); if(op_id == ID_minus) return minus_exprt(exprs.front(), exprs.back()); - if(op_id == ID_notequal) - return notequal_exprt(exprs.front(), exprs.back()); return binary_exprt(exprs.front(), op_id, exprs.back()); } diff --git a/src/goto-synthesizer/goto_synthesizer_parse_options.cpp b/src/goto-synthesizer/goto_synthesizer_parse_options.cpp index a241666b674..979323adf5c 100644 --- a/src/goto-synthesizer/goto_synthesizer_parse_options.cpp +++ b/src/goto-synthesizer/goto_synthesizer_parse_options.cpp @@ -53,7 +53,7 @@ int goto_synthesizer_parse_optionst::doit() { // Synthesize loop invariants and annotate them into `goto_model` enumerative_loop_contracts_synthesizert synthesizer(goto_model, log); - annotate_invariants(synthesizer.synthesize_all(), goto_model, log); + annotate_invariants(synthesizer.synthesize_all(), goto_model); // Apply loop contracts. std::set to_exclude_from_nondet_static( diff --git a/src/goto-synthesizer/loop_contracts_synthesizer_base.h b/src/goto-synthesizer/loop_contracts_synthesizer_base.h index 1be6c305917..b1f31b8bf3a 100644 --- a/src/goto-synthesizer/loop_contracts_synthesizer_base.h +++ b/src/goto-synthesizer/loop_contracts_synthesizer_base.h @@ -16,11 +16,6 @@ Author: Qinheping Hu #include "synthesizer_utils.h" -#define OPT_SYNTHESIZE_LOOP_INVARIANTS "(synthesize-loop-invariants)" -#define HELP_LOOP_INVARIANT_SYNTHESIZER \ - " --synthesize-loop-invariants\n" \ - " synthesize and apply loop invariants\n" - class messaget; /// A base class for loop contracts synthesizers. @@ -32,7 +27,7 @@ class messaget; class loop_contracts_synthesizer_baset { public: - loop_contracts_synthesizer_baset(const goto_modelt &goto_model, messaget &log) + loop_contracts_synthesizer_baset(goto_modelt &goto_model, messaget &log) : goto_model(goto_model), log(log) { } @@ -48,7 +43,7 @@ class loop_contracts_synthesizer_baset virtual exprt synthesize(loop_idt) = 0; protected: - const goto_modelt &goto_model; + goto_modelt &goto_model; messaget &log; }; diff --git a/src/goto-synthesizer/module_dependencies.txt b/src/goto-synthesizer/module_dependencies.txt index 2ce15c2eaad..82f3529763f 100644 --- a/src/goto-synthesizer/module_dependencies.txt +++ b/src/goto-synthesizer/module_dependencies.txt @@ -1,8 +1,11 @@ analyses ansi-c +assembler cpp goto-checker goto-instrument goto-programs langapi +pointer-analysis +solvers util diff --git a/src/goto-synthesizer/synthesizer_utils.cpp b/src/goto-synthesizer/synthesizer_utils.cpp index f1a618b1dc9..f035ccabe76 100644 --- a/src/goto-synthesizer/synthesizer_utils.cpp +++ b/src/goto-synthesizer/synthesizer_utils.cpp @@ -8,7 +8,11 @@ Author: Qinheping Hu #include "synthesizer_utils.h" +#include + #include +#include +#include goto_programt::const_targett get_loop_end_from_loop_head_and_content( const goto_programt::const_targett &loop_head, @@ -94,8 +98,7 @@ get_loop_head(const unsigned int target_loop_number, goto_functiont &function) void annotate_invariants( const invariant_mapt &invariant_map, - goto_modelt &goto_model, - messaget &log) + goto_modelt &goto_model) { for(const auto &invariant_map_entry : invariant_map) { @@ -112,3 +115,25 @@ void annotate_invariants( invariant_map_entry.second; } } + +invariant_mapt combine_in_and_post_invariant_clauses( + const invariant_mapt &in_clauses, + const invariant_mapt &post_clauses, + const invariant_mapt &neg_guards) +{ + // Combine invariant + // (in_inv || !guard) && (!guard -> pos_inv) + invariant_mapt result; + for(const auto &in_clause : in_clauses) + { + const auto &id = in_clause.first; + const auto &it_guard = neg_guards.find(id); + + INVARIANT(it_guard != neg_guards.end(), "Some loop guard is missing."); + + result[id] = and_exprt( + or_exprt(it_guard->second, in_clause.second), + implies_exprt(it_guard->second, post_clauses.at(id))); + } + return result; +} diff --git a/src/goto-synthesizer/synthesizer_utils.h b/src/goto-synthesizer/synthesizer_utils.h index a71f0413a87..c8f9c6ecb50 100644 --- a/src/goto-synthesizer/synthesizer_utils.h +++ b/src/goto-synthesizer/synthesizer_utils.h @@ -9,11 +9,10 @@ Author: Qinheping Hu #ifndef CPROVER_GOTO_SYNTHESIZER_SYNTHESIZER_UTILS_H #define CPROVER_GOTO_SYNTHESIZER_SYNTHESIZER_UTILS_H +#include #include #include -#include - class goto_functiont; class messaget; template @@ -50,7 +49,13 @@ get_loop_head(const unsigned int loop_number, goto_functiont &function); /// loops. Corresponding loops are specified by keys of `invariant_map` void annotate_invariants( const invariant_mapt &invariant_map, - goto_modelt &goto_model, - messaget &log); + goto_modelt &goto_model); + +/// Combine invariant of form +/// (in_inv || !guard) && (!guard -> pos_inv) +invariant_mapt combine_in_and_post_invariant_clauses( + const invariant_mapt &in_clauses, + const invariant_mapt &post_clauses, + const invariant_mapt &neg_guards); #endif // CPROVER_GOTO_SYNTHESIZER_SYNTHESIZER_UTILS_H