diff --git a/jbmc/unit/Makefile b/jbmc/unit/Makefile index a9d0d5ad840..05b1342a0a5 100644 --- a/jbmc/unit/Makefile +++ b/jbmc/unit/Makefile @@ -126,6 +126,7 @@ CPROVER_LIBS =../src/java_bytecode/java_bytecode$(LIBEXT) \ $(CPROVER_DIR)/src/goto-instrument/reachability_slicer$(OBJEXT) \ $(CPROVER_DIR)/src/goto-instrument/nondet_static$(OBJEXT) \ $(CPROVER_DIR)/src/goto-instrument/full_slicer$(OBJEXT) \ + $(CPROVER_DIR)/src/goto-instrument/abstract_loops$(OBJEXT) \ $(CPROVER_DIR)/src/goto-instrument/unwindset$(OBJEXT) \ $(CPROVER_DIR)/src/pointer-analysis/pointer-analysis$(LIBEXT) \ $(CPROVER_DIR)/src/langapi/langapi$(LIBEXT) \ diff --git a/regression/cbmc/abstract-loops1/main.c b/regression/cbmc/abstract-loops1/main.c new file mode 100644 index 00000000000..3f010a9ec73 --- /dev/null +++ b/regression/cbmc/abstract-loops1/main.c @@ -0,0 +1,23 @@ +#define ROW 10 +#define COL 10 + +void main() +{ + int sum; + int image[ROW][COL]; + + // shrinkable + for(int j = 0; j < COL; j++) + image[0][j] = 0; + + sum = 0; + // outer loop unshrinkable + // inner loop shrinkable + for(int i = 0; i < ROW; i++) + { + image[i][0] = 0; + sum = sum + i; + for(int j = 0; j < COL; j++) + image[sum][j] = 0; + } +} diff --git a/regression/cbmc/abstract-loops1/test.desc b/regression/cbmc/abstract-loops1/test.desc new file mode 100644 index 00000000000..86ce76d47ad --- /dev/null +++ b/regression/cbmc/abstract-loops1/test.desc @@ -0,0 +1,15 @@ +CORE +main.c +--bounds-check --pointer-check --abstract-loops --show-goto-functions +^EXIT=0$ +^SIGNAL=0$ +ASSUME j < 10 +ASSUME j >= 0 +-- +ASSUME i < 10 +ASSUME i >= 0 +^warning: ignoring +-- +This test ensures that the loop with loop variable i is not shrunk (so it +appears in unwinding). Loops with loop variable j are shrunk. The test also +checks the constraints to the loop variable are correctly applied. diff --git a/regression/cbmc/abstract-loops2/main.c b/regression/cbmc/abstract-loops2/main.c new file mode 100644 index 00000000000..3abd2eb25a5 --- /dev/null +++ b/regression/cbmc/abstract-loops2/main.c @@ -0,0 +1,36 @@ +#define ROW 3 +#define COL 10 + +void boo(int r) +{ + int val = 0; + int image[ROW][COL]; + ; + // shrinkable, and set as shrink target + for(int j = 0; j < COL; j++) + { + int c = j + val; + image[r][c] = val; + } +} + +void main() +{ + int r; + int buffer[ROW]; + // not shrinkable since r is used in mem-safe assertion in boo() + // and r update itself in loop + for(int i = 0; i < ROW; i++) + { + r = r + i; + boo(r); + buffer[i]; + } + + // shrinkable + for(int i = 0; i < ROW; i++) + { + boo(i); + buffer[i]; + } +} diff --git a/regression/cbmc/abstract-loops2/test.desc b/regression/cbmc/abstract-loops2/test.desc new file mode 100644 index 00000000000..0bdbf650b35 --- /dev/null +++ b/regression/cbmc/abstract-loops2/test.desc @@ -0,0 +1,17 @@ +CORE +main.c +--bounds-check --pointer-check --abstract-loops --abstractset boo.0,main.0 --show-goto-functions +^EXIT=0$ +^SIGNAL=0$ +ASSUME j < 10 +ASSUME j >= 0 +-- +ASSUME i < 10 +ASSUME i >= 0 +^warning: ignoring +-- +This test ensures that the loops with loop variable i are not shrunk (so they +appear in unwinding). Although the second loop in main is shrinkable, it is not +in the target loop set provided by --abstractset, thus it shouldn't be shrunk. +The test also ensures the loop in boo() with loop variable j is shrunk and the +constraints are correctly applied. diff --git a/regression/cbmc/abstract-loops3/main.c b/regression/cbmc/abstract-loops3/main.c new file mode 100644 index 00000000000..7e83078c7f3 --- /dev/null +++ b/regression/cbmc/abstract-loops3/main.c @@ -0,0 +1,20 @@ +#include +#define ROW 10 + +void boo(int *x) +{ + *x = *x + 1; +} + +void main() +{ + int *x = (int *)malloc(sizeof(int)); + int buffer[ROW]; + *x = 2; + // not shrinkable since x has a self-update in boo() + for(int i = 0; i < ROW; i++) + { + boo(x); + buffer[*x] = 1; + } +} diff --git a/regression/cbmc/abstract-loops3/test.desc b/regression/cbmc/abstract-loops3/test.desc new file mode 100644 index 00000000000..9823c417ac6 --- /dev/null +++ b/regression/cbmc/abstract-loops3/test.desc @@ -0,0 +1,17 @@ +KNOWNBUG +main.c +--bounds-check --pointer-check --abstract-loops --abstractset main.0 +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +main\.0 +-- +ASSUME i < 10 +ASSUME i >= 0 +^warning: ignoring +-- +This test ensures that the loops with loop variable i are not shrunk (so they +appear in unwinding), even when the instruction that makes the loop unshrinkable +is in another function. + +Requires #2646, #2694 to work. diff --git a/regression/goto-analyzer/dependence-graph13/main.c b/regression/goto-analyzer/dependence-graph13/main.c new file mode 100644 index 00000000000..d01d8203a7a --- /dev/null +++ b/regression/goto-analyzer/dependence-graph13/main.c @@ -0,0 +1,12 @@ +void bar(int a, int b) +{ + int result = b; +} + +void main() +{ + int a = 1; + int b = 2; + int c = 3; + bar(a, b + c); +} diff --git a/regression/goto-analyzer/dependence-graph13/test.desc b/regression/goto-analyzer/dependence-graph13/test.desc new file mode 100644 index 00000000000..f1e80bc28a2 --- /dev/null +++ b/regression/goto-analyzer/dependence-graph13/test.desc @@ -0,0 +1,26 @@ +CORE +main.c +--dependence-graph --show +activate-multi-line-match +^EXIT=0$ +^SIGNAL=0$ +\/\/ ([0-9]+).*\n.*b = 2;(.*\n)*Data dependencies: (([0-9]+,\1)|(\1,[0-9]+))\n(.*\n){2,3}.*result = b +\/\/ ([0-9]+).*\n.*c = 3;(.*\n)*Data dependencies: (([0-9]+,\1)|(\1,[0-9]+))\n(.*\n){2,3}.*result = b +-- +^warning: ignoring +-- + +The two regular expressions above match output portions like shown below (with + being a location number). The intention is to check whether a function +parameter in the callee correctly depends on the caller-provided argument. + + // 3 file main.c line 11 function main + b = 2; +... +**** 12 file main.c line 5 function bar +Data dependencies: (,...)|(...,) + + // 12 file main.c line 5 function bar + result = b; + +The second regex matches for c. diff --git a/regression/goto-instrument/abstract-loops1/main.c b/regression/goto-instrument/abstract-loops1/main.c new file mode 100644 index 00000000000..3f010a9ec73 --- /dev/null +++ b/regression/goto-instrument/abstract-loops1/main.c @@ -0,0 +1,23 @@ +#define ROW 10 +#define COL 10 + +void main() +{ + int sum; + int image[ROW][COL]; + + // shrinkable + for(int j = 0; j < COL; j++) + image[0][j] = 0; + + sum = 0; + // outer loop unshrinkable + // inner loop shrinkable + for(int i = 0; i < ROW; i++) + { + image[i][0] = 0; + sum = sum + i; + for(int j = 0; j < COL; j++) + image[sum][j] = 0; + } +} diff --git a/regression/goto-instrument/abstract-loops1/test.desc b/regression/goto-instrument/abstract-loops1/test.desc new file mode 100644 index 00000000000..afe3164b882 --- /dev/null +++ b/regression/goto-instrument/abstract-loops1/test.desc @@ -0,0 +1,19 @@ +CORE +main.c +--bounds-check --pointer-check --abstract-loops +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +main\.0 +ASSUME j < 10 +ASSUME j >= 0 +-- +main\.1 +main\.2 +ASSUME i < 10 +ASSUME i >= 0 +^warning: ignoring +-- +This test ensures that the loop with loop variable i is not shrunk (so it +appears in unwinding). Loops with loop variable j are shrunk. The test also +checks the constraints to the loop variable are correctly applied. diff --git a/regression/goto-instrument/abstract-loops2/main.c b/regression/goto-instrument/abstract-loops2/main.c new file mode 100644 index 00000000000..3abd2eb25a5 --- /dev/null +++ b/regression/goto-instrument/abstract-loops2/main.c @@ -0,0 +1,36 @@ +#define ROW 3 +#define COL 10 + +void boo(int r) +{ + int val = 0; + int image[ROW][COL]; + ; + // shrinkable, and set as shrink target + for(int j = 0; j < COL; j++) + { + int c = j + val; + image[r][c] = val; + } +} + +void main() +{ + int r; + int buffer[ROW]; + // not shrinkable since r is used in mem-safe assertion in boo() + // and r update itself in loop + for(int i = 0; i < ROW; i++) + { + r = r + i; + boo(r); + buffer[i]; + } + + // shrinkable + for(int i = 0; i < ROW; i++) + { + boo(i); + buffer[i]; + } +} diff --git a/regression/goto-instrument/abstract-loops2/test.desc b/regression/goto-instrument/abstract-loops2/test.desc new file mode 100644 index 00000000000..e0bb9468433 --- /dev/null +++ b/regression/goto-instrument/abstract-loops2/test.desc @@ -0,0 +1,21 @@ +CORE +main.c +--bounds-check --pointer-check --abstract-loops --abstractset boo.0,main.0 +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +main\.0 +main\.1 +ASSUME j < 10 +ASSUME j >= 0 +-- +boo\.0 +ASSUME i < 10 +ASSUME i >= 0 +^warning: ignoring +-- +This test ensures that the loops with loop variable i are not shrunk (so they +appear in unwinding). Although the second loop in main is shrinkable, it is not +in the target loop set provided by --abstractset, thus it shouldn't be shrunk. +The test also ensures the loop in boo() with loop variable j is shrunk and the +constraints are correctly applied. diff --git a/regression/goto-instrument/abstract-loops3/main.c b/regression/goto-instrument/abstract-loops3/main.c new file mode 100644 index 00000000000..7e83078c7f3 --- /dev/null +++ b/regression/goto-instrument/abstract-loops3/main.c @@ -0,0 +1,20 @@ +#include +#define ROW 10 + +void boo(int *x) +{ + *x = *x + 1; +} + +void main() +{ + int *x = (int *)malloc(sizeof(int)); + int buffer[ROW]; + *x = 2; + // not shrinkable since x has a self-update in boo() + for(int i = 0; i < ROW; i++) + { + boo(x); + buffer[*x] = 1; + } +} diff --git a/regression/goto-instrument/abstract-loops3/test.desc b/regression/goto-instrument/abstract-loops3/test.desc new file mode 100644 index 00000000000..937e977ed43 --- /dev/null +++ b/regression/goto-instrument/abstract-loops3/test.desc @@ -0,0 +1,17 @@ +KNOWNBUG +main.c +--bounds-check --pointer-check --add-library --abstract-loops --abstractset main.0 +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +main\.0 +-- +ASSUME i < 10 +ASSUME i >= 0 +^warning: ignoring +-- +This test ensures that the loops with loop variable i are not shrunk (so they +appear in unwinding), even when the instruction that makes the loop unshrinkable +is in another function. + +Requires #2646, #2694 to work. diff --git a/src/analyses/dependence_graph.cpp b/src/analyses/dependence_graph.cpp index d80ad819256..dc47d6fa543 100644 --- a/src/analyses/dependence_graph.cpp +++ b/src/analyses/dependence_graph.cpp @@ -238,6 +238,16 @@ void dep_graph_domaint::transform( data_dependencies(from, function_to, to, *dep_graph, ns); } +dep_graph_domaint::depst dep_graph_domaint::get_data_deps() const +{ + return data_deps; +} + +dep_graph_domaint::depst dep_graph_domaint::get_control_deps() const +{ + return control_deps; +} + void dep_graph_domaint::output( std::ostream &out, const ai_baset &, diff --git a/src/analyses/dependence_graph.h b/src/analyses/dependence_graph.h index 8e78f583abe..0f1e7461a8f 100644 --- a/src/analyses/dependence_graph.h +++ b/src/analyses/dependence_graph.h @@ -93,6 +93,10 @@ class dep_graph_domaint:public ai_domain_baset const ai_baset &ai, const namespacet &ns) const final override; + typedef std::set depst; + depst get_data_deps() const; + depst get_control_deps() const; + jsont output_json( const ai_baset &ai, const namespacet &ns) const override; @@ -182,7 +186,6 @@ class dep_graph_domaint:public ai_domain_baset node_indext node_id; bool has_changed; - typedef std::set depst; // Set of locations with control instructions on which the instruction at this // location has a control dependency on diff --git a/src/analyses/goto_rw.cpp b/src/analyses/goto_rw.cpp index e047991b8bf..a947334cc8a 100644 --- a/src/analyses/goto_rw.cpp +++ b/src/analyses/goto_rw.cpp @@ -493,6 +493,23 @@ void rw_range_sett::add( static_cast(*entry->second).push_back( {range_start, range_end}); + + // add to the single expression read set + if(mode == get_modet::READ && expr_r_range_set.has_value()) + { + objectst::iterator expr_entry = + expr_r_range_set + ->insert( + std::pair>( + identifier, nullptr)) + .first; + + if(expr_entry->second == nullptr) + expr_entry->second = util_make_unique(); + + static_cast(*expr_entry->second) + .push_back({range_start, range_end}); + } } void rw_range_sett::get_objects_rec( diff --git a/src/analyses/goto_rw.h b/src/analyses/goto_rw.h index 49ce2de73c7..d191d1bd4f5 100644 --- a/src/analyses/goto_rw.h +++ b/src/analyses/goto_rw.h @@ -121,8 +121,7 @@ class rw_range_sett virtual ~rw_range_sett(); - explicit rw_range_sett(const namespacet &_ns): - ns(_ns) + explicit rw_range_sett(const namespacet &_ns) : ns(_ns) { } @@ -136,6 +135,21 @@ class rw_range_sett return w_range_set; } + /// Enable maintaining a read set for a single expression + void enable_expr_read_set() + { + expr_r_range_set = objectst{}; + } + + /// Obtain the read set for a single expression. Requires a prior call to + /// \ref enable_expr_read_set. + objectst fetch_expr_read_set() + { + objectst result = std::move(*expr_r_range_set); + expr_r_range_set.reset(); + return result; + } + const range_domaint &get_ranges(objectst::const_iterator it) const { PRECONDITION(dynamic_cast(it->second.get())!=nullptr); @@ -167,6 +181,7 @@ class rw_range_sett const namespacet &ns; objectst r_range_set, w_range_set; + optionalt expr_r_range_set; virtual void get_objects_rec( get_modet mode, diff --git a/src/analyses/reaching_definitions.cpp b/src/analyses/reaching_definitions.cpp index 6912385a90a..b475f2b961d 100644 --- a/src/analyses/reaching_definitions.cpp +++ b/src/analyses/reaching_definitions.cpp @@ -211,22 +211,33 @@ void rd_range_domaint::transform_function_call( ++it; } - const symbol_exprt &fn_symbol_expr=to_symbol_expr(code.function()); - const code_typet &code_type= - to_code_type(ns.lookup(fn_symbol_expr.get_identifier()).type); + rw_range_set_value_sett rw_set(ns, rd.get_value_sets()); - for(const auto ¶m : code_type.parameters()) + const code_typet &code_type = to_code_type(ns.lookup(function_to).type); + PRECONDITION(code_type.parameters().size() == code.arguments().size()); + auto arg_it = code.arguments().begin(); + for(const auto ¶meter : code_type.parameters()) { - const irep_idt &identifier=param.get_identifier(); + const irep_idt &identifier = parameter.get_identifier(); - if(identifier.empty()) - continue; + // get read set of the argument + rw_set.enable_expr_read_set(); + rw_set.get_objects_rec( + function_from, from, rw_range_sett::get_modet::READ, *arg_it); - auto param_bits = pointer_offset_bits(param.type(), ns); - if(param_bits.has_value()) - gen(from, identifier, 0, to_range_spect(*param_bits)); - else - gen(from, identifier, 0, -1); + for(const auto &r_set_pair : rw_set.fetch_expr_read_set()) + { + const rd_range_domaint::ranges_at_loct &w_ranges = + rd[from].get(r_set_pair.first); + for(const auto &w_range : w_ranges) + { + for(const auto &wr : w_range.second) + gen(w_range.first, identifier, wr.first, wr.second); + } + } + + // next argument/parameter pair + ++arg_it; } } else diff --git a/src/cbmc/Makefile b/src/cbmc/Makefile index a8030dbe428..31700d56f67 100644 --- a/src/cbmc/Makefile +++ b/src/cbmc/Makefile @@ -40,6 +40,7 @@ OBJ += ../ansi-c/ansi-c$(LIBEXT) \ ../goto-instrument/reachability_slicer$(OBJEXT) \ ../goto-instrument/nondet_static$(OBJEXT) \ ../goto-instrument/full_slicer$(OBJEXT) \ + ../goto-instrument/abstract_loops$(OBJEXT) \ ../goto-instrument/unwindset$(OBJEXT) \ ../analyses/analyses$(LIBEXT) \ ../langapi/langapi$(LIBEXT) \ diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index 43b24793e1f..c3542fb11fe 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -67,10 +67,10 @@ Author: Daniel Kroening, kroening@kroening.com #include #include -#include +#include #include #include -#include +#include #include @@ -159,6 +159,12 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) if(cmdline.isset("full-slice")) options.set_option("full-slice", true); + if(cmdline.isset("abstract-loops")) + options.set_option("abstract-loops", true); + + if(cmdline.isset("abstractset")) + options.set_option("abstractset", cmdline.get_value("abstractset")); + if(cmdline.isset("show-symex-strategies")) { status() << show_path_strategies() << eom; @@ -892,6 +898,24 @@ bool cbmc_parse_optionst::process_goto_program( reachability_slicer(goto_model); } + if(options.get_bool_option("abstract-loops")) + { + // make sure location numbers are set + goto_model.goto_functions.update(); + + log.status() << "Abstracting loops" << eom; + loop_mapt target_loop_map; + if(options.is_set("abstractset")) + { + if(parse_absloopset(options.get_option("abstractset"), target_loop_map)) + { + log.error() << "failed to parse input loop" << eom; + return true; + } + } + abstract_loops(goto_model, target_loop_map); + } + // full slice? if(options.get_bool_option("full-slice")) { @@ -990,6 +1014,7 @@ void cbmc_parse_optionst::help() HELP_REACHABILITY_SLICER_FB " --full-slice run full slicer (experimental)\n" // NOLINT(*) " --drop-unused-functions drop functions trivially unreachable from main function\n" // NOLINT(*) + HELP_ABSTRACT_LOOPS "\n" "Semantic transformations:\n" // NOLINTNEXTLINE(whitespace/line_length) diff --git a/src/cbmc/cbmc_parse_options.h b/src/cbmc/cbmc_parse_options.h index f3d63499913..203bb88b330 100644 --- a/src/cbmc/cbmc_parse_options.h +++ b/src/cbmc/cbmc_parse_options.h @@ -29,6 +29,8 @@ Author: Daniel Kroening, kroening@kroening.com #include +#include + #include #include "bmc.h" @@ -44,6 +46,7 @@ class optionst; OPT_FUNCTIONS \ "(no-simplify)(full-slice)" \ OPT_REACHABILITY_SLICER \ + OPT_ABSTRACT_LOOPS \ "(debug-level):(no-propagation)(no-simplify-if)" \ "(document-subgoals)(outfile):(test-preprocessor)" \ "D:I:(c89)(c99)(c11)(cpp98)(cpp03)(cpp11)" \ diff --git a/src/goto-instrument/Makefile b/src/goto-instrument/Makefile index 8dc28944f66..cbf6a13e7d2 100644 --- a/src/goto-instrument/Makefile +++ b/src/goto-instrument/Makefile @@ -1,4 +1,5 @@ -SRC = accelerate/accelerate.cpp \ +SRC = abstract_loops.cpp \ + accelerate/accelerate.cpp \ accelerate/acceleration_utils.cpp \ accelerate/all_paths_enumerator.cpp \ accelerate/cone_of_influence.cpp \ diff --git a/src/goto-instrument/abstract_loops.cpp b/src/goto-instrument/abstract_loops.cpp new file mode 100644 index 00000000000..f56df053189 --- /dev/null +++ b/src/goto-instrument/abstract_loops.cpp @@ -0,0 +1,689 @@ +/*******************************************************************\ + +Module: Loop shrinking + +Author: Zhixing Xu, zhixingx@princeton.edu + +\*******************************************************************/ + +#include "abstract_loops.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +// #define DEBUG_ABSTRACT_LOOPS + +#ifdef DEBUG_ABSTRACT_LOOPS +#include +#endif + +#include + +class abstract_loopst +{ +public: + typedef goto_functionst::goto_functiont goto_functiont; + + abstract_loopst(goto_modelt &goto_model, const loop_mapt &_target_loop_map) + : target_loop_map(_target_loop_map), ns(goto_model.symbol_table) + { + // compute program dependence graph + dependence_grapht dep_graph(ns); + // data structures initialization + dep_graph(goto_model.goto_functions, ns); + + // map location numbers to function identifiers as the dependence graph does + // not provide a mapping from program counters to the function containing + // the instruction + for(const auto &gf_entry : goto_model.goto_functions.function_map) + { + for(const auto &instruction : gf_entry.second.body.instructions) + location_to_function[instruction.location_number] = gf_entry.first; + } + + // build the CFG data structure + cfg(goto_model.goto_functions); + // call main function + abstract_loops(goto_model, dep_graph); + } + +protected: + // target abstract loop number for each function + const loop_mapt &target_loop_map; + // map from location number to the set of loops it belongs to + typedef std::set nodeset; + std::map insloop_map; + // set of instructions in loop + typedef const natural_loops_mutablet::natural_loopt loopt; + // data dependency set + typedef dep_graph_domaint::depst depst; + // map location numbers to function identifiers + std::map location_to_function; + // name space for program + const namespacet ns; + // cfg + struct cfg_nodet + { + cfg_nodet() : node_required(false) + { + } + + bool node_required; + }; + + typedef cfg_baset cfgt; + cfgt cfg; + + void + abstract_loops(goto_modelt &goto_model, const dependence_grapht &dep_graph); + + void get_loop_info( + const irep_idt &function_id, + const loopt &, + const dependence_grapht &dep_graph); + + void update_shrinkability(unsigned loc, const irep_idt &function_id); + + void check_assertion( + unsigned location, + const dependence_grapht &dep_graph, + const goto_functionst &goto_functions); + + void abstract_goto_program( + const irep_idt &function_id, + unsigned loop_num, + const dependence_grapht &dep_graph, + goto_functionst &goto_functions); + + /// add an element to unique queue + /// \param s: set to keep queue elements unique + /// \param q: the queue + /// \param e: element add to queue + void add_to_queue(nodeset &s, std::queue &q, unsigned e) + { + if(s.insert(e).second) + q.push(e); + } + + /// Check if the loop is a target for shrinking + /// \param function_id: function to contain loop + /// \param loop_num: loop_number to check + /// \return: true if the loop is shrinking target + bool check_target(const irep_idt &function_id, unsigned loop_num) const + { + auto function_entry = target_loop_map.find(function_id); + return target_loop_map.empty() || + (function_entry != target_loop_map.end() && + function_entry->second.find(loop_num) != + function_entry->second.end()); + } + + /// Check if a node has self-cycle in data dependency + /// Inefficient method, should get updated + /// \param target: node location + /// \param dep_graph: dependency graph for the program + bool is_in_cycle(unsigned target, const dependence_grapht &dep_graph) + { + nodeset dep_set; + std::queue dep_queue; + + depst data_deps = dep_graph[dep_graph[target].PC].get_data_deps(); + + for(const auto &dep : data_deps) + add_to_queue(dep_set, dep_queue, dep->location_number); + + while(!dep_queue.empty()) + { + unsigned node = dep_queue.front(); + if(node == target) + return true; + + dep_queue.pop(); + data_deps = dep_graph[dep_graph[node].PC].get_data_deps(); + + for(const auto &dep : data_deps) + add_to_queue(dep_set, dep_queue, dep->location_number); + } + + return false; + } + + // class to keep information of a single loop with simple helper function + class abstract_loopt + { + public: + abstract_loopt( + goto_programt::targett _head, + goto_programt::targett _tail, + irep_idt _loop_var) + : head(_head), tail(_tail), loop_var(_loop_var), shrinkable(true) + { + } + + // Loop variable dependency leaves + nodeset var_leaves; + // lines update loop variable value + nodeset var_updates; + // loop head and tail + goto_programt::targett head; + goto_programt::targett tail; + // Loop variable + irep_idt loop_var; + // loop variable initialization location + unsigned init_loc; + // loop shrinkable or not + bool shrinkable; + + /// make loop head an assumption(constraint) for loop variable + /// skip the goto at loop end + void build_assumption() + { + // make the goto inst into an assumption + head->make_assumption(boolean_negate(head->guard)); + // skip loop end line + tail->turn_into_skip(); + } + }; + + // map from the function name to loop information map + typedef std::map loopnum_mapt; + std::map absloop_map; + +#ifdef DEBUG_ABSTRACT_LOOPS + /// print out nodes in set + static void print_nodes(abstract_loopst::nodeset &node_set) + { + for(abstract_loopst::nodeset::iterator it = node_set.begin(); + it != node_set.end(); + ++it) + { + if(it != node_set.begin()) + std::cout << ","; + std::cout << *it; + } + std::cout << '\n'; + } +#endif +}; // end of class abstract_loopst + +/// Do the abstraction on goto program (modify goto program) +/// \param function_id: identifier of function containing loop +/// \param loop_num: target loop number +/// \param dep_graph: dependency graph for the program +/// \param goto_functions: goto_functions to abstract +void abstract_loopst::abstract_goto_program( + const irep_idt &function_id, + unsigned loop_num, + const dependence_grapht &dep_graph, + goto_functionst &goto_functions) +{ + abstract_loopt *loop_info = + &(absloop_map[function_id].find(loop_num)->second); + +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << "==Shrink loop #" << loop_num << " in " << function_id << "\n"; +#endif + + unsigned update_loc = *(loop_info->var_updates.begin()); + bool is_inc = true; + goto_functionst::function_mapt::iterator f_it = + goto_functions.function_map.find( + location_to_function.at(dep_graph[update_loc].PC->location_number)); + CHECK_RETURN(!f_it->second.body.instructions.empty()); + goto_programt::targett i_it = f_it->second.body.instructions.begin(); + std::advance(i_it, update_loc - i_it->location_number); + + INVARIANT(i_it->is_assign(), "instruction should be an assignment"); + exprt expr = to_code_assign(i_it->code).rhs(); + // This assumes loop variable is updated by +/- + if(expr.id() == ID_plus) + is_inc = true; + else if(expr.id() == ID_minus) + is_inc = false; + else // do not do abstraction + { +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << "Unshrinkable: Loop variable change not by +/-\n"; +#endif + return; + } + // skip variable update line + i_it->make_skip(); + + // change the initial assignment into assumption + f_it = goto_functions.function_map.find(function_id); + i_it = f_it->second.body.instructions.begin(); + std::advance(i_it, loop_info->init_loc - i_it->location_number); + expr = i_it->code; + expr.id(is_inc ? ID_ge : ID_le); + expr.type().id(ID_bool); + i_it->make_assumption(expr); + + // change loop head into assumption, skip goto at loop end + loop_info->build_assumption(); +} + +/// Mark all loops containing the assignment unshrinkable +/// \param loc: location number +/// \param function_id: function containing the instruction +void abstract_loopst::update_shrinkability( + unsigned loc, + const irep_idt &function_id) +{ + for(auto loop_n : insloop_map[loc]) + { + abstract_loopt *loop_info = + &(absloop_map[function_id].find(loop_n)->second); + loop_info->shrinkable = false; +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << " - intermediate node " << loc << " makes " << function_id + << "::" << loop_n << " unshrinkable\n"; +#endif + } +} + +/// Check data dependency for all assertions +/// \param location: line number for assertion +/// \param dep_graph: dependency graph for the program +/// \param goto_functions: source goto functions +void abstract_loopst::check_assertion( + unsigned location, + const dependence_grapht &dep_graph, + const goto_functionst &goto_functions) +{ +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << "Check assertion at: " << location << "\n"; +#endif + nodeset leaf_set, update_set; + + goto_programt::const_targett i_it = dep_graph[location].PC; + depst data_deps = dep_graph[i_it].get_data_deps(); + depst control_deps = dep_graph[i_it].get_control_deps(); + nodeset dep_set, ctrl_set; + std::queue dep_queue; + + for(const auto &dep : data_deps) + add_to_queue(dep_set, dep_queue, dep->location_number); + + for(const auto &cdep : control_deps) + { + add_to_queue(dep_set, dep_queue, cdep->location_number); + ctrl_set.insert(cdep->location_number); + } + + while(!dep_queue.empty()) + { + unsigned node = dep_queue.front(); + dep_queue.pop(); + data_deps = dep_graph[dep_graph[node].PC].get_data_deps(); + control_deps = dep_graph[dep_graph[node].PC].get_control_deps(); + if(!data_deps.empty()) + { + for(const auto &dep : data_deps) + { + add_to_queue(dep_set, dep_queue, dep->location_number); + if(ctrl_set.find(node) == ctrl_set.end()) + update_set.insert(node); + } + } + else + leaf_set.insert(node); + + for(const auto &cdep : control_deps) + { + add_to_queue(dep_set, dep_queue, cdep->location_number); + ctrl_set.insert(cdep->location_number); + } + } +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << " Leaf dependency nodes: "; + print_nodes(leaf_set); + std::cout << " Update node: "; + print_nodes(update_set); +#endif + + // node set + nodeset leaf_nodes; + nodeset update_nodes; + for(auto loop_n : insloop_map[location]) + { + irep_idt func = + location_to_function.at(dep_graph[location].PC->location_number); + abstract_loopt *loop_info = &(absloop_map[func].find(loop_n)->second); + leaf_nodes.insert( + loop_info->var_leaves.begin(), loop_info->var_leaves.end()); + update_nodes.insert( + loop_info->var_updates.begin(), loop_info->var_updates.end()); + } + // check dependence of leaf set + nodeset diff; + std::set_difference( + leaf_set.begin(), + leaf_set.end(), + leaf_nodes.begin(), + leaf_nodes.end(), + std::inserter(diff, diff.begin())); + // loops containing the update nodes should not be shrinkable + for(auto loc : diff) + { + update_shrinkability( + loc, location_to_function.at(dep_graph[loc].PC->location_number)); + } + + // check dependence of update set + diff.clear(); + std::set_difference( + update_set.begin(), + update_set.end(), + update_nodes.begin(), + update_nodes.end(), + std::inserter(diff, diff.begin())); + // loops containing the update nodes should not be shrinkable + for(auto loc : diff) + { + if(is_in_cycle(loc, dep_graph)) + { + nodeset entry_set; + std::queue entry_queue; + add_to_queue(entry_set, entry_queue, loc); + while(!entry_queue.empty()) + { + unsigned node = entry_queue.front(); + entry_queue.pop(); + const irep_idt &function_id = + location_to_function.at(dep_graph[node].PC->location_number); + update_shrinkability(node, function_id); + // check loops calling this function + goto_functionst::function_mapt::const_iterator f_it = + goto_functions.function_map.find(function_id); + CHECK_RETURN(f_it != goto_functions.function_map.end()); + + goto_programt::const_targett begin_function = + f_it->second.body.instructions.begin(); + + cfgt::entry_mapt::const_iterator entry = + cfg.entry_map.find(begin_function); + if(entry != cfg.entry_map.end()) + { + for(const auto &edge : cfg[entry->second].in) + add_to_queue(entry_set, entry_queue, edge.first); + } + } + } + else + { + // check if the update depends on loop variable + const goto_programt::instructiont &inst = *(dep_graph[loc].PC); + if(!inst.is_assign()) + continue; + value_setst &value_sets = + dep_graph.reaching_definitions().get_value_sets(); + rw_range_set_value_sett rw_set(ns, value_sets); + const code_assignt &code_assign = to_code_assign(inst.code); + rw_set.get_objects_rec( + location_to_function.at(dep_graph[loc].PC->location_number), + dep_graph[loc].PC, + rw_range_sett::get_modet::READ, + code_assign.lhs()); + bool dep_on_loop_var = false; + for(auto loop_n : insloop_map[loc]) + { + const irep_idt &func = + location_to_function.at(dep_graph[loc].PC->location_number); + abstract_loopt *loop_info = &(absloop_map[func].find(loop_n)->second); + if( + rw_set.get_r_set().find(loop_info->loop_var) != + rw_set.get_r_set().end()) + { + dep_on_loop_var = true; + } + } + if(dep_on_loop_var) + { + update_shrinkability( + loc, location_to_function.at(dep_graph[loc].PC->location_number)); + } + } + } +} + +/// Get loop information, create class to store the info +/// \param function_id: Name of the function the loop is part of +/// \param loop: list of instructions in loop +/// \param dep_graph: dependency graph for the program +void abstract_loopst::get_loop_info( + const irep_idt &function_id, + const loopt &loop, + const dependence_grapht &dep_graph) +{ + PRECONDITION(!loop.empty()); + + // construct loop map, make instructions ordered + std::map loop_map; + for(loopt::const_iterator l_it = loop.begin(); l_it != loop.end(); l_it++) + loop_map[(*l_it)->location_number] = *l_it; + + goto_programt::targett head = loop_map.begin()->second; + goto_programt::targett last = (--loop_map.end())->second; + CHECK_RETURN(last->is_backwards_goto()); + +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << "Loop " << function_id << "::" << last->loop_number + << "\n- head location: " << head->location_number << "\n"; +#endif + + // Only deal with loops starting with goto instruction + if(!head->is_goto()) + { +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << "Unshrinkable: only deal with loop start with goto\n"; +#endif + return; + } + + // identify loop variable + value_setst &value_sets = dep_graph.reaching_definitions().get_value_sets(); + rw_range_set_value_sett rw_set(ns, value_sets); + goto_rw(function_id, head, rw_set); + // can only handle single loop guard with one symbol now + if(rw_set.get_r_set().size() != 1) + { + // do nothing, won't record information of this loop +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << "Unshrinkable: loop condition with multiple variable\n"; +#endif + return; + } + irep_idt loop_var = rw_set.get_r_set().begin()->first; +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << "- loop variable: " << loop_var << "\n"; +#endif + + abstract_loopt loop_info(head, last, loop_var); + + // find dependence nodes for the loop variable + depst data_deps = dep_graph[head].get_data_deps(); + nodeset dep_set, direct_dep; + std::queue dep_queue; + + for(const auto &dep : data_deps) + { + add_to_queue(dep_set, dep_queue, dep->location_number); + direct_dep.insert(dep->location_number); + } + + while(!dep_queue.empty()) + { + unsigned node = dep_queue.front(); + dep_queue.pop(); + data_deps = dep_graph[dep_graph[node].PC].get_data_deps(); + if(!data_deps.empty()) + { + for(const auto &dep : data_deps) + { + add_to_queue(dep_set, dep_queue, dep->location_number); + if(is_in_cycle(node, dep_graph)) + loop_info.var_updates.insert(node); + } + } + else + loop_info.var_leaves.insert(node); + } + + // Only handle loops with simple loop variable initialization and update + // TODO: handle situation where loop variable is updated multiple times + if(loop_info.var_updates.size() != 1) + { +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << "Unshrinkable: Loop variable has multiple/no update\n"; +#endif + return; + } + // get loop variable initialization location + nodeset diff; + std::set_difference( + direct_dep.begin(), + direct_dep.end(), + loop_info.var_updates.begin(), + loop_info.var_updates.end(), + std::inserter(diff, diff.begin())); + if(diff.size() == 1) + loop_info.init_loc = *(diff.begin()); + else + { +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << "Unshrinkable: Loop variable has no/multiple init\n"; +#endif + return; + } + + // This is for SVcomp cases where values can be nondet + const code_assignt &code_assign = + to_code_assign(dep_graph[loop_info.init_loc].PC->code); + if(code_assign.rhs().id() == ID_side_effect) + { +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << "Unshrinkable: Loop variable init to nondet\n"; +#endif + return; + } + +#ifdef DEBUG_ABSTRACT_LOOPS + std::cout << " Leaf dependency nodes for loop variable: "; + print_nodes(loop_info.var_leaves); + std::cout << " Update node for loop variable: "; + print_nodes(loop_info.var_updates); +#endif + + // add to the instruction # -> loop # map + // unsafe with function call, further check when doing abstraction + for(unsigned n = head->location_number; n <= last->location_number; n++) + insloop_map[n].insert(last->loop_number); + + absloop_map[function_id].emplace(last->loop_number, loop_info); +} + +/// Main function for abstraction, has 3 iterations to +/// 1. Identify loop variables and their dependency +/// 2. Check assertions in program and their dependency +/// 3. Abstract shrinkable loop in goto program +/// \param goto_model: input goto model to modify +/// \param dep_graph: dependency graph for the program +void abstract_loopst::abstract_loops( + goto_modelt &goto_model, + const dependence_grapht &dep_graph) +{ + // iter over loops to identify all loop variables and their dependence + Forall_goto_functions(it, goto_model.goto_functions) + { + if(!it->second.body_available()) + continue; + + natural_loops_mutablet natural_loops((it->second).body); + for(const auto &loop : natural_loops.loop_map) + get_loop_info(it->first, loop.second, dep_graph); + } + + // iter over all assertions to check their dependence + Forall_goto_functions(it, goto_model.goto_functions) + { + if(!it->second.body_available()) + continue; + + Forall_goto_program_instructions(i_it, it->second.body) + { + if(i_it->is_assert()) + { + check_assertion( + i_it->location_number, dep_graph, goto_model.goto_functions); + } + } + } + + // abstract the shrinkable loops + for(const auto &abstraction_entry : absloop_map) + { + const irep_idt &function_id = abstraction_entry.first; + + for(const auto &loop_entry : abstraction_entry.second) + { + if( + loop_entry.second.shrinkable && + check_target(function_id, loop_entry.first)) + { + abstract_goto_program( + function_id, loop_entry.first, dep_graph, goto_model.goto_functions); + } + } + } +} + +/// Create abstract_loopst struct for given goto program +/// \param goto_model: the goto progarm model +/// \param target_loop_map: the target_loops to shrink +void abstract_loops(goto_modelt &goto_model, const loop_mapt &target_loop_map) +{ + abstract_loopst(goto_model, target_loop_map); + remove_skip(goto_model); +} + +/// Parse input of target loops. +/// Similar to the function with same name in skip_loops.cpp +/// \param loop_ids: input string of target loop +/// \param [out] funcloop_map: a map from function name to loop number +/// \return return false if parse succeeds +static bool parse_loop_ids(const std::string &loop_ids, loop_mapt &funcloop_map) +{ + std::vector loops; + split_string(loop_ids, ',', loops, true, true); + + for(const auto &loop : loops) + { + std::string::size_type delim = loop.rfind("."); + + if(delim == std::string::npos) + return true; + + const std::string function_id = loop.substr(0, delim); + const unsigned nr = safe_string2unsigned(loop.substr(delim + 1)); + + funcloop_map[function_id].insert(nr); + } + + return false; +} + +bool parse_absloopset(const std::string &inputset, loop_mapt &target_loop_map) +{ + return parse_loop_ids(inputset, target_loop_map); +} diff --git a/src/goto-instrument/abstract_loops.h b/src/goto-instrument/abstract_loops.h new file mode 100644 index 00000000000..0d4189700a9 --- /dev/null +++ b/src/goto-instrument/abstract_loops.h @@ -0,0 +1,46 @@ +/*******************************************************************\ + +Module: Loop shrinking + +Author: Zhixing Xu, zhixingx@princeton.edu + +\*******************************************************************/ + +/// \file +/// Loop shrinking. Based on "Property Checking Array Programs Using +/// Loop Shrinking" by Shrawan Kumar, Amitabha Sanyal, Venkatesh R and Punit +/// Shah, TACAS 2018. + +#include +#include +#include + +#include + +#ifndef CPROVER_GOTO_INSTRUMENT_ABSTRACT_LOOPS_H +#define CPROVER_GOTO_INSTRUMENT_ABSTRACT_LOOPS_H + +class goto_modelt; + +typedef std::set loop_idst; +typedef std::map loop_mapt; + +void abstract_loops(goto_modelt &goto_model, const loop_mapt &target_loop_map); + +/// Parse target loops in string format. +/// \param inputset: input of loops to abstract in string format +/// \param [out] target_loop_map: a map from function name to loop number +/// \return return false if parse succeeds +bool parse_absloopset(const std::string &inputset, loop_mapt &target_loop_map); + +// clang-format off +#define OPT_ABSTRACT_LOOPS \ + "(abstract-loops)(abstractset):" + +#define HELP_ABSTRACT_LOOPS \ + /* NOLINTNEXTLINE(whitespace/line_length) */ \ + " --abstract-loops shrink loop based on assertion dependency (experimental)\n" \ + " --abstractset L,... only shrink loop L,...\n" +// clang-format on + +#endif // CPROVER_GOTO_INSTRUMENT_ABSTRACT_LOOPS_H diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index 840b395ad9a..681a6bd9efe 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -1387,6 +1387,19 @@ void goto_instrument_parse_optionst::instrument_goto_program() havoc_loops(goto_model); } + if(cmdline.isset("abstract-loops")) + { + // make sure location numbers are set + goto_model.goto_functions.update(); + + status() << "Abstracting loops" << eom; + loop_mapt target_loop_map; + if(cmdline.isset("abstractset")) + if(parse_absloopset(cmdline.get_value("abstractset"), target_loop_map)) + throw "failed to parse input loop"; + abstract_loops(goto_model, target_loop_map); + } + if(cmdline.isset("k-induction")) { bool base_case=cmdline.isset("base-case"); @@ -1638,6 +1651,7 @@ void goto_instrument_parse_optionst::help() " --havoc-loops over-approximate all loops\n" " --accelerate add loop accelerators\n" " --skip-loops add gotos to skip selected loops during execution\n" // NOLINT(*) + HELP_ABSTRACT_LOOPS "\n" "Memory model instrumentations:\n" " --mm instruments a weak memory model\n" diff --git a/src/goto-instrument/goto_instrument_parse_options.h b/src/goto-instrument/goto_instrument_parse_options.h index 647ec904b0b..0796bcce8d2 100644 --- a/src/goto-instrument/goto_instrument_parse_options.h +++ b/src/goto-instrument/goto_instrument_parse_options.h @@ -29,6 +29,7 @@ Author: Daniel Kroening, kroening@kroening.com #include +#include "abstract_loops.h" #include "aggressive_slicer.h" #include "generate_function_bodies.h" @@ -84,6 +85,7 @@ Author: Daniel Kroening, kroening@kroening.com "(cav11)" \ OPT_TIMESTAMP \ "(show-natural-loops)(accelerate)(havoc-loops)" \ + OPT_ABSTRACT_LOOPS \ "(error-label):(string-abstraction)" \ "(verbosity):(version)(xml-ui)(json-ui)(show-loops)" \ "(accelerate)(constant-propagator)" \ diff --git a/unit/Makefile b/unit/Makefile index 26de90c719b..c844baa0130 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -117,6 +117,7 @@ BMC_DEPS =../src/cbmc/all_properties$(OBJEXT) \ ../src/goto-instrument/reachability_slicer$(OBJEXT) \ ../src/goto-instrument/nondet_static$(OBJEXT) \ ../src/goto-instrument/full_slicer$(OBJEXT) \ + ../src/goto-instrument/abstract_loops$(OBJEXT) \ ../src/goto-instrument/unwindset$(OBJEXT) \ ../src/xmllang/xmllang$(LIBEXT) \ ../src/goto-symex/goto-symex$(LIBEXT) \