diff --git a/src/path-symex/locs.cpp b/src/path-symex/locs.cpp index 8f6dd19..bcd4eaf 100644 --- a/src/path-symex/locs.cpp +++ b/src/path-symex/locs.cpp @@ -82,23 +82,60 @@ void locst::build(const goto_functionst &goto_functions) } } +void locst::output_reachable(std::ostream &out) const +{ + irep_idt function; + int reachable_count = 0; + int unreachable_count = 0; + + for(unsigned l = 0; l < loc_vector.size(); l++) + { + const loct &loc = loc_vector[l]; + if(loc.distance_to_property != -1) + { + reachable_count++; + if(function != loc.function) + { + function = loc.function; + out << "*** " << function << "\n"; + } + + out << " L" << l << ": " << " " << as_string(ns, *loc.target) + << " path length: " << loc_vector[l].distance_to_property << "\n"; + + if(!loc.branch_target.is_nil()) + out << " T: " << loc.branch_target << "\n"; + } + else + unreachable_count++; + } + out << "\n"; + out << "The entry location is L" << entry_loc << ".\n"; + out << "Number of reachable locs " << reachable_count << "\n"; + out << "Number of unreachable locs " << unreachable_count << "\n"; +} + void locst::output(std::ostream &out) const { irep_idt function; - for(unsigned l=0; ltype << " " // << loc.target->location - << " " << as_string(ns, *loc.target) << "\n"; + << " " << as_string(ns, *loc.target); + if(loc_vector[l].distance_to_property + != std::numeric_limits::max()) + out << " path length: " << loc_vector[l].distance_to_property; + out << "\n"; if(!loc.branch_target.is_nil()) out << " T: " << loc.branch_target << "\n"; diff --git a/src/path-symex/locs.h b/src/path-symex/locs.h index a7f54c9..1ec7c97 100644 --- a/src/path-symex/locs.h +++ b/src/path-symex/locs.h @@ -27,11 +27,12 @@ struct loct target(_target), function(_function) { + distance_to_property=std::numeric_limits::max(); } goto_programt::const_targett target; irep_idt function; - + std::size_t distance_to_property; // we only support a single branch target loc_reft branch_target; }; @@ -58,6 +59,7 @@ class locst explicit locst(const namespacet &_ns); void build(const goto_functionst &goto_functions); void output(std::ostream &out) const; + void output_reachable(std::ostream &out) const; loct &operator[] (loc_reft l) { diff --git a/src/path-symex/path_symex_state.h b/src/path-symex/path_symex_state.h index fc3597d..3b042ff 100644 --- a/src/path-symex/path_symex_state.h +++ b/src/path-symex/path_symex_state.h @@ -208,6 +208,11 @@ struct path_symex_statet return depth; } + unsigned get_shortest_path() const + { + return locs.loc_vector[pc().loc_number].distance_to_property; + } + void increase_depth() { depth++; diff --git a/src/symex/Makefile b/src/symex/Makefile index b14e327..3c94eee 100644 --- a/src/symex/Makefile +++ b/src/symex/Makefile @@ -2,6 +2,7 @@ SRC = path_search.cpp \ symex_cover.cpp \ symex_main.cpp \ symex_parse_options.cpp \ + shortest_path_graph.cpp \ # Empty last line OBJ += ../../$(CPROVER_DIR)/src/ansi-c/ansi-c$(LIBEXT) \ diff --git a/src/symex/path_search.cpp b/src/symex/path_search.cpp index 18106ae..ca3a6a3 100644 --- a/src/symex/path_search.cpp +++ b/src/symex/path_search.cpp @@ -20,6 +20,106 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include "shortest_path_graph.h" + +#include + + +void path_searcht::sort_queue() +{ + debug()<< " get shortest path, queue.size = " <::max(); + + std::list::iterator it; + std::list::iterator closest_state; + + for(it=queue.begin(); it!=queue.end(); ++it) + { + if(it->get_shortest_path() < shortest_path) + { + shortest_path = it->get_shortest_path(); + closest_state = it; + } + } + + if(shortest_path != std::numeric_limits::max()) + { + current_distance = shortest_path; + statet tmp = *closest_state; + queue.erase(closest_state); + queue.push_front(tmp); + } + else + { + error() << "all states have shortest path length = MAX_UNSIGNED_INT, " + << "try removing function pointers with goto-instrument next time." + << "randomly picking state instead" + << eom; + shuffle_queue(queue); + } +} + +// We prioritise remaining in the same function, but if there is no choice +// we take the next state with the shortest path +void path_searcht::sort_queue_per_function() +{ + debug()<< " get shortest path, queue.size = " <::max(); + + std::list::iterator it; + std::list::iterator closest_state; + + // pick the first state in the queue that is a direct successor, i.e., + // has a path length 1 less than the current path length + for(it=queue.begin(); it!=queue.end(); ++it) + { + if(it->get_shortest_path()+1 == current_distance) + { + shortest_path = it->get_shortest_path(); + current_distance = shortest_path; + statet tmp = *it; + queue.erase(it); + queue.push_front(tmp); + return; + } + } + + // if we get here there was no direct successor, we revert to + // picking the shortest path + sort_queue(); +} + +void path_searcht::shuffle_queue(queuet &queue) +{ + if(queue.size()<=1) + return; + + INVARIANT(queue.size()::max(), + "Queue size must be less than maximum integer"); + // pick random state and move to front + int rand_i = rand() % queue.size(); + + std::list::iterator it = queue.begin(); + for(int i=0; i::max(); number_of_dropped_states=0; number_of_paths=0; number_of_VCCs=0; @@ -51,6 +175,26 @@ path_searcht::resultt path_searcht::operator()( absolute_timet last_reported_time=start_time; initialize_property_map(goto_functions); + if(search_heuristic == search_heuristict::SHORTEST_PATH || + search_heuristic == search_heuristict::RAN_SHORTEST_PATH ) + { + status()<<"Building shortest path graph" << eom; + shortest_path_grapht shortest_path_graph(goto_functions, locs); + } + else if(search_heuristic == search_heuristict::SHORTEST_PATH_PER_FUNC) + { + status()<<"Building shortest path graph per function" << eom; + per_function_shortest_patht shortest_path_graph(goto_functions, locs); + } + + statet init_state = initial_state(var_map, locs, history); + queue.push_back(init_state); + initial_distance_to_property=init_state.get_shortest_path(); + + time_periodt initialisation_time=current_time()-start_time; + status() << "Initialisation took "<< initialisation_time << "s" << eom; + start_time=current_time(); + last_reported_time=start_time; while(!queue.empty()) { @@ -114,8 +258,13 @@ path_searcht::resultt path_searcht::operator()( << " thread " << state.get_current_thread()+1 << '/' << state.threads.size() << " PC " << state.pc() - << " depth " << state.get_depth() - << " [" << number_of_steps << " steps, " + << " depth " << state.get_depth(); + + if(search_heuristic == search_heuristict::SHORTEST_PATH + || search_heuristic == search_heuristict::RAN_SHORTEST_PATH) + status() << " distance to property " << state.get_shortest_path(); + + status() << " [" << number_of_steps << " steps, " << running_time << "s]" << messaget::eom; } } @@ -142,6 +291,9 @@ path_searcht::resultt path_searcht::operator()( // execute path_symex(state, tmp_queue); + if(search_heuristic == search_heuristict::RAN_DFS) + shuffle_queue(tmp_queue); + // put at head of main queue queue.splice(queue.begin(), tmp_queue); } @@ -208,6 +360,7 @@ void path_searcht::pick_state() switch(search_heuristic) { case search_heuristict::DFS: + case search_heuristict::RAN_DFS: // Picking the first one (most recently added) is a DFS. return; @@ -218,6 +371,18 @@ void path_searcht::pick_state() queue.splice(queue.begin(), queue, --queue.end(), queue.end()); return; + case search_heuristict::RAN_SHORTEST_PATH: + if(number_of_steps%1000==0) + shuffle_queue(queue); + else + sort_queue(); + return; + case search_heuristict::SHORTEST_PATH: + sort_queue(); + return; + case search_heuristict::SHORTEST_PATH_PER_FUNC: + sort_queue_per_function(); + return; case search_heuristict::LOCS: return; } diff --git a/src/symex/path_search.h b/src/symex/path_search.h index 9920723..4a112a8 100644 --- a/src/symex/path_search.h +++ b/src/symex/path_search.h @@ -80,6 +80,8 @@ class path_searcht:public safety_checkert bool stop_on_fail; // statistics + unsigned current_distance; + unsigned initial_distance_to_property; std::size_t number_of_dropped_states; std::size_t number_of_paths; std::size_t number_of_steps; @@ -107,8 +109,15 @@ class path_searcht:public safety_checkert }; void set_dfs() { search_heuristic=search_heuristict::DFS; } + void set_randomized_dfs() { search_heuristic=search_heuristict::RAN_DFS; } void set_bfs() { search_heuristic=search_heuristict::BFS; } void set_locs() { search_heuristic=search_heuristict::LOCS; } + void set_shortest_path() + { search_heuristic=search_heuristict::SHORTEST_PATH; } + void set_ran_shortest_path() + { search_heuristic=search_heuristict::RAN_SHORTEST_PATH; } + void set_shortest_path_per_function() + { search_heuristic=search_heuristict::SHORTEST_PATH_PER_FUNC; } typedef std::map property_mapt; property_mapt property_map; @@ -120,7 +129,13 @@ class path_searcht:public safety_checkert // The states most recently executed are at the head. typedef std::list queuet; queuet queue; - + /// Pick random element of queue and move to front + /// \param queue queue to be shuffled + void shuffle_queue(queuet &queue); + /// Move element with shortest distance to property + /// to the front of the queue + void sort_queue(); + void sort_queue_per_function(); // search heuristic void pick_state(); @@ -151,7 +166,9 @@ class path_searcht:public safety_checkert unsigned unwind_limit; unsigned time_limit; - enum class search_heuristict { DFS, BFS, LOCS } search_heuristic; + enum class search_heuristict + { DFS, RAN_DFS, BFS, LOCS, SHORTEST_PATH, + RAN_SHORTEST_PATH , SHORTEST_PATH_PER_FUNC } search_heuristic; source_locationt last_source_location; }; diff --git a/src/symex/shortest_path_graph.cpp b/src/symex/shortest_path_graph.cpp new file mode 100644 index 0000000..b1b6977 --- /dev/null +++ b/src/symex/shortest_path_graph.cpp @@ -0,0 +1,138 @@ +/*******************************************************************\ + +Module: Shortest path graph + +Author: elizabeth.polgreen@cs.ox.ac.uk + +\*******************************************************************/ + +#include "shortest_path_graph.h" + +#include + +void shortest_path_grapht::get_path_lengths_in_function() +{ + bool found_property = false; + bool found_end = false; + node_indext end_index; + node_indext property_index; + node_indext index = 0; + for(auto &n : nodes) + { + if(n.PC->is_assert()) + { + if(found_property == false) + { + n.is_property = true; + n.shortest_path_to_property = 0; + found_property = true; + property_index = index; + } + else + throw "shortest path search cannot be used with multiple properties"; + } + if(n.PC->is_end_function()) + { + end_index = index; + found_end = true; + } + index++; + } + + if(!found_property) + { + nodes[end_index].shortest_path_to_property = 0; + bfs(end_index); + } + else + bfs(property_index); + + write_lengths_to_locs(); +} + +void per_function_shortest_patht::build(const goto_functionst &goto_functions) +{ + forall_goto_functions(it, goto_functions) + if(it->second.body_available()) + { + shortest_path_grapht path_graph(it->second.body, locs); + } +} + +void shortest_path_grapht::bfs(node_indext property_index) +{ + // does BFS, not Dijkstra + // we hope the graph is sparse + std::vector frontier_set; + + frontier_set.reserve(nodes.size()); + + frontier_set.push_back(property_index); + nodes[property_index].visited = true; + + for(std::size_t d = 1; !frontier_set.empty(); ++d) + { + std::vector new_frontier_set; + + for(const auto &node_index : frontier_set) + { + const nodet &n = nodes[node_index]; + + // do all neighbors + // we go backwards through the graph + for(const auto &edge_in : n.in) + { + node_indext node_in = edge_in.first; + + if(!nodes[node_in].visited) + { + nodes[node_in].shortest_path_to_property = d; + nodes[node_in].visited = true; + new_frontier_set.push_back(node_in); + } + } + } + + frontier_set.swap(new_frontier_set); + } +} + +void shortest_path_grapht::write_lengths_to_locs() +{ + for(const auto &n : nodes) + { + loc_reft l = target_to_loc_map[n.PC]; + locs.loc_vector[l.loc_number].distance_to_property + = n.shortest_path_to_property; + } +} + +void shortest_path_grapht::get_path_lengths_to_property() +{ + node_indext property_index; + bool found_property=false; + for(node_indext n=0; nis_assert()) + { + if(found_property == false) + { + nodes[n].is_property = true; + nodes[n].shortest_path_to_property = 0; + working_set.insert(n); + property_index = n; + found_property = true; + } + else + throw "shortest path search cannot be used for multiple properties"; + } + } + if(!found_property) + throw "unable to find property"; + + bfs(property_index); + + write_lengths_to_locs(); +} + + diff --git a/src/symex/shortest_path_graph.h b/src/symex/shortest_path_graph.h new file mode 100644 index 0000000..9d1547e --- /dev/null +++ b/src/symex/shortest_path_graph.h @@ -0,0 +1,109 @@ +/*******************************************************************\ + +Module: Shortest path graph + +Author: elizabeth.polgreen@cs.ox.ac.uk + +\*******************************************************************/ + +#ifndef CPROVER_SYMEX_SHORTEST_PATH_GRAPH_H +#define CPROVER_SYMEX_SHORTEST_PATH_GRAPH_H + +#include +#include +#include +#include + + +struct shortest_path_nodet +{ + bool visited; + std::size_t shortest_path_to_property; + bool is_property; + shortest_path_nodet(): + visited(false), + is_property(false) + { + shortest_path_to_property = std::numeric_limits::max(); + } +}; + +/// \brief constructs a CFG of all program locations. Then computes +/// the shortest path from every program location to a single property. +/// WARNING: if more than one property is present in the graph, we will +/// use the first property found +/// The distances computed for each node are written to the corresponding +/// loct in the locst passed as param to the constructor. This allows us +/// to use these numbers to guide a symex search +/// \param goto functions to create the CFG from, locs struct made from the +/// same goto_functions +class shortest_path_grapht: public cfg_baset +{ +public: + explicit shortest_path_grapht( + const goto_functionst &_goto_functions, locst &_locs): + locs(_locs), + target_to_loc_map(_locs) + { + cfg_baset::operator()(_goto_functions); + get_path_lengths_to_property(); + } + + explicit shortest_path_grapht( + const goto_programt &_goto_program, locst &_locs): + locs(_locs), + target_to_loc_map(_locs) + { + cfg_baset::operator()(_goto_program); + get_path_lengths_in_function(); + } + +protected: + /// \brief writes the computed shortest path for every node + /// in the graph to the corresponding location in locst. + /// This is done so that we can use these numbers to guide + /// a search heuristic for symex + void write_lengths_to_locs(); + /// \brief computes the shortest path from every node in + /// the graph to a single property. WARNING: if more than one property + /// is present, we use the first one discovered. + /// Calls bfs() to do this. + void get_path_lengths_to_property(); + /// \brief computes the shortest path from every node in a + /// graph to the property, or the end of the funciton if + /// there is no property. + /// we assume the graph is a graph of a single function. + void get_path_lengths_in_function(); + /// \brief implements backwards BFS to compute distance from every node in + /// the graph to the node index given as parameter + /// \param destination node index + void bfs(node_indext property_index); + std::set working_set; + locst &locs; + target_to_loc_mapt target_to_loc_map; +}; + +/// \brief class contains CFG of program locations +/// for every function with the shortest distance to a +/// property, or the end of a function, calculated for each location +/// The distances computed for each node are written to the corresponding +/// loct in the locst passed as param to the constructor. This allows us +/// to use these numbers to guide a symex search +/// \param goto functions to create the CFGs from, locs struct made from the +/// same goto_functions +class per_function_shortest_patht +{ +public: + explicit per_function_shortest_patht( + const goto_functionst &_goto_functions, locst &_locs): + locs(_locs) + { + build(_goto_functions); + } + +protected: + locst &locs; + void build(const goto_functionst & goto_functions); +}; + +#endif /* CPROVER_SYMEX_SHORTEST_PATH_GRAPH_H */ diff --git a/src/symex/symex_parse_options.cpp b/src/symex/symex_parse_options.cpp index d28b638..8542cb5 100644 --- a/src/symex/symex_parse_options.cpp +++ b/src/symex/symex_parse_options.cpp @@ -58,6 +58,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "path_search.h" +#include "shortest_path_graph.h" symex_parse_optionst::symex_parse_optionst(int argc, const char **argv): parse_options_baset(SYMEX_OPTIONS, argc, argv), @@ -202,6 +203,18 @@ int symex_parse_optionst::doit() return 0; } + if(cmdline.isset("show-distances-to-property")) + { + const namespacet ns(goto_model.symbol_table); + locst locs(ns); + locs.build(goto_model.goto_functions); + + shortest_path_grapht path_search_graph(goto_model.goto_functions, locs); + + locs.output_reachable(std::cout); + return 0; + } + // do actual Symex try @@ -232,7 +245,12 @@ int symex_parse_optionst::doit() safe_string2unsigned(cmdline.get_value("max-search-time"))); if(cmdline.isset("dfs")) - path_search.set_dfs(); + { + if(cmdline.isset("randomize")) + path_search.set_randomized_dfs(); + else + path_search.set_dfs(); + } if(cmdline.isset("bfs")) path_search.set_bfs(); @@ -240,6 +258,17 @@ int symex_parse_optionst::doit() if(cmdline.isset("locs")) path_search.set_locs(); + if(cmdline.isset("shortest-path")) + { + if(cmdline.isset("randomize")) + path_search.set_ran_shortest_path(); + else + path_search.set_shortest_path(); + } + + if(cmdline.isset("shortest-path-per-function")) + path_search.set_shortest_path_per_function(); + if(cmdline.isset("show-vcc")) { path_search.show_vcc=true; @@ -636,8 +665,17 @@ void symex_parse_optionst::help() " --context-bound nr limit number of context switches\n" " --branch-bound nr limit number of branches taken\n" " --max-search-time s limit search to approximately s seconds\n" + " --dfs use depth first search\n" + " --bfs use breadth first search\n" + "--shortest-path use shortest path guided search\n" + "--shortest-path-per-function computes shortest path locally and uses to guide symex search\n" // NOLINT(*) + " --randomize in conjunction with dfs to use randomized dfs\n" // NOLINT(*) + " in conjunction with shortest path to use randomized shortest path guided search\n" // NOLINT(*) + " --eager-infeasibility query solver early to determine whether a path is infeasible before searching it\n" // NOLINT(*) "\n" "Other options:\n" + " --show-distances-to-property\n" + " shows the (context free) shortest path from every reachable program location to a single property" // NOLINT(*) " --version show version and exit\n" " --xml-ui use XML-formatted output\n" " --verbosity # verbosity level\n" diff --git a/src/symex/symex_parse_options.h b/src/symex/symex_parse_options.h index 99fd3f6..e55d6ee 100644 --- a/src/symex/symex_parse_options.h +++ b/src/symex/symex_parse_options.h @@ -40,7 +40,9 @@ class optionst; "(little-endian)(big-endian)" \ "(error-label):(verbosity):(no-library)" \ "(version)" \ - "(bfs)(dfs)(locs)" \ + "(bfs)(dfs)(locs)(shortest-path)" \ + "(shortest-path-per-function)" \ + "(randomize)" \ "(cover):" \ "(i386-linux)(i386-macos)(i386-win32)(win32)(winx64)(gcc)" \ "(ppc-macos)(unsigned-char)" \ @@ -50,6 +52,7 @@ class optionst; "(show-locs)(show-vcc)(show-properties)(show-symbol-table)" \ "(drop-unused-functions)" \ "(object-bits):" \ + "(show-distances-to-property)" \ OPT_SHOW_GOTO_FUNCTIONS \ "(property):(trace)(show-trace)(stop-on-fail)(eager-infeasibility)" \ "(no-simplify)(no-unwinding-assertions)(no-propagation)" \