From afe32b76a13f59de78863f11529c314350b0011f Mon Sep 17 00:00:00 2001 From: martin Date: Tue, 24 Jul 2018 14:20:29 +0100 Subject: [PATCH 1/5] Refactor the methods that access "the abstract domain at a location". This allows us to be a lot more flexible about the relationship between location and domain rather than assuming it is a 1-to-1 map. --- src/analyses/ai.cpp | 7 +++-- src/analyses/ai.h | 31 ++++++++++++++----- src/goto-analyzer/static_simplifier.cpp | 26 ++++++++-------- src/goto-analyzer/static_verifier.cpp | 9 ++++-- .../unreachable_instructions.cpp | 4 +-- 5 files changed, 48 insertions(+), 29 deletions(-) diff --git a/src/analyses/ai.cpp b/src/analyses/ai.cpp index d2aa0c14bbb..f900f902145 100644 --- a/src/analyses/ai.cpp +++ b/src/analyses/ai.cpp @@ -50,7 +50,7 @@ void ai_baset::output( out << "**** " << i_it->location_number << " " << i_it->source_location << "\n"; - find_state(i_it).output(out, *this, ns); + abstract_state_before(i_it)->output(out, *this, ns); out << "\n"; #if 1 goto_program.output_instruction(ns, identifier, out, *i_it); @@ -101,7 +101,8 @@ jsont ai_baset::output_json( json_numbert(std::to_string(i_it->location_number)); location["sourceLocation"]= json_stringt(i_it->source_location.as_string()); - location["abstractState"]=find_state(i_it).output_json(*this, ns); + location["abstractState"] = + abstract_state_before(i_it)->output_json(*this, ns); // Ideally we need output_instruction_json std::ostringstream out; @@ -162,7 +163,7 @@ xmlt ai_baset::output_xml( "source_location", i_it->source_location.as_string()); - location.new_element(find_state(i_it).output_xml(*this, ns)); + location.new_element(abstract_state_before(i_it)->output_xml(*this, ns)); // Ideally we need output_instruction_xml std::ostringstream out; diff --git a/src/analyses/ai.h b/src/analyses/ai.h index aea76ea7480..ab4f4c411bf 100644 --- a/src/analyses/ai.h +++ b/src/analyses/ai.h @@ -82,15 +82,23 @@ class ai_baset finalize(); } + /// Accessing individual domains at particular locations + /// (without needing to know what kind of domain or history is used) + /// A pointer to a copy as the method should be const and + /// there are some non-trivial cases including merging domains, etc. + /// Intended for users of the abstract interpreter; don't use internally. + /// Returns the abstract state before the given instruction - virtual const ai_domain_baset & abstract_state_before( - goto_programt::const_targett t) const = 0; + /// PRECONDITION(l is dereferenceable) + virtual std::unique_ptr abstract_state_before(locationt l) const = 0; /// Returns the abstract state after the given instruction - virtual const ai_domain_baset & abstract_state_after( - goto_programt::const_targett t) const + virtual std::unique_ptr abstract_state_after(locationt l) const { - return abstract_state_before(std::next(t)); + /// PRECONDITION(l is dereferenceable && std::next(l) is dereferenceable) + /// Check relies on a DATA_INVARIANT of goto_programs + INVARIANT(!l->is_end_function(), "No state after the last instruction"); + return abstract_state_before(std::next(l)); } virtual void clear() @@ -304,10 +312,17 @@ class ait:public ai_baset return it->second; } - const ai_domain_baset & abstract_state_before( - goto_programt::const_targett t) const override + std::unique_ptr abstract_state_before(locationt t) const override { - return (*this)[t]; + typename state_mapt::const_iterator it = state_map.find(t); + if(it == state_map.end()) + { + std::unique_ptr d = util_make_unique(); + CHECK_RETURN(d->is_bottom()); + return d; + } + + return util_make_unique(it->second); } void clear() override diff --git a/src/goto-analyzer/static_simplifier.cpp b/src/goto-analyzer/static_simplifier.cpp index d83026698a4..7bf82c7723d 100644 --- a/src/goto-analyzer/static_simplifier.cpp +++ b/src/goto-analyzer/static_simplifier.cpp @@ -65,8 +65,8 @@ bool static_simplifier( if(i_it->is_assert()) { - bool unchanged= - ai.abstract_state_before(i_it).ai_simplify(i_it->guard, ns); + bool unchanged = + ai.abstract_state_before(i_it)->ai_simplify(i_it->guard, ns); if(unchanged) unmodified.asserts++; @@ -75,8 +75,8 @@ bool static_simplifier( } else if(i_it->is_assume()) { - bool unchanged= - ai.abstract_state_before(i_it).ai_simplify(i_it->guard, ns); + bool unchanged = + ai.abstract_state_before(i_it)->ai_simplify(i_it->guard, ns); if(unchanged) unmodified.assumes++; @@ -85,8 +85,8 @@ bool static_simplifier( } else if(i_it->is_goto()) { - bool unchanged= - ai.abstract_state_before(i_it).ai_simplify(i_it->guard, ns); + bool unchanged = + ai.abstract_state_before(i_it)->ai_simplify(i_it->guard, ns); if(unchanged) unmodified.gotos++; @@ -102,11 +102,11 @@ bool static_simplifier( // i=j // should simplify to i=1, not to 0=1. - bool unchanged_lhs= - ai.abstract_state_before(i_it).ai_simplify_lhs(assign.lhs(), ns); + bool unchanged_lhs = + ai.abstract_state_before(i_it)->ai_simplify_lhs(assign.lhs(), ns); - bool unchanged_rhs= - ai.abstract_state_before(i_it).ai_simplify(assign.rhs(), ns); + bool unchanged_rhs = + ai.abstract_state_before(i_it)->ai_simplify(assign.rhs(), ns); if(unchanged_lhs && unchanged_rhs) unmodified.assigns++; @@ -117,13 +117,13 @@ bool static_simplifier( { code_function_callt &fcall=to_code_function_call(i_it->code); - bool unchanged= - ai.abstract_state_before(i_it).ai_simplify(fcall.function(), ns); + bool unchanged = + ai.abstract_state_before(i_it)->ai_simplify(fcall.function(), ns); exprt::operandst &args=fcall.arguments(); for(auto &o : args) - unchanged&=ai.abstract_state_before(i_it).ai_simplify(o, ns); + unchanged &= ai.abstract_state_before(i_it)->ai_simplify(o, ns); if(unchanged) unmodified.function_calls++; diff --git a/src/goto-analyzer/static_verifier.cpp b/src/goto-analyzer/static_verifier.cpp index f4fbc6320b7..70789e4ca5c 100644 --- a/src/goto-analyzer/static_verifier.cpp +++ b/src/goto-analyzer/static_verifier.cpp @@ -55,7 +55,8 @@ bool static_verifier( continue; exprt e(i_it->guard); - const ai_domain_baset &domain(ai.abstract_state_before(i_it)); + auto dp = ai.abstract_state_before(i_it); + const ai_domain_baset &domain(*dp); domain.ai_simplify(e, ns); json_objectt &j=json_result.push_back().make_object(); @@ -104,7 +105,8 @@ bool static_verifier( continue; exprt e(i_it->guard); - const ai_domain_baset &domain(ai.abstract_state_before(i_it)); + auto dp = ai.abstract_state_before(i_it); + const ai_domain_baset &domain(*dp); domain.ai_simplify(e, ns); xmlt &x=xml_result.new_element("result"); @@ -160,7 +162,8 @@ bool static_verifier( continue; exprt e(i_it->guard); - const ai_domain_baset &domain(ai.abstract_state_before(i_it)); + auto dp = ai.abstract_state_before(i_it); + const ai_domain_baset &domain(*dp); domain.ai_simplify(e, ns); out << '[' << i_it->source_location.get_property_id() diff --git a/src/goto-analyzer/unreachable_instructions.cpp b/src/goto-analyzer/unreachable_instructions.cpp index 7f2c42a34a5..0c7ef1a4581 100644 --- a/src/goto-analyzer/unreachable_instructions.cpp +++ b/src/goto-analyzer/unreachable_instructions.cpp @@ -59,7 +59,7 @@ static void build_dead_map_from_ai( dead_mapt &dest) { forall_goto_program_instructions(it, goto_program) - if(ai.abstract_state_before(it).is_bottom()) + if(ai.abstract_state_before(it)->is_bottom()) dest.insert(std::make_pair(it->location_number, it)); } @@ -418,7 +418,7 @@ std::unordered_set compute_called_functions_from_ai( const goto_programt &p = f_it->second.body; - if(!ai.abstract_state_before(p.instructions.begin()).is_bottom()) + if(!ai.abstract_state_before(p.instructions.begin())->is_bottom()) called.insert(f_it->first); } From 1fe0796eab1cd4027754ecda717a0d78300e1c83 Mon Sep 17 00:00:00 2001 From: martin Date: Tue, 16 Jan 2018 05:01:47 +0000 Subject: [PATCH 2/5] Convert various older domains to use the more recent ait API. This is mostly using accessor functions rather than directly using inherited members but it allows a slightly greater degree of independence. --- src/analyses/constant_propagator.cpp | 20 +++++++++++--------- src/analyses/dependence_graph.cpp | 4 ++-- src/analyses/invariant_propagation.cpp | 8 ++++---- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/analyses/constant_propagator.cpp b/src/analyses/constant_propagator.cpp index 57b854a52ee..b6bb6688f62 100644 --- a/src/analyses/constant_propagator.cpp +++ b/src/analyses/constant_propagator.cpp @@ -592,43 +592,45 @@ void constant_propagator_ait::replace( { Forall_goto_program_instructions(it, goto_function.body) { - state_mapt::iterator s_it=state_map.find(it); + // Works because this is a location (but not history) sensitive domain + const constant_propagator_domaint &d = (*this)[it]; - if(s_it==state_map.end()) + if(d.is_bottom()) continue; - replace_types_rec(s_it->second.values.replace_const, it->code); - replace_types_rec(s_it->second.values.replace_const, it->guard); + replace_types_rec(d.values.replace_const, it->code); + replace_types_rec(d.values.replace_const, it->guard); if(it->is_goto() || it->is_assume() || it->is_assert()) { - s_it->second.partial_evaluate(it->guard, ns); + d.partial_evaluate(it->guard, ns); } else if(it->is_assign()) { exprt &rhs=to_code_assign(it->code).rhs(); - s_it->second.partial_evaluate(rhs, ns); + d.partial_evaluate(rhs, ns); + if(rhs.id()==ID_constant) rhs.add_source_location()=it->code.op0().source_location(); } else if(it->is_function_call()) { exprt &function = to_code_function_call(it->code).function(); - s_it->second.partial_evaluate(function, ns); + d.partial_evaluate(function, ns); exprt::operandst &args= to_code_function_call(it->code).arguments(); for(auto &arg : args) { - s_it->second.partial_evaluate(arg, ns); + d.partial_evaluate(arg, ns); } } else if(it->is_other()) { if(it->code.get_statement()==ID_expression) { - s_it->second.partial_evaluate(it->code, ns); + d.partial_evaluate(it->code, ns); } } } diff --git a/src/analyses/dependence_graph.cpp b/src/analyses/dependence_graph.cpp index 95ae2de10ae..24c0320af66 100644 --- a/src/analyses/dependence_graph.cpp +++ b/src/analyses/dependence_graph.cpp @@ -307,9 +307,9 @@ void dependence_grapht::add_dep( goto_programt::const_targett from, goto_programt::const_targett to) { - const node_indext n_from=state_map[from].get_node_id(); + const node_indext n_from = (*this)[from].get_node_id(); assert(n_fromsecond.invariant_set; + const invariant_sett &invariant_set = d.invariant_set; exprt simplified_guard(i_it->guard); From e65f0277a2e820a35ad81ea73cbd187a5154aff9 Mon Sep 17 00:00:00 2001 From: martin Date: Tue, 24 Jul 2018 15:12:23 +0100 Subject: [PATCH 3/5] Add comments to the abstract interpreter interface. --- src/analyses/ai.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/analyses/ai.h b/src/analyses/ai.h index ab4f4c411bf..e2f3a4d83a4 100644 --- a/src/analyses/ai.h +++ b/src/analyses/ai.h @@ -25,6 +25,8 @@ Author: Daniel Kroening, kroening@kroening.com #include "ai_domain.h" +/// The basic interface of an abstract interpreter. This should be enough +/// to create, run and query an abstract interpreter. // don't use me -- I am just a base class // use ait instead class ai_baset @@ -41,6 +43,7 @@ class ai_baset { } + /// Running the interpreter void operator()( const goto_programt &goto_program, const namespacet &ns) @@ -101,6 +104,7 @@ class ai_baset return abstract_state_before(std::next(l)); } + /// Resets the domain virtual void clear() { } @@ -245,6 +249,9 @@ class ai_baset const goto_functionst &goto_functions, const namespacet &ns); + // Visit performs one step of abstract interpretation from location l + // Depending on the instruction type it may compute a number of "edges" + // or applications of the abstract transformer // true = found something new bool visit( locationt l, From 773bc86e2c3fbb635d33d6a8f7dd9b27f34aa94d Mon Sep 17 00:00:00 2001 From: martin Date: Tue, 24 Jul 2018 15:58:49 +0100 Subject: [PATCH 4/5] Convert various comments, asserts and throws into invariants. --- src/analyses/ai.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/analyses/ai.cpp b/src/analyses/ai.cpp index f900f902145..f0fc24aa219 100644 --- a/src/analyses/ai.cpp +++ b/src/analyses/ai.cpp @@ -15,8 +15,9 @@ Author: Daniel Kroening, kroening@kroening.com #include #include -#include +#include #include +#include #include "is_threaded.h" @@ -220,7 +221,7 @@ void ai_baset::finalize() ai_baset::locationt ai_baset::get_next( working_sett &working_set) { - assert(!working_set.empty()); + PRECONDITION(!working_set.empty()); working_sett::iterator i=working_set.begin(); locationt l=i->second; @@ -248,6 +249,7 @@ bool ai_baset::fixedpoint( { locationt l=get_next(working_set); + // goto_program is really only needed for iterator manipulation if(visit(l, working_set, goto_program, goto_functions, ns)) new_data=true; } @@ -323,6 +325,8 @@ bool ai_baset::do_function_call( // initialize state, if necessary get_state(l_return); + PRECONDITION(l_call->is_function_call()); + const goto_functionst::goto_functiont &goto_function= f_it->second; @@ -388,7 +392,17 @@ bool ai_baset::do_function_call_rec( const goto_functionst &goto_functions, const namespacet &ns) { - assert(!goto_functions.function_map.empty()); + PRECONDITION(!goto_functions.function_map.empty()); + + // We can't really do this here -- we rely on + // these being removed by some previous analysis. + PRECONDITION(function.id() != ID_dereference); + + // Can't be a function + DATA_INVARIANT( + function.id() != "NULL-object", "Function cannot be null object"); + DATA_INVARIANT(function.id() != ID_member, "Function cannot be struct field"); + DATA_INVARIANT(function.id() != ID_index, "Function cannot be array element"); bool new_data=false; @@ -411,8 +425,7 @@ bool ai_baset::do_function_call_rec( } else if(function.id()==ID_if) { - if(function.operands().size()!=3) - throw "if has three operands"; + DATA_INVARIANT(function.operands().size() != 3, "if has three operands"); bool new_data1= do_function_call_rec( @@ -433,19 +446,6 @@ bool ai_baset::do_function_call_rec( if(new_data1 || new_data2) new_data=true; } - else if(function.id()==ID_dereference) - { - // We can't really do this here -- we rely on - // these being removed by some previous analysis. - } - else if(function.id() == ID_null_object) - { - // ignore, can't be a function - } - else if(function.id()==ID_member || function.id()==ID_index) - { - // ignore, can't be a function - } else { throw "unexpected function_call argument: "+ From 28ba19240616eb7b7058c6e90a59844ef337dc38 Mon Sep 17 00:00:00 2001 From: martin Date: Thu, 26 Jul 2018 15:14:02 +0100 Subject: [PATCH 5/5] Strengthen the invariant on what are acceptable function calls. --- src/analyses/ai.cpp | 62 +++++++++------------------------------------ 1 file changed, 12 insertions(+), 50 deletions(-) diff --git a/src/analyses/ai.cpp b/src/analyses/ai.cpp index f0fc24aa219..c12c1f4d0a3 100644 --- a/src/analyses/ai.cpp +++ b/src/analyses/ai.cpp @@ -394,63 +394,25 @@ bool ai_baset::do_function_call_rec( { PRECONDITION(!goto_functions.function_map.empty()); - // We can't really do this here -- we rely on - // these being removed by some previous analysis. - PRECONDITION(function.id() != ID_dereference); - - // Can't be a function + // This is quite a strong assumption on the well-formedness of the program. + // It means function pointers must be removed before use. DATA_INVARIANT( - function.id() != "NULL-object", "Function cannot be null object"); - DATA_INVARIANT(function.id() != ID_member, "Function cannot be struct field"); - DATA_INVARIANT(function.id() != ID_index, "Function cannot be array element"); + function.id() == ID_symbol, + "Function pointers and indirect calls must be removed before analysis."); bool new_data=false; - if(function.id()==ID_symbol) - { - const irep_idt &identifier = to_symbol_expr(function).get_identifier(); + const irep_idt &identifier = to_symbol_expr(function).get_identifier(); - goto_functionst::function_mapt::const_iterator it= - goto_functions.function_map.find(identifier); + goto_functionst::function_mapt::const_iterator it = + goto_functions.function_map.find(identifier); - if(it==goto_functions.function_map.end()) - throw "failed to find function "+id2string(identifier); + DATA_INVARIANT( + it != goto_functions.function_map.end(), + "Function " + id2string(identifier) + "not in function map"); - new_data=do_function_call( - l_call, l_return, - goto_functions, - it, - arguments, - ns); - } - else if(function.id()==ID_if) - { - DATA_INVARIANT(function.operands().size() != 3, "if has three operands"); - - bool new_data1= - do_function_call_rec( - l_call, l_return, - function.op1(), - arguments, - goto_functions, - ns); - - bool new_data2= - do_function_call_rec( - l_call, l_return, - function.op2(), - arguments, - goto_functions, - ns); - - if(new_data1 || new_data2) - new_data=true; - } - else - { - throw "unexpected function_call argument: "+ - function.id_string(); - } + new_data = + do_function_call(l_call, l_return, goto_functions, it, arguments, ns); return new_data; }