From fb1b3a4df6031c6502f8bea2574e4eaafa029b40 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 26 Apr 2019 17:32:04 +0100 Subject: [PATCH 01/12] Value-set: handle get_value_set_rec over let expressions This is needed to handle nested dereferences when symex_dereference uses let expressions. It depends on a cheap-to-copy value_sett so that we can easily record and then later discard the local definition. --- src/pointer-analysis/value_set.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index e19399c5d2b..352fdab1c50 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -962,6 +962,18 @@ void value_sett::get_value_set_rec( // we could have checked object size to be more precise } + else if(expr.id() == ID_let) + { + const auto &let_expr = to_let_expr(expr); + // This depends on copying `value_sett` being cheap -- which it is, because + // our backing store is sharing_mapt. + value_sett value_set_with_local_definition = *this; + value_set_with_local_definition.assign( + let_expr.symbol(), let_expr.value(), ns, false, false); + + value_set_with_local_definition.get_value_set_rec( + let_expr.where(), dest, suffix, original_type, ns); + } else { #if 0 From 86eae91d3d10fd521bdeab0e81fd929c9a46a934 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 26 Apr 2019 17:33:38 +0100 Subject: [PATCH 02/12] Simplifier: push member expressions inside a let There are a couple of key patterns involving members that we can understand, such as member-of-typecast and member-of-byte-extract, neither of which work properly if we have an intervening let expression. --- src/util/simplify_expr_struct.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/util/simplify_expr_struct.cpp b/src/util/simplify_expr_struct.cpp index 76452119cff..4d1067b271e 100644 --- a/src/util/simplify_expr_struct.cpp +++ b/src/util/simplify_expr_struct.cpp @@ -289,6 +289,21 @@ bool simplify_exprt::simplify_member(exprt &expr) return false; } + else if(op.id() == ID_let) + { + // Push a member operator inside a let binding, to avoid the let bisecting + // structures we otherwise know how to analyse, such as + // (let x = 1 in ({x, x})).field1 --> let x = 1 in ({x, x}.field1) --> + // let x = 1 in x + member_exprt pushed_in_member = to_member_expr(expr); + pushed_in_member.op() = to_let_expr(op).where(); + expr = op; + to_let_expr(expr).where() = pushed_in_member; + to_let_expr(expr).type() = to_let_expr(expr).where().type(); + + simplify_rec(expr); + return false; + } return true; } From ffaff081c676de36123437957cf5a080009bbfb7 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 26 Apr 2019 17:22:26 +0100 Subject: [PATCH 03/12] Symex: implement lift_lets This executes let expressions like ordinary assignments, avoiding the need for all other components of cbmc to understand the little-used local definition expression. It also means that symex's usual constant propagator, value-set and other analyses are brought to bear on the definition just as if the front-end had used an instruction to make the definition. --- src/goto-symex/goto_symex.h | 6 ++++ src/goto-symex/symex_clean_expr.cpp | 46 +++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index a0ebb5f196c..eb2bce61b9a 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -503,6 +503,12 @@ class goto_symext typedef symex_targett::assignment_typet assignment_typet; + void lift_lets(statet &, exprt &, assignment_typet); + void lift_let( + statet &state, + const let_exprt &let_expr, + assignment_typet assignment_type); + void symex_assign_rec( statet &, const exprt &lhs, diff --git a/src/goto-symex/symex_clean_expr.cpp b/src/goto-symex/symex_clean_expr.cpp index 328e227ce9a..5a66fc4de07 100644 --- a/src/goto-symex/symex_clean_expr.cpp +++ b/src/goto-symex/symex_clean_expr.cpp @@ -14,6 +14,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include @@ -169,6 +170,51 @@ replace_nondet(exprt &expr, symex_nondet_generatort &build_symex_nondet) } } +void goto_symext::lift_let( + statet &state, + const let_exprt &let_expr, + assignment_typet assignment_type) +{ + exprt let_value = let_expr.value(); + clean_expr(let_value, state, false); + let_value = state.rename(std::move(let_value), ns).get(); + do_simplify(let_value); + + exprt::operandst value_assignment_guard; + symex_assign_symbol( + state, + to_ssa_expr(state.rename(let_expr.symbol(), ns).get()), + nil_exprt(), + let_value, + value_assignment_guard, + assignment_type); +} + +void goto_symext::lift_lets( + statet &state, + exprt &rhs, + assignment_typet assignment_type) +{ + for(auto it = rhs.depth_begin(), itend = rhs.depth_end(); it != itend;) + { + if(it->id() == ID_let) + { + // Visit post-order, so more-local definitions are made before usage: + exprt &replaced_expr = it.mutate(); + let_exprt &replaced_let = to_let_expr(replaced_expr); + lift_lets(state, replaced_let.value(), assignment_type); + lift_lets(state, replaced_let.where(), assignment_type); + + lift_let(state, replaced_let, assignment_type); + replaced_expr = replaced_let.where(); + + it.next_sibling_or_parent(); + } + else + ++it; + } +} + void goto_symext::clean_expr( exprt &expr, statet &state, From 7e7e757f083c6e3151d44e1158d35c7cd5fec3e0 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 26 Apr 2019 17:31:01 +0100 Subject: [PATCH 04/12] Value-set-dereference: generate let expressions for complex pointers For the time being symex always lifts these immediately, to avoid having to teach all elements of cbmc about the let expression. --- src/goto-symex/symex_clean_expr.cpp | 2 ++ .../value_set_dereference.cpp | 24 ++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/goto-symex/symex_clean_expr.cpp b/src/goto-symex/symex_clean_expr.cpp index 5a66fc4de07..8078df8443f 100644 --- a/src/goto-symex/symex_clean_expr.cpp +++ b/src/goto-symex/symex_clean_expr.cpp @@ -128,6 +128,7 @@ void goto_symext::process_array_expr(statet &state, exprt &expr) ns, state.symbol_table, symex_dereference_state, language_mode, false); expr = dereference.dereference(expr); + lift_lets(state, expr, symex_targett::assignment_typet::STATE); ::process_array_expr(expr, symex_config.simplify_opt, ns); } @@ -222,6 +223,7 @@ void goto_symext::clean_expr( { replace_nondet(expr, path_storage.build_symex_nondet); dereference(expr, state, write); + lift_lets(state, expr, symex_targett::assignment_typet::STATE); // make sure all remaining byte extract operations use the root // object to avoid nesting of with/update and byte_update when on diff --git a/src/pointer-analysis/value_set_dereference.cpp b/src/pointer-analysis/value_set_dereference.cpp index c3914dbaeaa..769b991551b 100644 --- a/src/pointer-analysis/value_set_dereference.cpp +++ b/src/pointer-analysis/value_set_dereference.cpp @@ -76,9 +76,24 @@ exprt value_set_dereferencet::dereference(const exprt &pointer) std::list values; + exprt compare_against_pointer = pointer; + + if(pointer.id() != ID_symbol && retained_values.size() >= 2) + { + symbolt fresh_binder = get_fresh_aux_symbol( + pointer.type(), + "derefd_pointer", + "derefd_pointer", + source_locationt(), + language_mode, + new_symbol_table); + + compare_against_pointer = fresh_binder.symbol_expr(); + } + for(const auto &value : retained_values) { - values.push_back(build_reference_to(value, pointer, ns)); + values.push_back(build_reference_to(value, compare_against_pointer, ns)); #if 0 std::cout << "V: " << format(value.pointer_guard) << " --> "; std::cout << format(value.value); @@ -160,9 +175,12 @@ exprt value_set_dereferencet::dereference(const exprt &pointer) } } - #if 0 + if(compare_against_pointer != pointer) + value = let_exprt(to_symbol_expr(compare_against_pointer), pointer, value); + +#if 0 std::cout << "R: " << format(value) << "\n\n"; - #endif +#endif return value; } From 440a76dad5da76547f51e052a75849cc433d3f3b Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 29 Apr 2019 17:03:32 +0100 Subject: [PATCH 05/12] Factor goto_symext::symex_dead We'll use the symex_dead overload that takes a symbol expression rather than an instruction to kill let-bound local definitions --- src/goto-symex/goto_symex.h | 4 ++++ src/goto-symex/symex_dead.cpp | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index eb2bce61b9a..e1b085aede2 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -321,6 +321,10 @@ class goto_symext /// Symbolically execute a DEAD instruction /// \param state: Symbolic execution state for current instruction virtual void symex_dead(statet &state); + /// Kill a symbol, as if it had been the subject of a DEAD instruction + /// \param state: Symbolic execution state + /// \param symbol_expr: Symbol to kill + void symex_dead(statet &state, const symbol_exprt &symbol_expr); /// Symbolically execute an OTHER instruction /// \param state: Symbolic execution state for current instruction virtual void symex_other(statet &state); diff --git a/src/goto-symex/symex_dead.cpp b/src/goto-symex/symex_dead.cpp index 28dfd6c2e04..ecd2ee78499 100644 --- a/src/goto-symex/symex_dead.cpp +++ b/src/goto-symex/symex_dead.cpp @@ -21,8 +21,14 @@ void goto_symext::symex_dead(statet &state) const goto_programt::instructiont &instruction=*state.source.pc; const code_deadt &code = instruction.get_dead(); + symex_dead(state, code.symbol()); +} - ssa_exprt ssa = state.rename_ssa(ssa_exprt{code.symbol()}, ns).get(); +void goto_symext::symex_dead(statet &state, const symbol_exprt &symbol_expr) +{ + ssa_exprt to_rename = is_ssa_expr(symbol_expr) ? to_ssa_expr(symbol_expr) + : ssa_exprt{symbol_expr}; + ssa_exprt ssa = state.rename_ssa(to_rename, ns).get(); const exprt fields = state.field_sensitivity.get_fields(ns, state, ssa); find_symbols_sett fields_set; From 9614af2de1d63893dace7b9617171f37b5618428 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 29 Apr 2019 17:04:52 +0100 Subject: [PATCH 06/12] Don't use let-bound temporaries for simple pointers For example, we're willing to directly quote `p`, `(type *)p`, `p[some_constant]` and other simple expressions directly when comparing a pointer against its possible aliases-- the case we really must prevent is a large if-expression like `q == &o1 ? o1 : q == &o2 ? o2 : ...` that can result from a nested dereference -- in this case we must use a let-bound temporary to avoid producing a very large expression that can founder subsequent algorithms such as simplification, renaming and irep-merging. --- .../value_set_dereference.cpp | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/pointer-analysis/value_set_dereference.cpp b/src/pointer-analysis/value_set_dereference.cpp index 769b991551b..6885e5bf559 100644 --- a/src/pointer-analysis/value_set_dereference.cpp +++ b/src/pointer-analysis/value_set_dereference.cpp @@ -22,6 +22,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include #include @@ -30,6 +31,33 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +/// Returns true if \p expr is complicated enough that a local definition (using +/// a let expression) is preferable to repeating it, potentially many times. +/// Of course this is just a heuristic -- currently we allow any expression that +/// only involves one symbol, such as "x", "(type*)x", "x[0]" (but not "x[y]"). +/// Particularly we want to make sure to insist on a local definition of \p expr +/// is a large if-expression, such as `p == &o1 ? o1 : p == &o2 ? o2 : ...`, as +/// can result from dereferencing a subexpression (though note that \ref +/// value_set_dereferencet::dereference special-cases if_exprt, and therefore +/// handles the specific case of a double-dereference (**p) without an +/// intervening member operator, typecast, pointer arithmetic, etc.) +static bool should_use_local_definition_for(const exprt &expr) +{ + bool seen_symbol = false; + for(auto it = expr.depth_begin(), itend = expr.depth_end(); it != itend; ++it) + { + if(it->id() == ID_symbol) + { + if(seen_symbol) + return true; + else + seen_symbol = true; + } + } + + return false; +} + exprt value_set_dereferencet::dereference(const exprt &pointer) { if(pointer.type().id()!=ID_pointer) @@ -78,7 +106,7 @@ exprt value_set_dereferencet::dereference(const exprt &pointer) exprt compare_against_pointer = pointer; - if(pointer.id() != ID_symbol && retained_values.size() >= 2) + if(retained_values.size() >= 2 && should_use_local_definition_for(pointer)) { symbolt fresh_binder = get_fresh_aux_symbol( pointer.type(), From d3ebf4aef4add187b06db44bf6355b8579f501cf Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 29 Apr 2019 17:15:58 +0100 Subject: [PATCH 07/12] Lift-lets: always emit hidden assignments Taking inspiration from #return_value variables, which are always marked as hidden, since all let-bound variables encountered by symex are synthetic we mark those hidden too. --- src/goto-symex/goto_symex.h | 7 ++----- src/goto-symex/symex_clean_expr.cpp | 22 ++++++++-------------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index e1b085aede2..07c1974ef4d 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -507,11 +507,8 @@ class goto_symext typedef symex_targett::assignment_typet assignment_typet; - void lift_lets(statet &, exprt &, assignment_typet); - void lift_let( - statet &state, - const let_exprt &let_expr, - assignment_typet assignment_type); + void lift_lets(statet &, exprt &); + void lift_let(statet &state, const let_exprt &let_expr); void symex_assign_rec( statet &, diff --git a/src/goto-symex/symex_clean_expr.cpp b/src/goto-symex/symex_clean_expr.cpp index 8078df8443f..7b8d9e0071c 100644 --- a/src/goto-symex/symex_clean_expr.cpp +++ b/src/goto-symex/symex_clean_expr.cpp @@ -128,7 +128,7 @@ void goto_symext::process_array_expr(statet &state, exprt &expr) ns, state.symbol_table, symex_dereference_state, language_mode, false); expr = dereference.dereference(expr); - lift_lets(state, expr, symex_targett::assignment_typet::STATE); + lift_lets(state, expr); ::process_array_expr(expr, symex_config.simplify_opt, ns); } @@ -171,10 +171,7 @@ replace_nondet(exprt &expr, symex_nondet_generatort &build_symex_nondet) } } -void goto_symext::lift_let( - statet &state, - const let_exprt &let_expr, - assignment_typet assignment_type) +void goto_symext::lift_let(statet &state, const let_exprt &let_expr) { exprt let_value = let_expr.value(); clean_expr(let_value, state, false); @@ -188,13 +185,10 @@ void goto_symext::lift_let( nil_exprt(), let_value, value_assignment_guard, - assignment_type); + symex_targett::assignment_typet::HIDDEN); } -void goto_symext::lift_lets( - statet &state, - exprt &rhs, - assignment_typet assignment_type) +void goto_symext::lift_lets(statet &state, exprt &rhs) { for(auto it = rhs.depth_begin(), itend = rhs.depth_end(); it != itend;) { @@ -203,10 +197,10 @@ void goto_symext::lift_lets( // Visit post-order, so more-local definitions are made before usage: exprt &replaced_expr = it.mutate(); let_exprt &replaced_let = to_let_expr(replaced_expr); - lift_lets(state, replaced_let.value(), assignment_type); - lift_lets(state, replaced_let.where(), assignment_type); + lift_lets(state, replaced_let.value()); + lift_lets(state, replaced_let.where()); - lift_let(state, replaced_let, assignment_type); + lift_let(state, replaced_let); replaced_expr = replaced_let.where(); it.next_sibling_or_parent(); @@ -223,7 +217,7 @@ void goto_symext::clean_expr( { replace_nondet(expr, path_storage.build_symex_nondet); dereference(expr, state, write); - lift_lets(state, expr, symex_targett::assignment_typet::STATE); + lift_lets(state, expr); // make sure all remaining byte extract operations use the root // object to avoid nesting of with/update and byte_update when on From cbc495d4383ce0529eef04a7741f93b8d6d98537 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 29 Apr 2019 17:33:57 +0100 Subject: [PATCH 08/12] Goto-symex: add documentation to let-lifting and related functions --- src/goto-symex/goto_symex.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index 07c1974ef4d..4d307f8bbb2 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -288,6 +288,11 @@ class goto_symext void trigger_auto_object(const exprt &, statet &); void initialize_auto_object(const exprt &, statet &); + + /// Given an expression, find the root object and the offset into it. + /// + /// The extra complication to be considered here is that the expression may + /// have any number of ternary expressions mixed with type casts. void process_array_expr(statet &, exprt &); exprt make_auto_object(const typet &, statet &); virtual void dereference(exprt &, statet &, bool write); @@ -507,7 +512,16 @@ class goto_symext typedef symex_targett::assignment_typet assignment_typet; + /// Execute any let expressions in \p expr using \ref symex_assign_symbol. + /// The assignments will be made in bottom-up topological but otherwise + /// arbitrary order (i.e. in `(let x = let y = 0 in x + y) + (let z = 0 in z) + /// we will define `y` before `x`, but `z` and `x` could come in either order) void lift_lets(statet &, exprt &); + + /// Execute a single let expression, which should not have any nested let + /// expressions (use \ref lift_lets instead if there might be). + /// The caller is responsible for killing the newly-defined variable when + /// appropriate. void lift_let(statet &state, const let_exprt &let_expr); void symex_assign_rec( From dc10d51a38660c5986a7f3332c6560ebaa44cbce Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 7 May 2019 17:50:01 +0100 Subject: [PATCH 09/12] Split symex_step and execute_next_instruction This is in preparation for adding post-step cleanup --- src/goto-symex/goto_symex.h | 15 +++++++++++++-- src/goto-symex/symex_main.cpp | 6 ++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index 4d307f8bbb2..2d60ad6ac9b 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -218,6 +218,16 @@ class goto_symext const get_goto_functiont &get_goto_function); /// \brief Called for each step in the symbolic execution + /// This calls \ref print_symex_step to print symex's current instruction if + /// required, then \ref execute_instruction to execute the actual instruction + /// body. + /// \param get_goto_function: The delegate to retrieve function bodies (see + /// \ref get_goto_functiont) + /// \param state: Symbolic execution state for current instruction + virtual void + symex_step(const get_goto_functiont &get_goto_function, statet &state); + + /// \brief Executes the instruction `state.source.pc` /// Case-switches over the type of the instruction being executed and calls /// another function appropriate to the instruction type, for example /// \ref symex_function_call if the current instruction is a function call, @@ -225,8 +235,9 @@ class goto_symext /// \param get_goto_function: The delegate to retrieve function bodies (see /// \ref get_goto_functiont) /// \param state: Symbolic execution state for current instruction - virtual void - symex_step(const get_goto_functiont &get_goto_function, statet &state); + void execute_next_instruction( + const get_goto_functiont &get_goto_function, + statet &state); /// Prints the route of symex as it walks through the code. Used for /// debugging. diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index 2a3fb8b92b2..3c2a1af85eb 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -512,7 +512,13 @@ void goto_symext::symex_step( { // Print debug statements if they've been enabled. print_symex_step(state); + execute_next_instruction(get_goto_function, state); +} +void goto_symext::execute_next_instruction( + const get_goto_functiont &get_goto_function, + statet &state) +{ PRECONDITION(!state.threads.empty()); PRECONDITION(!state.call_stack().empty()); From 61507bd9ac9263b21afcf646a8edad7559014a1b Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 7 May 2019 18:02:43 +0100 Subject: [PATCH 10/12] Kill let-bound variables after the current instruction completes This avoids both wasted storage due to let-bound variables staying in the L2 renaming map, propagator and/or value-set, and avoids generating pointless PHI-node instructions for them. --- src/goto-symex/goto_symex.h | 18 ++++++++++++++---- src/goto-symex/symex_clean_expr.cpp | 3 +++ src/goto-symex/symex_main.cpp | 8 ++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index 2d60ad6ac9b..b0fda713b7d 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -219,8 +219,8 @@ class goto_symext /// \brief Called for each step in the symbolic execution /// This calls \ref print_symex_step to print symex's current instruction if - /// required, then \ref execute_instruction to execute the actual instruction - /// body. + /// required, then \ref execute_next_instruction to execute the actual + /// instruction body. /// \param get_goto_function: The delegate to retrieve function bodies (see /// \ref get_goto_functiont) /// \param state: Symbolic execution state for current instruction @@ -239,6 +239,11 @@ class goto_symext const get_goto_functiont &get_goto_function, statet &state); + /// Kills any variables in \ref instruction_local_symbols (these are currently + /// always let-bound variables defined in the course of executing the current + /// instruction), then clears \ref instruction_local_symbols. + void kill_instruction_local_symbols(statet &state); + /// Prints the route of symex as it walks through the code. Used for /// debugging. void print_symex_step(statet &state); @@ -281,6 +286,11 @@ class goto_symext /// instruction unsigned atomic_section_counter; + /// Variables that should be killed at the end of the current symex_step + /// invocation. Currently this is used for let-bound variables executed during + /// symex, whose lifetime is at most one instruction long. + std::vector instruction_local_symbols; + /// The messaget to write log messages to mutable messaget log; @@ -531,8 +541,8 @@ class goto_symext /// Execute a single let expression, which should not have any nested let /// expressions (use \ref lift_lets instead if there might be). - /// The caller is responsible for killing the newly-defined variable when - /// appropriate. + /// Records the newly-defined variable in \ref instruction_local_symbols, + /// meaning it will be killed when \ref symex_step concludes. void lift_let(statet &state, const let_exprt &let_expr); void symex_assign_rec( diff --git a/src/goto-symex/symex_clean_expr.cpp b/src/goto-symex/symex_clean_expr.cpp index 7b8d9e0071c..ad741845e49 100644 --- a/src/goto-symex/symex_clean_expr.cpp +++ b/src/goto-symex/symex_clean_expr.cpp @@ -186,6 +186,9 @@ void goto_symext::lift_let(statet &state, const let_exprt &let_expr) let_value, value_assignment_guard, symex_targett::assignment_typet::HIDDEN); + + // Schedule the bound variable to be cleaned up at the end of symex_step: + instruction_local_symbols.push_back(let_expr.symbol()); } void goto_symext::lift_lets(statet &state, exprt &rhs) diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index 3c2a1af85eb..97fe7fde683 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -513,6 +513,7 @@ void goto_symext::symex_step( // Print debug statements if they've been enabled. print_symex_step(state); execute_next_instruction(get_goto_function, state); + kill_instruction_local_symbols(state); } void goto_symext::execute_next_instruction( @@ -652,6 +653,13 @@ void goto_symext::execute_next_instruction( } } +void goto_symext::kill_instruction_local_symbols(statet &state) +{ + for(const auto &symbol_expr : instruction_local_symbols) + symex_dead(state, symbol_expr); + instruction_local_symbols.clear(); +} + /// Check if an expression only contains one unique symbol (possibly repeated /// multiple times) /// \param expr: The expression to examine From 7552badd3dfa39a226b754c933fba17dd352ad9f Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 8 May 2019 14:18:20 +0100 Subject: [PATCH 11/12] Add tests for let-expression introduction on double-deref --- regression/cbmc/double_deref/README | 27 +++++++++++++++++++ regression/cbmc/double_deref/double_deref.c | 16 +++++++++++ .../cbmc/double_deref/double_deref.desc | 10 +++++++ .../double_deref/double_deref_single_alias.c | 14 ++++++++++ .../double_deref_single_alias.desc | 10 +++++++ .../double_deref/double_deref_with_cast.c | 16 +++++++++++ .../double_deref/double_deref_with_cast.desc | 10 +++++++ .../double_deref_with_cast_single_alias.c | 14 ++++++++++ .../double_deref_with_cast_single_alias.desc | 10 +++++++ .../double_deref/double_deref_with_member.c | 22 +++++++++++++++ .../double_deref_with_member.desc | 10 +++++++ .../double_deref_with_member_single_alias.c | 20 ++++++++++++++ ...double_deref_with_member_single_alias.desc | 10 +++++++ .../double_deref_with_pointer_arithmetic.c | 22 +++++++++++++++ .../double_deref_with_pointer_arithmetic.desc | 10 +++++++ ...ref_with_pointer_arithmetic_single_alias.c | 19 +++++++++++++ ..._with_pointer_arithmetic_single_alias.desc | 10 +++++++ 17 files changed, 250 insertions(+) create mode 100644 regression/cbmc/double_deref/README create mode 100644 regression/cbmc/double_deref/double_deref.c create mode 100644 regression/cbmc/double_deref/double_deref.desc create mode 100644 regression/cbmc/double_deref/double_deref_single_alias.c create mode 100644 regression/cbmc/double_deref/double_deref_single_alias.desc create mode 100644 regression/cbmc/double_deref/double_deref_with_cast.c create mode 100644 regression/cbmc/double_deref/double_deref_with_cast.desc create mode 100644 regression/cbmc/double_deref/double_deref_with_cast_single_alias.c create mode 100644 regression/cbmc/double_deref/double_deref_with_cast_single_alias.desc create mode 100644 regression/cbmc/double_deref/double_deref_with_member.c create mode 100644 regression/cbmc/double_deref/double_deref_with_member.desc create mode 100644 regression/cbmc/double_deref/double_deref_with_member_single_alias.c create mode 100644 regression/cbmc/double_deref/double_deref_with_member_single_alias.desc create mode 100644 regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.c create mode 100644 regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc create mode 100644 regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.c create mode 100644 regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc diff --git a/regression/cbmc/double_deref/README b/regression/cbmc/double_deref/README new file mode 100644 index 00000000000..650f554a3c2 --- /dev/null +++ b/regression/cbmc/double_deref/README @@ -0,0 +1,27 @@ +This is a batch of tests checking cbmc's behaviour against expressions with more than one deref +operator, such as **p, *(int*)*p, p->field1->field2, etc. At present a directly nested dereference +(**p) is handled by pushing the second deref inside the result of the first. Example: + +**p ==> +*(p == &o1 ? o1 : o2) ==> +(p == &o1 ? *o1 : *o2) ==> +(p == &o1 ? (o1 == &o3 ? o3 : o4) : (o2 == &o5 ? o5 : o6)) + +This deref operator push-in is performed (implicitly) by value_set_dereferencet::dereference, which special-cases +the ternary conditional expression. +More complicated expressions are handled using a let-expression. Example: + +p->field1->field2 ==> +((p == &o1 ? o1 : o2).field1)->field2 ==> +let derefd_pointer = ((p == &o1 ? o1 : o2).field1) in (derefd_pointer == &o3 ? o3 : derefd_pointer == &o4 ? o4 : derefd_pointer == &o5 ? o5 : o6) + +Symex executes this let-expression as an auxiliary assignment, such as +derefd_pointer = ((p == &o1 ? o1 : o2).field1) +result = (derefd_pointer == &o3 ? o3 : derefd_pointer == &o4 ? o4 : derefd_pointer == &o5 ? o5 : o6) + +The tests in this directory check that auxiliary let-expressions like this are only used when appropriate by inspecting formula VCCs: +double_deref.desc -- a directly nested double-dereference, should not use a let-expression +double_deref_with_cast.desc -- a double-deref with an intervening cast (*(int*)*p for example), should use a let-expression +double_deref_with_member.desc -- a double-deref with an intervening member expression (p->field1->field2), should use a let-expression +double_deref_with_pointer_arithmetic.desc -- a double-deref with intervening pointer arithmetic (p[idx1][idx2]), should use a let-expression +*_single_alias.desc -- variants of the above where the first dereference points to a single possible object, so no let-expression is necessary diff --git a/regression/cbmc/double_deref/double_deref.c b/regression/cbmc/double_deref/double_deref.c new file mode 100644 index 00000000000..bc671f18b79 --- /dev/null +++ b/regression/cbmc/double_deref/double_deref.c @@ -0,0 +1,16 @@ + +#include +#include + +int main(int argc, char **argv) +{ + int **pptr; + int *ptr1 = (int *)malloc(sizeof(int)); + *ptr1 = 1; + int *ptr2 = (int *)malloc(sizeof(int)); + *ptr2 = 2; + + pptr = (argc == 5 ? &ptr1 : &ptr2); + + assert(**pptr == argc); +} diff --git a/regression/cbmc/double_deref/double_deref.desc b/regression/cbmc/double_deref/double_deref.desc new file mode 100644 index 00000000000..b5daf7673f3 --- /dev/null +++ b/regression/cbmc/double_deref/double_deref.desc @@ -0,0 +1,10 @@ +CORE +double_deref.c +--show-vcc +\{1\} \(main::1::pptr!0@1#2 = address_of\(main::1::ptr[12]!0@1\) \? main::argc!0@1#1 = [12] : main::argc!0@1#1 = [12]\) +^EXIT=0$ +^SIGNAL=0$ +-- +derefd_pointer::derefd_pointer +-- +See README for details about these tests diff --git a/regression/cbmc/double_deref/double_deref_single_alias.c b/regression/cbmc/double_deref/double_deref_single_alias.c new file mode 100644 index 00000000000..653446c256e --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_single_alias.c @@ -0,0 +1,14 @@ + +#include +#include + +int main(int argc, char **argv) +{ + int **pptr; + int *ptr1 = (int *)malloc(sizeof(int)); + *ptr1 = 1; + + pptr = &ptr1; + + assert(**pptr == argc); +} diff --git a/regression/cbmc/double_deref/double_deref_single_alias.desc b/regression/cbmc/double_deref/double_deref_single_alias.desc new file mode 100644 index 00000000000..c759890f632 --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_single_alias.desc @@ -0,0 +1,10 @@ +CORE +double_deref_single_alias.c +--show-vcc +\{1\} main::argc!0@1#1 = 1 +^EXIT=0$ +^SIGNAL=0$ +-- +derefd_pointer::derefd_pointer +-- +See README for details about these tests diff --git a/regression/cbmc/double_deref/double_deref_with_cast.c b/regression/cbmc/double_deref/double_deref_with_cast.c new file mode 100644 index 00000000000..6517156f83f --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_cast.c @@ -0,0 +1,16 @@ + +#include +#include + +int main(int argc, char **argv) +{ + void **pptr; + int *ptr1 = (int *)malloc(sizeof(int)); + *ptr1 = 1; + int *ptr2 = (int *)malloc(sizeof(int)); + *ptr2 = 2; + + pptr = (argc == 1 ? &ptr1 : &ptr2); + + assert(*(int *)*pptr == argc); +} diff --git a/regression/cbmc/double_deref/double_deref_with_cast.desc b/regression/cbmc/double_deref/double_deref_with_cast.desc new file mode 100644 index 00000000000..a3fc616f73d --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_cast.desc @@ -0,0 +1,10 @@ +CORE +double_deref_with_cast.c +--show-vcc +^\{-[0-9]+\} derefd_pointer::derefd_pointer!0#1 = +^\{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object[1-9]+\) \? main::argc!0@1#1 = [12] : main::argc!0@1#1 = [12] +^EXIT=0$ +^SIGNAL=0$ +-- +-- +See README for details about these tests diff --git a/regression/cbmc/double_deref/double_deref_with_cast_single_alias.c b/regression/cbmc/double_deref/double_deref_with_cast_single_alias.c new file mode 100644 index 00000000000..a7d9e9f00f2 --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_cast_single_alias.c @@ -0,0 +1,14 @@ + +#include +#include + +int main(int argc, char **argv) +{ + void **pptr; + int *ptr1 = (int *)malloc(sizeof(int)); + *ptr1 = 1; + + pptr = &ptr1; + + assert(*(int *)*pptr == argc); +} diff --git a/regression/cbmc/double_deref/double_deref_with_cast_single_alias.desc b/regression/cbmc/double_deref/double_deref_with_cast_single_alias.desc new file mode 100644 index 00000000000..6be9a4d83da --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_cast_single_alias.desc @@ -0,0 +1,10 @@ +CORE +double_deref_with_cast_single_alias.c +--show-vcc +\{1\} main::argc!0@1#1 = 1 +^EXIT=0$ +^SIGNAL=0$ +-- +derefd_pointer::derefd_pointer +-- +See README for details about these tests diff --git a/regression/cbmc/double_deref/double_deref_with_member.c b/regression/cbmc/double_deref/double_deref_with_member.c new file mode 100644 index 00000000000..fbebcc80b0d --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_member.c @@ -0,0 +1,22 @@ + +#include +#include + +struct container +{ + int *iptr; +}; + +int main(int argc, char **argv) +{ + struct container *cptr; + struct container container1, container2; + container1.iptr = (int *)malloc(sizeof(int)); + *container1.iptr = 1; + container2.iptr = (int *)malloc(sizeof(int)); + *container2.iptr = 2; + + cptr = (argc == 1 ? &container1 : &container2); + + assert(*(cptr->iptr) == argc); +} diff --git a/regression/cbmc/double_deref/double_deref_with_member.desc b/regression/cbmc/double_deref/double_deref_with_member.desc new file mode 100644 index 00000000000..e7256d946f2 --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_member.desc @@ -0,0 +1,10 @@ +CORE +double_deref_with_member.c +--show-vcc +^\{-[0-9]+\} derefd_pointer::derefd_pointer!0#1 = \(main::1::cptr!0@1#2 = address_of\(main::1::container[12]!0@1\) \? address_of\(symex_dynamic::dynamic_object[12]\) : address_of\(symex_dynamic::dynamic_object[12]\)\) +^\{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object[12]\) \? main::argc!0@1#1 = 2 : main::argc!0@1#1 = 1\) +^EXIT=0$ +^SIGNAL=0$ +-- +-- +See README for details about these tests diff --git a/regression/cbmc/double_deref/double_deref_with_member_single_alias.c b/regression/cbmc/double_deref/double_deref_with_member_single_alias.c new file mode 100644 index 00000000000..c7ba385de08 --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_member_single_alias.c @@ -0,0 +1,20 @@ + +#include +#include + +struct container +{ + int *iptr; +}; + +int main(int argc, char **argv) +{ + struct container *cptr; + struct container container1; + container1.iptr = (int *)malloc(sizeof(int)); + *container1.iptr = 1; + + cptr = &container1; + + assert(*(cptr->iptr) == argc); +} diff --git a/regression/cbmc/double_deref/double_deref_with_member_single_alias.desc b/regression/cbmc/double_deref/double_deref_with_member_single_alias.desc new file mode 100644 index 00000000000..0e4313b1877 --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_member_single_alias.desc @@ -0,0 +1,10 @@ +CORE +double_deref_with_member_single_alias.c +--show-vcc +\{1\} main::argc!0@1#1 = 1 +^EXIT=0$ +^SIGNAL=0$ +-- +derefd_pointer::derefd_pointer +-- +See README for details about these tests diff --git a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.c b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.c new file mode 100644 index 00000000000..62ff0738d1b --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.c @@ -0,0 +1,22 @@ + +#include +#include + +struct container +{ + int *iptr; +}; + +int main(int argc, char **argv) +{ + int **new_ptrs = malloc(2 * sizeof(int *)); + int *iptr1 = (int *)malloc(sizeof(int)); + *iptr1 = 1; + int *iptr2 = (int *)malloc(sizeof(int)); + *iptr2 = 2; + + new_ptrs[argc % 2] = iptr1; + new_ptrs[1 - (argc % 2)] = iptr2; + + assert(*(new_ptrs[argc % 2]) == argc); +} diff --git a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc new file mode 100644 index 00000000000..4328a111fa9 --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc @@ -0,0 +1,10 @@ +CORE +double_deref_with_pointer_arithmetic.c +--show-vcc +^\{-[0-9]+\} derefd_pointer::derefd_pointer!0#1 = symex_dynamic::dynamic_object1#3\[cast\(mod #source_location=""\(main::argc!0@1#1, 2\), signedbv\[64\]\)\] +^\{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object[23]\) \? main::argc!0@1#1 = 2 : main::argc!0@1#1 = 1\) +^EXIT=0$ +^SIGNAL=0$ +-- +-- +See README for details about these tests diff --git a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.c b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.c new file mode 100644 index 00000000000..cf6646e4594 --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.c @@ -0,0 +1,19 @@ + +#include +#include + +struct container +{ + int *iptr; +}; + +int main(int argc, char **argv) +{ + int **new_ptrs = malloc(2 * sizeof(int *)); + int *iptr1 = (int *)malloc(sizeof(int)); + *iptr1 = 1; + + new_ptrs[argc % 2] = iptr1; + + assert(*(new_ptrs[argc % 2]) == argc); +} diff --git a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc new file mode 100644 index 00000000000..596be52269b --- /dev/null +++ b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc @@ -0,0 +1,10 @@ +CORE +double_deref_with_pointer_arithmetic_single_alias.c +--show-vcc +\{1\} main::argc!0@1#1 = 1 +^EXIT=0$ +^SIGNAL=0$ +-- +derefd_pointer::derefd_pointer +-- +See README for details about these tests From 87bcf4af51c084def411c173b5593a47b7d77d6f Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 8 May 2019 14:35:55 +0100 Subject: [PATCH 12/12] Add special case for deref-of-typecast-of-if This results from expressions such as *(int*)*p, and is very nearly as easy to handle as the already-special-cased deref-of-if case. In essence we transform *(type*)(x ? y : z) into x ? *(type*)y : *(type*)z, which should yield a more compact expression due to not conflating the alias sets that can be derived from the two arms of the conditional. --- regression/cbmc/double_deref/README | 4 ++-- .../double_deref/double_deref_with_cast.desc | 4 ++-- .../value_set_dereference.cpp | 20 +++++++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/regression/cbmc/double_deref/README b/regression/cbmc/double_deref/README index 650f554a3c2..55292da9cdc 100644 --- a/regression/cbmc/double_deref/README +++ b/regression/cbmc/double_deref/README @@ -8,7 +8,7 @@ operator, such as **p, *(int*)*p, p->field1->field2, etc. At present a directly (p == &o1 ? (o1 == &o3 ? o3 : o4) : (o2 == &o5 ? o5 : o6)) This deref operator push-in is performed (implicitly) by value_set_dereferencet::dereference, which special-cases -the ternary conditional expression. +the ternary conditional expression and typecasts of ternary conditionals. More complicated expressions are handled using a let-expression. Example: p->field1->field2 ==> @@ -21,7 +21,7 @@ result = (derefd_pointer == &o3 ? o3 : derefd_pointer == &o4 ? o4 : derefd_point The tests in this directory check that auxiliary let-expressions like this are only used when appropriate by inspecting formula VCCs: double_deref.desc -- a directly nested double-dereference, should not use a let-expression -double_deref_with_cast.desc -- a double-deref with an intervening cast (*(int*)*p for example), should use a let-expression +double_deref_with_cast.desc -- a double-deref with an intervening cast (*(int*)*p for example), should not use a let-expression double_deref_with_member.desc -- a double-deref with an intervening member expression (p->field1->field2), should use a let-expression double_deref_with_pointer_arithmetic.desc -- a double-deref with intervening pointer arithmetic (p[idx1][idx2]), should use a let-expression *_single_alias.desc -- variants of the above where the first dereference points to a single possible object, so no let-expression is necessary diff --git a/regression/cbmc/double_deref/double_deref_with_cast.desc b/regression/cbmc/double_deref/double_deref_with_cast.desc index a3fc616f73d..e5b1f592416 100644 --- a/regression/cbmc/double_deref/double_deref_with_cast.desc +++ b/regression/cbmc/double_deref/double_deref_with_cast.desc @@ -1,10 +1,10 @@ CORE double_deref_with_cast.c --show-vcc -^\{-[0-9]+\} derefd_pointer::derefd_pointer!0#1 = -^\{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object[1-9]+\) \? main::argc!0@1#1 = [12] : main::argc!0@1#1 = [12] +\{1\} \(cast\(main::1::pptr!0@1#2, signedbv\[32\]\*\*\) = address_of\(main::1::ptr2!0@1\) \? main::argc!0@1#1 = 2 \: main::argc!0@1#1 = 1\) ^EXIT=0$ ^SIGNAL=0$ -- +derefd_pointer::derefd_pointer -- See README for details about these tests diff --git a/src/pointer-analysis/value_set_dereference.cpp b/src/pointer-analysis/value_set_dereference.cpp index 6885e5bf559..66186c8d134 100644 --- a/src/pointer-analysis/value_set_dereference.cpp +++ b/src/pointer-analysis/value_set_dereference.cpp @@ -72,6 +72,26 @@ exprt value_set_dereferencet::dereference(const exprt &pointer) exprt false_case = dereference(if_expr.false_case()); return if_exprt(if_expr.cond(), true_case, false_case); } + else if(pointer.id() == ID_typecast) + { + const exprt *underlying = &pointer; + // Note this isn't quite the same as skip_typecast, which would also skip + // things such as int-to-ptr typecasts which we shouldn't ignore + while(underlying->id() == ID_typecast && + underlying->type().id() == ID_pointer) + { + underlying = &to_typecast_expr(*underlying).op(); + } + + if(underlying->id() == ID_if && underlying->type().id() == ID_pointer) + { + const auto &if_expr = to_if_expr(*underlying); + return if_exprt( + if_expr.cond(), + dereference(typecast_exprt(if_expr.true_case(), pointer.type())), + dereference(typecast_exprt(if_expr.false_case(), pointer.type()))); + } + } // type of the object const typet &type=pointer.type().subtype();