Skip to content

Use let-expression for dereferenced pointers #4576

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions regression/cbmc/double_deref/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
This is a batch of tests checking cbmc's behaviour against expressions with more than one deref
operator, such as **p, *(int*)*p, p->field1->field2, etc. At present a directly nested dereference
(**p) is handled by pushing the second deref inside the result of the first. Example:

**p ==>
*(p == &o1 ? o1 : o2) ==>
(p == &o1 ? *o1 : *o2) ==>
(p == &o1 ? (o1 == &o3 ? o3 : o4) : (o2 == &o5 ? o5 : o6))

This deref operator push-in is performed (implicitly) by value_set_dereferencet::dereference, which special-cases
the ternary conditional expression and typecasts of ternary conditionals.
More complicated expressions are handled using a let-expression. Example:

p->field1->field2 ==>
((p == &o1 ? o1 : o2).field1)->field2 ==>
let derefd_pointer = ((p == &o1 ? o1 : o2).field1) in (derefd_pointer == &o3 ? o3 : derefd_pointer == &o4 ? o4 : derefd_pointer == &o5 ? o5 : o6)

Symex executes this let-expression as an auxiliary assignment, such as
derefd_pointer = ((p == &o1 ? o1 : o2).field1)
result = (derefd_pointer == &o3 ? o3 : derefd_pointer == &o4 ? o4 : derefd_pointer == &o5 ? o5 : o6)

The tests in this directory check that auxiliary let-expressions like this are only used when appropriate by inspecting formula VCCs:
double_deref.desc -- a directly nested double-dereference, should not use a let-expression
double_deref_with_cast.desc -- a double-deref with an intervening cast (*(int*)*p for example), should not use a let-expression
double_deref_with_member.desc -- a double-deref with an intervening member expression (p->field1->field2), should use a let-expression
double_deref_with_pointer_arithmetic.desc -- a double-deref with intervening pointer arithmetic (p[idx1][idx2]), should use a let-expression
*_single_alias.desc -- variants of the above where the first dereference points to a single possible object, so no let-expression is necessary
16 changes: 16 additions & 0 deletions regression/cbmc/double_deref/double_deref.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

#include <assert.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
int **pptr;
int *ptr1 = (int *)malloc(sizeof(int));
*ptr1 = 1;
int *ptr2 = (int *)malloc(sizeof(int));
*ptr2 = 2;

pptr = (argc == 5 ? &ptr1 : &ptr2);

assert(**pptr == argc);
}
10 changes: 10 additions & 0 deletions regression/cbmc/double_deref/double_deref.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
double_deref.c
--show-vcc
\{1\} \(main::1::pptr!0@1#2 = address_of\(main::1::ptr[12]!0@1\) \? main::argc!0@1#1 = [12] : main::argc!0@1#1 = [12]\)
^EXIT=0$
^SIGNAL=0$
--
derefd_pointer::derefd_pointer
--
See README for details about these tests
14 changes: 14 additions & 0 deletions regression/cbmc/double_deref/double_deref_single_alias.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

#include <assert.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
int **pptr;
int *ptr1 = (int *)malloc(sizeof(int));
*ptr1 = 1;

pptr = &ptr1;

assert(**pptr == argc);
}
10 changes: 10 additions & 0 deletions regression/cbmc/double_deref/double_deref_single_alias.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
double_deref_single_alias.c
--show-vcc
\{1\} main::argc!0@1#1 = 1
^EXIT=0$
^SIGNAL=0$
--
derefd_pointer::derefd_pointer
--
See README for details about these tests
16 changes: 16 additions & 0 deletions regression/cbmc/double_deref/double_deref_with_cast.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

#include <assert.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
void **pptr;
int *ptr1 = (int *)malloc(sizeof(int));
*ptr1 = 1;
int *ptr2 = (int *)malloc(sizeof(int));
*ptr2 = 2;

pptr = (argc == 1 ? &ptr1 : &ptr2);

assert(*(int *)*pptr == argc);
}
10 changes: 10 additions & 0 deletions regression/cbmc/double_deref/double_deref_with_cast.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
double_deref_with_cast.c
--show-vcc
\{1\} \(cast\(main::1::pptr!0@1#2, signedbv\[32\]\*\*\) = address_of\(main::1::ptr2!0@1\) \? main::argc!0@1#1 = 2 \: main::argc!0@1#1 = 1\)
^EXIT=0$
^SIGNAL=0$
--
derefd_pointer::derefd_pointer
--
See README for details about these tests
14 changes: 14 additions & 0 deletions regression/cbmc/double_deref/double_deref_with_cast_single_alias.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

#include <assert.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
void **pptr;
int *ptr1 = (int *)malloc(sizeof(int));
*ptr1 = 1;

pptr = &ptr1;

assert(*(int *)*pptr == argc);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
double_deref_with_cast_single_alias.c
--show-vcc
\{1\} main::argc!0@1#1 = 1
^EXIT=0$
^SIGNAL=0$
--
derefd_pointer::derefd_pointer
--
See README for details about these tests
22 changes: 22 additions & 0 deletions regression/cbmc/double_deref/double_deref_with_member.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

#include <assert.h>
#include <stdlib.h>

struct container
{
int *iptr;
};

int main(int argc, char **argv)
{
struct container *cptr;
struct container container1, container2;
container1.iptr = (int *)malloc(sizeof(int));
*container1.iptr = 1;
container2.iptr = (int *)malloc(sizeof(int));
*container2.iptr = 2;

cptr = (argc == 1 ? &container1 : &container2);

assert(*(cptr->iptr) == argc);
}
10 changes: 10 additions & 0 deletions regression/cbmc/double_deref/double_deref_with_member.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
double_deref_with_member.c
--show-vcc
^\{-[0-9]+\} derefd_pointer::derefd_pointer!0#1 = \(main::1::cptr!0@1#2 = address_of\(main::1::container[12]!0@1\) \? address_of\(symex_dynamic::dynamic_object[12]\) : address_of\(symex_dynamic::dynamic_object[12]\)\)
^\{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object[12]\) \? main::argc!0@1#1 = 2 : main::argc!0@1#1 = 1\)
^EXIT=0$
^SIGNAL=0$
--
--
See README for details about these tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

#include <assert.h>
#include <stdlib.h>

struct container
{
int *iptr;
};

int main(int argc, char **argv)
{
struct container *cptr;
struct container container1;
container1.iptr = (int *)malloc(sizeof(int));
*container1.iptr = 1;

cptr = &container1;

assert(*(cptr->iptr) == argc);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
double_deref_with_member_single_alias.c
--show-vcc
\{1\} main::argc!0@1#1 = 1
^EXIT=0$
^SIGNAL=0$
--
derefd_pointer::derefd_pointer
--
See README for details about these tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

#include <assert.h>
#include <stdlib.h>

struct container
{
int *iptr;
};

int main(int argc, char **argv)
{
int **new_ptrs = malloc(2 * sizeof(int *));
int *iptr1 = (int *)malloc(sizeof(int));
*iptr1 = 1;
int *iptr2 = (int *)malloc(sizeof(int));
*iptr2 = 2;

new_ptrs[argc % 2] = iptr1;
new_ptrs[1 - (argc % 2)] = iptr2;

assert(*(new_ptrs[argc % 2]) == argc);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
double_deref_with_pointer_arithmetic.c
--show-vcc
^\{-[0-9]+\} derefd_pointer::derefd_pointer!0#1 = symex_dynamic::dynamic_object1#3\[cast\(mod #source_location=""\(main::argc!0@1#1, 2\), signedbv\[64\]\)\]
^\{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object[23]\) \? main::argc!0@1#1 = 2 : main::argc!0@1#1 = 1\)
^EXIT=0$
^SIGNAL=0$
--
--
See README for details about these tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

#include <assert.h>
#include <stdlib.h>

struct container
{
int *iptr;
};

int main(int argc, char **argv)
{
int **new_ptrs = malloc(2 * sizeof(int *));
int *iptr1 = (int *)malloc(sizeof(int));
*iptr1 = 1;

new_ptrs[argc % 2] = iptr1;

assert(*(new_ptrs[argc % 2]) == argc);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
double_deref_with_pointer_arithmetic_single_alias.c
--show-vcc
\{1\} main::argc!0@1#1 = 1
^EXIT=0$
^SIGNAL=0$
--
derefd_pointer::derefd_pointer
--
See README for details about these tests
46 changes: 44 additions & 2 deletions src/goto-symex/goto_symex.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,15 +218,31 @@ class goto_symext
const get_goto_functiont &get_goto_function);

/// \brief Called for each step in the symbolic execution
/// This calls \ref print_symex_step to print symex's current instruction if
/// required, then \ref execute_next_instruction to execute the actual
/// instruction body.
/// \param get_goto_function: The delegate to retrieve function bodies (see
/// \ref get_goto_functiont)
/// \param state: Symbolic execution state for current instruction
virtual void
symex_step(const get_goto_functiont &get_goto_function, statet &state);

/// \brief Executes the instruction `state.source.pc`
/// Case-switches over the type of the instruction being executed and calls
/// another function appropriate to the instruction type, for example
/// \ref symex_function_call if the current instruction is a function call,
/// \ref symex_goto if the current instruction is a goto, etc.
/// \param get_goto_function: The delegate to retrieve function bodies (see
/// \ref get_goto_functiont)
/// \param state: Symbolic execution state for current instruction
virtual void
symex_step(const get_goto_functiont &get_goto_function, statet &state);
void execute_next_instruction(
const get_goto_functiont &get_goto_function,
statet &state);

/// Kills any variables in \ref instruction_local_symbols (these are currently
/// always let-bound variables defined in the course of executing the current
/// instruction), then clears \ref instruction_local_symbols.
void kill_instruction_local_symbols(statet &state);

/// Prints the route of symex as it walks through the code. Used for
/// debugging.
Expand Down Expand Up @@ -270,6 +286,11 @@ class goto_symext
/// instruction
unsigned atomic_section_counter;

/// Variables that should be killed at the end of the current symex_step
/// invocation. Currently this is used for let-bound variables executed during
/// symex, whose lifetime is at most one instruction long.
std::vector<symbol_exprt> instruction_local_symbols;

/// The messaget to write log messages to
mutable messaget log;

Expand All @@ -288,6 +309,11 @@ class goto_symext

void trigger_auto_object(const exprt &, statet &);
void initialize_auto_object(const exprt &, statet &);

/// Given an expression, find the root object and the offset into it.
///
/// The extra complication to be considered here is that the expression may
/// have any number of ternary expressions mixed with type casts.
void process_array_expr(statet &, exprt &);
exprt make_auto_object(const typet &, statet &);
virtual void dereference(exprt &, statet &, bool write);
Expand Down Expand Up @@ -321,6 +347,10 @@ class goto_symext
/// Symbolically execute a DEAD instruction
/// \param state: Symbolic execution state for current instruction
virtual void symex_dead(statet &state);
/// Kill a symbol, as if it had been the subject of a DEAD instruction
/// \param state: Symbolic execution state
/// \param symbol_expr: Symbol to kill
void symex_dead(statet &state, const symbol_exprt &symbol_expr);
/// Symbolically execute an OTHER instruction
/// \param state: Symbolic execution state for current instruction
virtual void symex_other(statet &state);
Expand Down Expand Up @@ -503,6 +533,18 @@ class goto_symext

typedef symex_targett::assignment_typet assignment_typet;

/// Execute any let expressions in \p expr using \ref symex_assign_symbol.
/// The assignments will be made in bottom-up topological but otherwise
/// arbitrary order (i.e. in `(let x = let y = 0 in x + y) + (let z = 0 in z)
/// we will define `y` before `x`, but `z` and `x` could come in either order)
void lift_lets(statet &, exprt &);

/// Execute a single let expression, which should not have any nested let
/// expressions (use \ref lift_lets instead if there might be).
/// Records the newly-defined variable in \ref instruction_local_symbols,
/// meaning it will be killed when \ref symex_step concludes.
void lift_let(statet &state, const let_exprt &let_expr);

void symex_assign_rec(
statet &,
const exprt &lhs,
Expand Down
Loading