From d0b5fb408543ca2a6e06110e5380d9966320ceb8 Mon Sep 17 00:00:00 2001 From: Remi Delmas Date: Wed, 20 Apr 2022 18:11:29 -0400 Subject: [PATCH] CONTRACTS: handle locals with composite types Detect and skip checking assignments to members of local symbols that have composite types. Added debug output prints about tracked locations, detected local statics, checked or skipped assignments. --- .../contracts/assigns-local-composite/main.c | 104 ++++++++++++++ .../assigns-local-composite/test.desc | 12 ++ .../assigns_type_checking_valid_cases/main.c | 2 + .../test.desc | 2 - .../contracts/instrument_spec_assigns.cpp | 127 ++++++++++++++---- .../contracts/instrument_spec_assigns.h | 28 ++-- src/goto-instrument/contracts/utils.h | 62 +++++++++ 7 files changed, 296 insertions(+), 41 deletions(-) create mode 100644 regression/contracts/assigns-local-composite/main.c create mode 100644 regression/contracts/assigns-local-composite/test.desc diff --git a/regression/contracts/assigns-local-composite/main.c b/regression/contracts/assigns-local-composite/main.c new file mode 100644 index 00000000000..7b102af7cdd --- /dev/null +++ b/regression/contracts/assigns-local-composite/main.c @@ -0,0 +1,104 @@ +struct st1 +{ + int a; + int arr[10]; +}; + +struct st2 +{ + int a; + struct st1 arr[10]; +}; + +struct st3 +{ + struct st1 *s1; + struct st2 *s2; + struct st1 arr1[10]; + struct st2 arr2[10]; +}; + +enum tagt +{ + CHAR, + INT, + CHAR_PTR, + INT_ARR, + STRUCT_ST1_ARR +}; + +// clang-format off +struct taggedt { + enum tagt tag; + union { + struct{ char a; }; + struct{ int b; }; + struct{ char *ptr; }; + struct{ int arr[10]; }; + struct{ struct st1 arr1[10]; }; + }; +}; +// clang-format on + +int foo(int i) __CPROVER_assigns() +{ + // all accesses to locals should pass + int arr[10]; + struct st1 s1; + struct st2 s2; + struct st1 dirty_s1; + struct st3 s3; + s3.s1 = &dirty_s1; + s3.s2 = malloc(sizeof(struct st2)); + + if(0 <= i && i < 10) + { + arr[i] = 0; + s1.a = 0; + s1.arr[i] = 0; + s2.a = 0; + s2.arr[i].a = 0; + s2.arr[i].arr[i] = 0; + s3.s1->a = 0; + s3.s1->arr[i] = 0; + s3.s2->a = 0; + s3.s2->arr[i].a = 0; + s3.s2->arr[i].arr[i] = 0; + *(&(s3.s2->arr[i].arr[0]) + i) = 0; + (&(s3.arr1[0]) + i)->arr[i] = 0; + (&((&(s3.arr2[0]) + i)->arr[i]))->a = 0; + } + + struct taggedt tagged; + switch(tagged.tag) + { + case CHAR: + tagged.a = 0; + break; + case INT: + tagged.b = 0; + break; + case CHAR_PTR: + tagged.ptr = 0; + break; + case INT_ARR: + if(0 <= i && i < 10) + tagged.arr[i] = 0; + break; + case STRUCT_ST1_ARR: + if(0 <= i && i < 10) + { + tagged.arr1[i].a = 0; + tagged.arr1[i].arr[i] = 0; + } + break; + } + + return 0; +} + +void main() +{ + int i; + foo(i); +} diff --git a/regression/contracts/assigns-local-composite/test.desc b/regression/contracts/assigns-local-composite/test.desc new file mode 100644 index 00000000000..1e4b211e77d --- /dev/null +++ b/regression/contracts/assigns-local-composite/test.desc @@ -0,0 +1,12 @@ +CORE +main.c +--enforce-contract foo +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +-- +Checks that assigns clause checking explicitly checks assignments to locally +declared symbols with composite types, when they are dirty. +Out of bounds accesses to locally declared arrays, structs, etc. +will be detected by assigns clause checking. diff --git a/regression/contracts/assigns_type_checking_valid_cases/main.c b/regression/contracts/assigns_type_checking_valid_cases/main.c index f1381d555e4..52b071790fa 100644 --- a/regression/contracts/assigns_type_checking_valid_cases/main.c +++ b/regression/contracts/assigns_type_checking_valid_cases/main.c @@ -41,6 +41,8 @@ void foo4(int a, int *b, int *c) __CPROVER_requires(c != NULL) void foo5(struct buf buffer) __CPROVER_assigns() { + // these are assignments to the function parameter which is a local symbol + // and should not generate checks buffer.data = NULL; buffer.len = 0; } diff --git a/regression/contracts/assigns_type_checking_valid_cases/test.desc b/regression/contracts/assigns_type_checking_valid_cases/test.desc index 2e6ca5c2349..588301c9ead 100644 --- a/regression/contracts/assigns_type_checking_valid_cases/test.desc +++ b/regression/contracts/assigns_type_checking_valid_cases/test.desc @@ -8,8 +8,6 @@ main.c ^\[foo3.assigns.\d+\] line \d+ Check that y is assignable: SUCCESS$ ^\[foo4.assigns.\d+\] line \d+ Check that \*c is assignable: SUCCESS$ ^\[foo4.assigns.\d+\] line \d+ Check that \*x is assignable: SUCCESS$ -^\[foo5.assigns.\d+\] line \d+ Check that buffer.data is assignable: SUCCESS$ -^\[foo5.assigns.\d+\] line \d+ Check that buffer.len is assignable: SUCCESS$ ^\[foo6.assigns.\d+\] line \d+ Check that \*buffer->data is assignable: SUCCESS$ ^\[foo6.assigns.\d+\] line \d+ Check that buffer->len is assignable: SUCCESS$ ^\[foo7.assigns.\d+\] line \d+ Check that \*buffer->data is assignable: SUCCESS$ diff --git a/src/goto-instrument/contracts/instrument_spec_assigns.cpp b/src/goto-instrument/contracts/instrument_spec_assigns.cpp index 7264dbd7998..974d0d62f74 100644 --- a/src/goto-instrument/contracts/instrument_spec_assigns.cpp +++ b/src/goto-instrument/contracts/instrument_spec_assigns.cpp @@ -25,6 +25,9 @@ Date: January 2022 #include "utils.h" +/// header for log messages +static const char LOG_HEADER[] = "assigns clause checking: "; + /// Pragma used to mark assignments to static locals that need to be propagated static const char PROPAGATE_STATIC_LOCAL_PRAGMA[] = "contracts:propagate-static-local"; @@ -694,8 +697,9 @@ exprt instrument_spec_assignst::inclusion_check_full( } if(allow_null_lhs) - return or_exprt{null_pointer(car.target_start_address()), - and_exprt{car.valid_var(), disjunction(disjuncts)}}; + return or_exprt{ + null_pointer(car.target_start_address()), + and_exprt{car.valid_var(), disjunction(disjuncts)}}; else return and_exprt{car.valid_var(), disjunction(disjuncts)}; } @@ -716,6 +720,8 @@ const car_exprt &instrument_spec_assignst::create_car_from_spec_assigns( } else { + log.debug() << LOG_HEADER << "creating CAR for assigns clause target " + << format(condition) << ": " << format(target) << messaget::eom; from_spec_assigns.insert({key, create_car_expr(condition, target)}); return from_spec_assigns.find(key)->second; } @@ -734,6 +740,8 @@ const car_exprt &instrument_spec_assignst::create_car_from_stack_alloc( } else { + log.debug() << LOG_HEADER << "creating CAR for stack-allocated target " + << format(target) << messaget::eom; from_stack_alloc.insert({target, create_car_expr(true_exprt{}, target)}); return from_stack_alloc.find(target)->second; } @@ -752,6 +760,8 @@ instrument_spec_assignst::create_car_from_heap_alloc(const exprt &target) } else { + log.debug() << LOG_HEADER << "creating CAR for heap-allocated target " + << format(target) << messaget::eom; from_heap_alloc.insert({target, create_car_expr(true_exprt{}, target)}); return from_heap_alloc.find(target)->second; } @@ -770,6 +780,8 @@ const car_exprt &instrument_spec_assignst::create_car_from_static_local( } else { + log.debug() << LOG_HEADER << "creating CAR for static local target " + << format(target) << messaget::eom; from_static_local.insert({target, create_car_expr(true_exprt{}, target)}); return from_static_local.find(target)->second; } @@ -809,44 +821,107 @@ bool instrument_spec_assignst::must_check_assign( skip_function_paramst skip_function_params, const optionalt cfg_info_opt) { - if( - const auto &symbol_expr = - expr_try_dynamic_cast(target->assign_lhs())) + log.debug().source_location = target->source_location(); + + if(can_cast_expr(target->assign_lhs())) { + const auto &symbol_expr = to_symbol_expr(target->assign_lhs()); if( skip_function_params == skip_function_paramst::NO && - ns.lookup(symbol_expr->get_identifier()).is_parameter) + ns.lookup(symbol_expr.get_identifier()).is_parameter) { + log.debug() << LOG_HEADER << "checking assignment to function parameter " + << format(symbol_expr) << messaget::eom; return true; } if(cfg_info_opt.has_value()) - return !cfg_info_opt.value().is_local(symbol_expr->get_identifier()); + { + if(cfg_info_opt.value().is_local(symbol_expr.get_identifier())) + { + log.debug() << LOG_HEADER + << "skipping checking on assignment to local symbol " + << format(symbol_expr) << messaget::eom; + return false; + } + else + { + log.debug() << LOG_HEADER << "checking assignment to non-local symbol " + << format(symbol_expr) << messaget::eom; + return true; + } + } + log.debug() << LOG_HEADER << "checking assignment to symbol " + << format(symbol_expr) << messaget::eom; + return true; + } + else + { + // This is not a mere symbol. + // Since non-dirty locals are not tracked explicitly in the write set, + // we need to skip the check if we can verify that the expression describes + // an access to a non-dirty local symbol or an input parameter, + // otherwise the check will fail. + // In addition, the expression shall not contain address_of or dereference + // operators, regardless of the base symbol/object on which they apply. + // If the expression contains an address_of operation, the assignment gets + // checked. If the base object is a local or a parameter, it will also be + // flagged as dirty and will be tracked explicitly, and the check will pass. + // If the expression contains a dereference operation, the assignment gets + // checked. If the dereferenced address was computed from a local object, + // from a function parameter or returned by a local malloc, + // then the object will be tracked explicitly and the check will pass. + // In all other cases (address of a non-local object, or dereference of + // a non-locally computed address) the location must be given explicitly + // in the assigns clause to be tracked and we must check the assignment. + if( + cfg_info_opt.has_value() && + cfg_info_opt.value().is_local_composite_access(target->assign_lhs())) + { + log.debug() + << LOG_HEADER + << "skipping check on assignment to local composite member expression " + << format(target->assign_lhs()) << messaget::eom; + return false; + } + log.debug() << LOG_HEADER << "checking assignment to expression " + << format(target->assign_lhs()) << messaget::eom; + return true; } +} - return true; +/// Track the symbol iff we have no cfg_infot, or we have a cfg_infot and the +/// symbol is not a local or is a dirty local. +bool instrument_spec_assignst::must_track_decl_or_dead( + const irep_idt &ident, + const optionalt &cfg_info_opt) const +{ + return !cfg_info_opt.has_value() || + (cfg_info_opt.has_value() && + cfg_info_opt.value().is_not_local_or_dirty_local(ident)); } -/// Returns true iff a `DECL x` must be added to the local write set. -/// -/// A variable is called 'dirty' if its address gets taken at some point in -/// the program. -/// -/// Assuming the goto program is obtained from a structured C program that -/// passed C compiler checks, non-dirty variables can only be assigned to -/// directly by name, cannot escape their lexical scope, and are always safe -/// to assign. Hence, we only track dirty variables in the write set. +/// Returns true iff a `DECL x` must be explicitly tracked in the write set. bool instrument_spec_assignst::must_track_decl( const goto_programt::const_targett &target, const optionalt &cfg_info_opt) const { - if(cfg_info_opt.has_value()) + log.debug().source_location = target->source_location(); + if(must_track_decl_or_dead( + target->decl_symbol().get_identifier(), cfg_info_opt)) { - return cfg_info_opt.value().is_not_local_or_dirty_local( - target->decl_symbol().get_identifier()); + log.debug() << LOG_HEADER << "explicitly tracking " + << format(target->decl_symbol()) << " as assignable" + << messaget::eom; + return true; + } + else + { + log.debug() << LOG_HEADER << "implicitly tracking " + << format(target->decl_symbol()) + << " as assignable (non-dirty local)" << messaget::eom; + return false; } - // Unless proved non-dirty by the CFG analysis we assume it is dirty. - return true; } /// Returns true iff a `DEAD x` must be processed to upate the local write set. @@ -855,12 +930,8 @@ bool instrument_spec_assignst::must_track_dead( const goto_programt::const_targett &target, const optionalt &cfg_info_opt) const { - // Unless proved non-dirty by the CFG analysis we assume it is dirty. - if(!cfg_info_opt.has_value()) - return true; - - return cfg_info_opt.value().is_not_local_or_dirty_local( - target->dead_symbol().get_identifier()); + return must_track_decl_or_dead( + target->dead_symbol().get_identifier(), cfg_info_opt); } void instrument_spec_assignst::instrument_assign_statement( diff --git a/src/goto-instrument/contracts/instrument_spec_assigns.h b/src/goto-instrument/contracts/instrument_spec_assigns.h index f5db34c9ab4..7a6d000ad6b 100644 --- a/src/goto-instrument/contracts/instrument_spec_assigns.h +++ b/src/goto-instrument/contracts/instrument_spec_assigns.h @@ -574,25 +574,31 @@ class instrument_spec_assignst const car_exprt &freed_car, goto_programt &dest) const; - /// Returns true iff a `DECL x` must be added to the local write set. - /// - /// A variable is called 'dirty' if its address gets taken at some point in - /// the program. - /// - /// Assuming the goto program is obtained from a structured C program that - /// passed C compiler checks, non-dirty variables can only be assigned to - /// directly by name, cannot escape their lexical scope, and are always safe - /// to assign. Hence, we only track dirty variables in the write set. + /// Returns true iff a `DECL x` must be explicitly added to the write set. + /// @see must_track_decl_or_dead. bool must_track_decl( const goto_programt::const_targett &target, const optionalt &cfg_info_opt) const; - /// Returns true iff a `DEAD x` must be processed to update the local write - /// set. The conditions are the same than for tracking a `DECL x` instruction. + /// Returns true iff a `DEAD x` must be processed to update the write set. + /// @see must_track_decl_or_dead. bool must_track_dead( const goto_programt::const_targett &target, const optionalt &cfg_info_opt) const; + /// \brief Returns true iff a function-local symbol must be tracked. + /// + /// A local is called 'dirty' if its address gets taken at some point in + /// the program. + /// + /// Assuming the goto program is obtained from a structured C program that + /// passed C compiler checks, non-dirty locals can only be assigned to + /// directly by name, cannot escape their lexical scope, and are always safe + /// to assign. Hence, we only track dirty locals in the write set. + bool must_track_decl_or_dead( + const irep_idt &ident, + const optionalt &cfg_info_opt) const; + /// Returns true iff an `ASSIGN lhs := rhs` instruction must be instrumented. bool must_check_assign( const goto_programt::const_targett &target, diff --git a/src/goto-instrument/contracts/utils.h b/src/goto-instrument/contracts/utils.h index 9487ddb9475..5d9b2136853 100644 --- a/src/goto-instrument/contracts/utils.h +++ b/src/goto-instrument/contracts/utils.h @@ -11,6 +11,7 @@ Date: September 2021 #ifndef CPROVER_GOTO_INSTRUMENT_CONTRACTS_UTILS_H #define CPROVER_GOTO_INSTRUMENT_CONTRACTS_UTILS_H +// clang-format off #include #include @@ -22,7 +23,9 @@ Date: September 2021 #include #include +#include #include +// clang-format on /// \brief A class that overrides the low-level havocing functions in the base /// utility class, to havoc only when expressions point to valid memory, @@ -190,6 +193,65 @@ class cfg_infot return locals.is_local(ident) || symbol.is_parameter; } + /// Returns true iff `expr` is an access to a locally declared symbol + /// or parameter symbol, without any dereference/address_of operations. + bool is_local_composite_access(const exprt &expr) const + { + // case-splitting on the lhs structure copied from symex_assignt::assign_rec + if(expr.id() == ID_symbol) + { + return is_local(to_symbol_expr(expr).get_identifier()); + } + else if(expr.id() == ID_index) + { + return is_local_composite_access(to_index_expr(expr).array()); + } + else if(expr.id() == ID_member) + { + const typet &type = to_member_expr(expr).struct_op().type(); + if( + type.id() == ID_struct || type.id() == ID_struct_tag || + type.id() == ID_union || type.id() == ID_union_tag) + { + return is_local_composite_access(to_member_expr(expr).compound()); + } + else + { + throw unsupported_operation_exceptiont( + "is_local_composite_access: unexpected assignment to member of '" + + type.id_string() + "'"); + } + } + else if(expr.id() == ID_if) + { + return is_local_composite_access(to_if_expr(expr).true_case()) && + is_local_composite_access(to_if_expr(expr).false_case()); + } + else if(expr.id() == ID_typecast) + { + return is_local_composite_access(to_typecast_expr(expr).op()); + } + else if( + expr.id() == ID_byte_extract_little_endian || + expr.id() == ID_byte_extract_big_endian) + { + return is_local_composite_access(to_byte_extract_expr(expr).op()); + } + else if(expr.id() == ID_complex_real) + { + return is_local_composite_access(to_complex_real_expr(expr).op()); + } + else if(expr.id() == ID_complex_imag) + { + return is_local_composite_access(to_complex_imag_expr(expr).op()); + } + else + { + // matches ID_address_of, ID_dereference, etc. + return false; + } + } + /// returns the current target instruction const goto_programt::targett &get_current_target() const {