diff --git a/regression/contracts/assigns-local-composite/main.c b/regression/contracts/assigns-local-composite/main.c new file mode 100644 index 00000000000..7b102af7cdd --- /dev/null +++ b/regression/contracts/assigns-local-composite/main.c @@ -0,0 +1,104 @@ +struct st1 +{ + int a; + int arr[10]; +}; + +struct st2 +{ + int a; + struct st1 arr[10]; +}; + +struct st3 +{ + struct st1 *s1; + struct st2 *s2; + struct st1 arr1[10]; + struct st2 arr2[10]; +}; + +enum tagt +{ + CHAR, + INT, + CHAR_PTR, + INT_ARR, + STRUCT_ST1_ARR +}; + +// clang-format off +struct taggedt { + enum tagt tag; + union { + struct{ char a; }; + struct{ int b; }; + struct{ char *ptr; }; + struct{ int arr[10]; }; + struct{ struct st1 arr1[10]; }; + }; +}; +// clang-format on + +int foo(int i) __CPROVER_assigns() +{ + // all accesses to locals should pass + int arr[10]; + struct st1 s1; + struct st2 s2; + struct st1 dirty_s1; + struct st3 s3; + s3.s1 = &dirty_s1; + s3.s2 = malloc(sizeof(struct st2)); + + if(0 <= i && i < 10) + { + arr[i] = 0; + s1.a = 0; + s1.arr[i] = 0; + s2.a = 0; + s2.arr[i].a = 0; + s2.arr[i].arr[i] = 0; + s3.s1->a = 0; + s3.s1->arr[i] = 0; + s3.s2->a = 0; + s3.s2->arr[i].a = 0; + s3.s2->arr[i].arr[i] = 0; + *(&(s3.s2->arr[i].arr[0]) + i) = 0; + (&(s3.arr1[0]) + i)->arr[i] = 0; + (&((&(s3.arr2[0]) + i)->arr[i]))->a = 0; + } + + struct taggedt tagged; + switch(tagged.tag) + { + case CHAR: + tagged.a = 0; + break; + case INT: + tagged.b = 0; + break; + case CHAR_PTR: + tagged.ptr = 0; + break; + case INT_ARR: + if(0 <= i && i < 10) + tagged.arr[i] = 0; + break; + case STRUCT_ST1_ARR: + if(0 <= i && i < 10) + { + tagged.arr1[i].a = 0; + tagged.arr1[i].arr[i] = 0; + } + break; + } + + return 0; +} + +void main() +{ + int i; + foo(i); +} diff --git a/regression/contracts/assigns-local-composite/test.desc b/regression/contracts/assigns-local-composite/test.desc new file mode 100644 index 00000000000..1e4b211e77d --- /dev/null +++ b/regression/contracts/assigns-local-composite/test.desc @@ -0,0 +1,12 @@ +CORE +main.c +--enforce-contract foo +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +-- +Checks that assigns clause checking explicitly checks assignments to locally +declared symbols with composite types, when they are dirty. +Out of bounds accesses to locally declared arrays, structs, etc. +will be detected by assigns clause checking. diff --git a/regression/contracts/assigns_type_checking_valid_cases/main.c b/regression/contracts/assigns_type_checking_valid_cases/main.c index f1381d555e4..52b071790fa 100644 --- a/regression/contracts/assigns_type_checking_valid_cases/main.c +++ b/regression/contracts/assigns_type_checking_valid_cases/main.c @@ -41,6 +41,8 @@ void foo4(int a, int *b, int *c) __CPROVER_requires(c != NULL) void foo5(struct buf buffer) __CPROVER_assigns() { + // these are assignments to the function parameter which is a local symbol + // and should not generate checks buffer.data = NULL; buffer.len = 0; } diff --git a/regression/contracts/assigns_type_checking_valid_cases/test.desc b/regression/contracts/assigns_type_checking_valid_cases/test.desc index 2e6ca5c2349..588301c9ead 100644 --- a/regression/contracts/assigns_type_checking_valid_cases/test.desc +++ b/regression/contracts/assigns_type_checking_valid_cases/test.desc @@ -8,8 +8,6 @@ main.c ^\[foo3.assigns.\d+\] line \d+ Check that y is assignable: SUCCESS$ ^\[foo4.assigns.\d+\] line \d+ Check that \*c is assignable: SUCCESS$ ^\[foo4.assigns.\d+\] line \d+ Check that \*x is assignable: SUCCESS$ -^\[foo5.assigns.\d+\] line \d+ Check that buffer.data is assignable: SUCCESS$ -^\[foo5.assigns.\d+\] line \d+ Check that buffer.len is assignable: SUCCESS$ ^\[foo6.assigns.\d+\] line \d+ Check that \*buffer->data is assignable: SUCCESS$ ^\[foo6.assigns.\d+\] line \d+ Check that buffer->len is assignable: SUCCESS$ ^\[foo7.assigns.\d+\] line \d+ Check that \*buffer->data is assignable: SUCCESS$ diff --git a/src/goto-instrument/contracts/instrument_spec_assigns.cpp b/src/goto-instrument/contracts/instrument_spec_assigns.cpp index 7264dbd7998..974d0d62f74 100644 --- a/src/goto-instrument/contracts/instrument_spec_assigns.cpp +++ b/src/goto-instrument/contracts/instrument_spec_assigns.cpp @@ -25,6 +25,9 @@ Date: January 2022 #include "utils.h" +/// header for log messages +static const char LOG_HEADER[] = "assigns clause checking: "; + /// Pragma used to mark assignments to static locals that need to be propagated static const char PROPAGATE_STATIC_LOCAL_PRAGMA[] = "contracts:propagate-static-local"; @@ -694,8 +697,9 @@ exprt instrument_spec_assignst::inclusion_check_full( } if(allow_null_lhs) - return or_exprt{null_pointer(car.target_start_address()), - and_exprt{car.valid_var(), disjunction(disjuncts)}}; + return or_exprt{ + null_pointer(car.target_start_address()), + and_exprt{car.valid_var(), disjunction(disjuncts)}}; else return and_exprt{car.valid_var(), disjunction(disjuncts)}; } @@ -716,6 +720,8 @@ const car_exprt &instrument_spec_assignst::create_car_from_spec_assigns( } else { + log.debug() << LOG_HEADER << "creating CAR for assigns clause target " + << format(condition) << ": " << format(target) << messaget::eom; from_spec_assigns.insert({key, create_car_expr(condition, target)}); return from_spec_assigns.find(key)->second; } @@ -734,6 +740,8 @@ const car_exprt &instrument_spec_assignst::create_car_from_stack_alloc( } else { + log.debug() << LOG_HEADER << "creating CAR for stack-allocated target " + << format(target) << messaget::eom; from_stack_alloc.insert({target, create_car_expr(true_exprt{}, target)}); return from_stack_alloc.find(target)->second; } @@ -752,6 +760,8 @@ instrument_spec_assignst::create_car_from_heap_alloc(const exprt &target) } else { + log.debug() << LOG_HEADER << "creating CAR for heap-allocated target " + << format(target) << messaget::eom; from_heap_alloc.insert({target, create_car_expr(true_exprt{}, target)}); return from_heap_alloc.find(target)->second; } @@ -770,6 +780,8 @@ const car_exprt &instrument_spec_assignst::create_car_from_static_local( } else { + log.debug() << LOG_HEADER << "creating CAR for static local target " + << format(target) << messaget::eom; from_static_local.insert({target, create_car_expr(true_exprt{}, target)}); return from_static_local.find(target)->second; } @@ -809,44 +821,107 @@ bool instrument_spec_assignst::must_check_assign( skip_function_paramst skip_function_params, const optionalt cfg_info_opt) { - if( - const auto &symbol_expr = - expr_try_dynamic_cast(target->assign_lhs())) + log.debug().source_location = target->source_location(); + + if(can_cast_expr(target->assign_lhs())) { + const auto &symbol_expr = to_symbol_expr(target->assign_lhs()); if( skip_function_params == skip_function_paramst::NO && - ns.lookup(symbol_expr->get_identifier()).is_parameter) + ns.lookup(symbol_expr.get_identifier()).is_parameter) { + log.debug() << LOG_HEADER << "checking assignment to function parameter " + << format(symbol_expr) << messaget::eom; return true; } if(cfg_info_opt.has_value()) - return !cfg_info_opt.value().is_local(symbol_expr->get_identifier()); + { + if(cfg_info_opt.value().is_local(symbol_expr.get_identifier())) + { + log.debug() << LOG_HEADER + << "skipping checking on assignment to local symbol " + << format(symbol_expr) << messaget::eom; + return false; + } + else + { + log.debug() << LOG_HEADER << "checking assignment to non-local symbol " + << format(symbol_expr) << messaget::eom; + return true; + } + } + log.debug() << LOG_HEADER << "checking assignment to symbol " + << format(symbol_expr) << messaget::eom; + return true; + } + else + { + // This is not a mere symbol. + // Since non-dirty locals are not tracked explicitly in the write set, + // we need to skip the check if we can verify that the expression describes + // an access to a non-dirty local symbol or an input parameter, + // otherwise the check will fail. + // In addition, the expression shall not contain address_of or dereference + // operators, regardless of the base symbol/object on which they apply. + // If the expression contains an address_of operation, the assignment gets + // checked. If the base object is a local or a parameter, it will also be + // flagged as dirty and will be tracked explicitly, and the check will pass. + // If the expression contains a dereference operation, the assignment gets + // checked. If the dereferenced address was computed from a local object, + // from a function parameter or returned by a local malloc, + // then the object will be tracked explicitly and the check will pass. + // In all other cases (address of a non-local object, or dereference of + // a non-locally computed address) the location must be given explicitly + // in the assigns clause to be tracked and we must check the assignment. + if( + cfg_info_opt.has_value() && + cfg_info_opt.value().is_local_composite_access(target->assign_lhs())) + { + log.debug() + << LOG_HEADER + << "skipping check on assignment to local composite member expression " + << format(target->assign_lhs()) << messaget::eom; + return false; + } + log.debug() << LOG_HEADER << "checking assignment to expression " + << format(target->assign_lhs()) << messaget::eom; + return true; } +} - return true; +/// Track the symbol iff we have no cfg_infot, or we have a cfg_infot and the +/// symbol is not a local or is a dirty local. +bool instrument_spec_assignst::must_track_decl_or_dead( + const irep_idt &ident, + const optionalt &cfg_info_opt) const +{ + return !cfg_info_opt.has_value() || + (cfg_info_opt.has_value() && + cfg_info_opt.value().is_not_local_or_dirty_local(ident)); } -/// Returns true iff a `DECL x` must be added to the local write set. -/// -/// A variable is called 'dirty' if its address gets taken at some point in -/// the program. -/// -/// Assuming the goto program is obtained from a structured C program that -/// passed C compiler checks, non-dirty variables can only be assigned to -/// directly by name, cannot escape their lexical scope, and are always safe -/// to assign. Hence, we only track dirty variables in the write set. +/// Returns true iff a `DECL x` must be explicitly tracked in the write set. bool instrument_spec_assignst::must_track_decl( const goto_programt::const_targett &target, const optionalt &cfg_info_opt) const { - if(cfg_info_opt.has_value()) + log.debug().source_location = target->source_location(); + if(must_track_decl_or_dead( + target->decl_symbol().get_identifier(), cfg_info_opt)) { - return cfg_info_opt.value().is_not_local_or_dirty_local( - target->decl_symbol().get_identifier()); + log.debug() << LOG_HEADER << "explicitly tracking " + << format(target->decl_symbol()) << " as assignable" + << messaget::eom; + return true; + } + else + { + log.debug() << LOG_HEADER << "implicitly tracking " + << format(target->decl_symbol()) + << " as assignable (non-dirty local)" << messaget::eom; + return false; } - // Unless proved non-dirty by the CFG analysis we assume it is dirty. - return true; } /// Returns true iff a `DEAD x` must be processed to upate the local write set. @@ -855,12 +930,8 @@ bool instrument_spec_assignst::must_track_dead( const goto_programt::const_targett &target, const optionalt &cfg_info_opt) const { - // Unless proved non-dirty by the CFG analysis we assume it is dirty. - if(!cfg_info_opt.has_value()) - return true; - - return cfg_info_opt.value().is_not_local_or_dirty_local( - target->dead_symbol().get_identifier()); + return must_track_decl_or_dead( + target->dead_symbol().get_identifier(), cfg_info_opt); } void instrument_spec_assignst::instrument_assign_statement( diff --git a/src/goto-instrument/contracts/instrument_spec_assigns.h b/src/goto-instrument/contracts/instrument_spec_assigns.h index f5db34c9ab4..7a6d000ad6b 100644 --- a/src/goto-instrument/contracts/instrument_spec_assigns.h +++ b/src/goto-instrument/contracts/instrument_spec_assigns.h @@ -574,25 +574,31 @@ class instrument_spec_assignst const car_exprt &freed_car, goto_programt &dest) const; - /// Returns true iff a `DECL x` must be added to the local write set. - /// - /// A variable is called 'dirty' if its address gets taken at some point in - /// the program. - /// - /// Assuming the goto program is obtained from a structured C program that - /// passed C compiler checks, non-dirty variables can only be assigned to - /// directly by name, cannot escape their lexical scope, and are always safe - /// to assign. Hence, we only track dirty variables in the write set. + /// Returns true iff a `DECL x` must be explicitly added to the write set. + /// @see must_track_decl_or_dead. bool must_track_decl( const goto_programt::const_targett &target, const optionalt &cfg_info_opt) const; - /// Returns true iff a `DEAD x` must be processed to update the local write - /// set. The conditions are the same than for tracking a `DECL x` instruction. + /// Returns true iff a `DEAD x` must be processed to update the write set. + /// @see must_track_decl_or_dead. bool must_track_dead( const goto_programt::const_targett &target, const optionalt &cfg_info_opt) const; + /// \brief Returns true iff a function-local symbol must be tracked. + /// + /// A local is called 'dirty' if its address gets taken at some point in + /// the program. + /// + /// Assuming the goto program is obtained from a structured C program that + /// passed C compiler checks, non-dirty locals can only be assigned to + /// directly by name, cannot escape their lexical scope, and are always safe + /// to assign. Hence, we only track dirty locals in the write set. + bool must_track_decl_or_dead( + const irep_idt &ident, + const optionalt &cfg_info_opt) const; + /// Returns true iff an `ASSIGN lhs := rhs` instruction must be instrumented. bool must_check_assign( const goto_programt::const_targett &target, diff --git a/src/goto-instrument/contracts/utils.h b/src/goto-instrument/contracts/utils.h index 9487ddb9475..5d9b2136853 100644 --- a/src/goto-instrument/contracts/utils.h +++ b/src/goto-instrument/contracts/utils.h @@ -11,6 +11,7 @@ Date: September 2021 #ifndef CPROVER_GOTO_INSTRUMENT_CONTRACTS_UTILS_H #define CPROVER_GOTO_INSTRUMENT_CONTRACTS_UTILS_H +// clang-format off #include #include @@ -22,7 +23,9 @@ Date: September 2021 #include #include +#include #include +// clang-format on /// \brief A class that overrides the low-level havocing functions in the base /// utility class, to havoc only when expressions point to valid memory, @@ -190,6 +193,65 @@ class cfg_infot return locals.is_local(ident) || symbol.is_parameter; } + /// Returns true iff `expr` is an access to a locally declared symbol + /// or parameter symbol, without any dereference/address_of operations. + bool is_local_composite_access(const exprt &expr) const + { + // case-splitting on the lhs structure copied from symex_assignt::assign_rec + if(expr.id() == ID_symbol) + { + return is_local(to_symbol_expr(expr).get_identifier()); + } + else if(expr.id() == ID_index) + { + return is_local_composite_access(to_index_expr(expr).array()); + } + else if(expr.id() == ID_member) + { + const typet &type = to_member_expr(expr).struct_op().type(); + if( + type.id() == ID_struct || type.id() == ID_struct_tag || + type.id() == ID_union || type.id() == ID_union_tag) + { + return is_local_composite_access(to_member_expr(expr).compound()); + } + else + { + throw unsupported_operation_exceptiont( + "is_local_composite_access: unexpected assignment to member of '" + + type.id_string() + "'"); + } + } + else if(expr.id() == ID_if) + { + return is_local_composite_access(to_if_expr(expr).true_case()) && + is_local_composite_access(to_if_expr(expr).false_case()); + } + else if(expr.id() == ID_typecast) + { + return is_local_composite_access(to_typecast_expr(expr).op()); + } + else if( + expr.id() == ID_byte_extract_little_endian || + expr.id() == ID_byte_extract_big_endian) + { + return is_local_composite_access(to_byte_extract_expr(expr).op()); + } + else if(expr.id() == ID_complex_real) + { + return is_local_composite_access(to_complex_real_expr(expr).op()); + } + else if(expr.id() == ID_complex_imag) + { + return is_local_composite_access(to_complex_imag_expr(expr).op()); + } + else + { + // matches ID_address_of, ID_dereference, etc. + return false; + } + } + /// returns the current target instruction const goto_programt::targett &get_current_target() const {