diff --git a/jbmc/regression/jbmc/virtual_filter_value_sets/A.class b/jbmc/regression/jbmc/virtual_filter_value_sets/A.class new file mode 100644 index 00000000000..d83b016d0dc Binary files /dev/null and b/jbmc/regression/jbmc/virtual_filter_value_sets/A.class differ diff --git a/jbmc/regression/jbmc/virtual_filter_value_sets/B.class b/jbmc/regression/jbmc/virtual_filter_value_sets/B.class new file mode 100644 index 00000000000..7f595dd7ae7 Binary files /dev/null and b/jbmc/regression/jbmc/virtual_filter_value_sets/B.class differ diff --git a/jbmc/regression/jbmc/virtual_filter_value_sets/Container.class b/jbmc/regression/jbmc/virtual_filter_value_sets/Container.class new file mode 100644 index 00000000000..abd4fedca0a Binary files /dev/null and b/jbmc/regression/jbmc/virtual_filter_value_sets/Container.class differ diff --git a/jbmc/regression/jbmc/virtual_filter_value_sets/test_with_deref.desc b/jbmc/regression/jbmc/virtual_filter_value_sets/test_with_deref.desc new file mode 100644 index 00000000000..be92ae0f2fb --- /dev/null +++ b/jbmc/regression/jbmc/virtual_filter_value_sets/test_with_deref.desc @@ -0,0 +1,18 @@ +CORE +virtual_filter_value_sets.class +--show-vcc --function virtual_filter_value_sets.test_with_deref +^EXIT=0$ +^SIGNAL=0$ +java::B\.f:\(\)I#return_value!0#.* = 9$ +-- +java::B\.f:\(\)I#return_value!0#.* = .*byte_extract_.*_endian +^warning: ignoring +-- +When B.f is called on `c.a_field`, it is guarded by a conditional which implies +that `c.a_field` must be a B. Symex should filter the value set for `c.a_field` +to remove anything which doesn't satisfy that condition. It will then be able +to determine that the return value of B.f is 9. If it thinks that `c.a_field` +might contain an A then it will create a more complicated expression for the +return value of B.f, which will include byte_extract_little_endian or +byte_extract_big_endian, as this is what it will produce when trying to read +the field `int flag` from a class that doesn't have any fields. diff --git a/jbmc/regression/jbmc/virtual_filter_value_sets/test_without_deref.desc b/jbmc/regression/jbmc/virtual_filter_value_sets/test_without_deref.desc new file mode 100644 index 00000000000..f1ae1c830e1 --- /dev/null +++ b/jbmc/regression/jbmc/virtual_filter_value_sets/test_without_deref.desc @@ -0,0 +1,18 @@ +CORE +virtual_filter_value_sets.class +--show-vcc --function virtual_filter_value_sets.test_without_deref +^EXIT=0$ +^SIGNAL=0$ +java::B\.f:\(\)I#return_value!0#.* = 9$ +-- +java::B\.f:\(\)I#return_value!0#.* = .*byte_extract_.*_endian +^warning: ignoring +-- +When B.f is called on `a_or_b`, it is guarded by a conditional which implies +that `a_or_b` must be a B. Symex should filter the value set for `a_or_b` to +remove anything which doesn't satisfy that condition. It will then be able +to determine that the return value of B.f is 9. If it thinks that `a_or_b` +might contain an A then it will create a more complicated expression for the +return value of B.f, which will include byte_extract_little_endian or +byte_extract_big_endian, as this is what it will produce when trying to read +the field `int flag` from a class that doesn't have any fields. diff --git a/jbmc/regression/jbmc/virtual_filter_value_sets/virtual_filter_value_sets.class b/jbmc/regression/jbmc/virtual_filter_value_sets/virtual_filter_value_sets.class new file mode 100644 index 00000000000..3de773d4e00 Binary files /dev/null and b/jbmc/regression/jbmc/virtual_filter_value_sets/virtual_filter_value_sets.class differ diff --git a/jbmc/regression/jbmc/virtual_filter_value_sets/virtual_filter_value_sets.java b/jbmc/regression/jbmc/virtual_filter_value_sets/virtual_filter_value_sets.java new file mode 100644 index 00000000000..cc2655e6b21 --- /dev/null +++ b/jbmc/regression/jbmc/virtual_filter_value_sets/virtual_filter_value_sets.java @@ -0,0 +1,40 @@ +class A { + public int f() { return 1; } +}; + +class B extends A { + public int flag; + + public int f() { return flag; } +}; + +class Container { + public A a_field; +} + +class virtual_filter_value_sets { + public static void test_without_deref(boolean nondet_bool) { + A a = new A(); + + B b = new B(); + b.flag = 9; + + A a_or_b = nondet_bool ? a : b; + int retval = a_or_b.f(); + + assert false; + } + + public static void test_with_deref(boolean nondet_bool) { + A a = new A(); + + B b = new B(); + b.flag = 9; + + Container c = new Container(); + c.a_field = nondet_bool ? a : b; + int retval = c.a_field.f(); + + assert false; + } +} diff --git a/regression/cbmc/CMakeLists.txt b/regression/cbmc/CMakeLists.txt index 8d7709afb89..b6c470209ec 100644 --- a/regression/cbmc/CMakeLists.txt +++ b/regression/cbmc/CMakeLists.txt @@ -5,7 +5,7 @@ add_test_pl_tests( add_test_pl_profile( "cbmc-paths-lifo" "$ --paths lifo" - "-C;-X;thorough-paths;-X;smt-backend;-s;paths-lifo" + "-C;-X;thorough-paths;-X;smt-backend;-X;paths-lifo-expected-failure;-s;paths-lifo" "CORE" ) diff --git a/regression/cbmc/Makefile b/regression/cbmc/Makefile index 2ff63786dca..83429155149 100644 --- a/regression/cbmc/Makefile +++ b/regression/cbmc/Makefile @@ -7,7 +7,7 @@ test-cprover-smt2: @../test.pl -e -p -c "../../../src/cbmc/cbmc --cprover-smt2" -X broken-smt-backend test-paths-lifo: - @../test.pl -e -p -c "../../../src/cbmc/cbmc --paths lifo" -X thorough-paths -X smt-backend + @../test.pl -e -p -c "../../../src/cbmc/cbmc --paths lifo" -X thorough-paths -X smt-backend -X paths-lifo-expected-failure tests.log: ../test.pl test diff --git a/regression/cbmc/symex_should_exclude_null_pointers/main.c b/regression/cbmc/symex_should_exclude_null_pointers/main.c index 4ee8d91e3bb..3ccab5c9585 100644 --- a/regression/cbmc/symex_should_exclude_null_pointers/main.c +++ b/regression/cbmc/symex_should_exclude_null_pointers/main.c @@ -1,3 +1,5 @@ +#include + static void noop() { } int main(int argc, char **argv) { @@ -5,12 +7,16 @@ int main(int argc, char **argv) { int x; int *maybe_null = argc & 1 ? &x : 0; - // Should work (guarded by assume): + // There are two features of symex that might exclude null pointers: local + // safe pointer analysis (LSPA for the rest of this file) and value-set + // filtering (i.e. goto_symext::try_filter_value_sets()). + + // Should be judged safe by LSPA and value-set filtering (guarded by assume): int *ptr1 = maybe_null; __CPROVER_assume(ptr1 != 0); int deref1 = *ptr1; - // Should work (guarded by else): + // Should be judged safe by LSPA and value-set filtering (guarded by else): int *ptr2 = maybe_null; if(ptr2 == 0) { @@ -20,14 +26,15 @@ int main(int argc, char **argv) { int deref2 = *ptr2; } - // Should work (guarded by if): + // Should be judged safe by LSPA and value-set filtering (guarded by if): int *ptr3 = maybe_null; if(ptr3 != 0) { int deref3 = *ptr3; } - // Shouldn't work yet despite being safe (guarded by backward goto): + // Should be judged unsafe by LSPA and safe by value-set filtering + // (guarded by backward goto): int *ptr4 = maybe_null; goto check; @@ -41,7 +48,8 @@ int main(int argc, char **argv) { end_test4: - // Shouldn't work yet despite being safe (guarded by confluence): + // Should be judged unsafe by LSPA and safe by value-set filtering + // (guarded by confluence): int *ptr5 = maybe_null; if(argc == 5) __CPROVER_assume(ptr5 != 0); @@ -49,26 +57,49 @@ int main(int argc, char **argv) { __CPROVER_assume(ptr5 != 0); int deref5 = *ptr5; - // Shouldn't work (unsafe as only guarded on one branch): + // Should be judged unsafe by LSPA (due to suspicion write to ptr5 might alter + // ptr6) and safe by value-set filtering: int *ptr6 = maybe_null; - if(argc == 6) - __CPROVER_assume(ptr6 != 0); - else - { - } + __CPROVER_assume(ptr6 != 0); + ptr5 = 0; int deref6 = *ptr6; - // Shouldn't work due to suspicion write to ptr6 might alter ptr7: + // Should be judged unsafe by LSPA (due to suspicion noop() call might alter + // ptr7) and safe by value-set filtering: int *ptr7 = maybe_null; __CPROVER_assume(ptr7 != 0); - ptr6 = 0; + noop(); int deref7 = *ptr7; - // Shouldn't work due to suspicion noop() call might alter ptr8: - int *ptr8 = maybe_null; - __CPROVER_assume(ptr8 != 0); - noop(); - int deref8 = *ptr8; + // Should be judged safe by LSPA and unsafe by value-set filtering (it + // isn't known what symbol *ptr_ptr8 refers to, so null can't be removed + // from a specific value set): + int r8_a = 1, r8_b = 2; + int *q8_a = argc & 2 ? &r8_a : 0; + int *q8_b = argc & 4 ? &r8_b : 0; + int **ptr8 = argc & 8 ? &q8_a : &q8_b; + __CPROVER_assume(*ptr8 != 0); + int deref8 = **ptr8; + + // Should be judged safe by LSPA and unsafe by value-set filtering (it + // isn't known what symbol *ptr_ptr9 refers to, so null can't be removed + // from a specific value set): + int r9_a = 1, r9_b = 2; + int *q9_a = argc & 2 ? &r9_a : 0; + int *q9_b = argc & 4 ? &r9_b : 0; + int **ptr9 = argc & 8 ? &q9_a : &q9_b; + if(*ptr9 != 0) + int deref9 = **ptr9; + + // Should be judged unsafe by LSPA and value-set filtering + // (unsafe as only guarded on one branch): + int *ptr10 = maybe_null; + if(argc == 10) + __CPROVER_assume(ptr10 != 0); + else + { + } + int deref10 = *ptr10; assert(0); } diff --git a/regression/cbmc/symex_should_exclude_null_pointers/test.desc b/regression/cbmc/symex_should_exclude_null_pointers/test.desc index e5075fa5763..493843ebfb7 100644 --- a/regression/cbmc/symex_should_exclude_null_pointers/test.desc +++ b/regression/cbmc/symex_should_exclude_null_pointers/test.desc @@ -3,15 +3,14 @@ main.c --show-vcc ^EXIT=0$ ^SIGNAL=0$ -ptr4\$object -ptr5\$object -ptr6\$object -ptr7\$object -ptr8\$object +ptr10\$object -- -ptr[1-3]\$object +ptr[1-9]\$object ^warning: ignoring -- ptrX\$object appearing in the VCCs indicates symex was unsure whether the pointer had a valid target, and uses the $object symbol as a referred object of last resort. ptr1-3 should be judged -not-null by symex, so their $object symbols do not occur. +not-null by local safe pointer analysis, so their $object symbols do not occur. ptr4-7 are +not caught by local safe pointer analysis but they are judged safe by value-set filtering, i.e. +goto_symext::try_filter_value_sets(). ptr8-9 are judged safe by local safe pointer analysis but +not value-set filtering. ptr10 is not judged safe by either because it is not safe. diff --git a/regression/cbmc/symex_should_filter_value_sets/main.c b/regression/cbmc/symex_should_filter_value_sets/main.c new file mode 100644 index 00000000000..6f9910184c3 --- /dev/null +++ b/regression/cbmc/symex_should_filter_value_sets/main.c @@ -0,0 +1,146 @@ +#include + +static void noop() +{ +} + +int main(int argc, char **argv) +{ + __CPROVER_assume(argc == 5); + + int a = 2; + int b = 1; + int *ptr_to_a_or_b = argv[0][0] == '0' ? &a : &b; + + // Should work (value-set filtered by assume): + int *p1 = ptr_to_a_or_b; + __CPROVER_assume(p1 != &a); + int c1 = *p1; + + int *p2 = ptr_to_a_or_b; + __CPROVER_assume(*p2 != 2); + int c2 = *p2; + + // Should work (value-set filtered by else): + int c3 = 0; + int *p3 = ptr_to_a_or_b; + if(p3 == &a) + { + } + else + { + c3 = *p3; + } + + int c4 = 0; + int *p4 = ptr_to_a_or_b; + if(*p4 == 2) + { + } + else + { + c4 = *p4; + } + + // Should work (value-set filtered by if): + int c5 = 0; + int *p5 = ptr_to_a_or_b; + if(p5 != &a) + { + c5 = *p5; + } + + int c6 = 0; + int *p6 = ptr_to_a_or_b; + if(*p6 != 2) + { + c6 = *p6; + } + + // Should work (value-set filtered by assume before a backward goto): + int *p7 = ptr_to_a_or_b; + goto check7; + +divide7: + int c7 = *p7; + goto end_test7; + +check7: + __CPROVER_assume(p7 != &a); + goto divide7; + +end_test7: + + int *p8 = ptr_to_a_or_b; + goto check8; + +divide8: + int c8 = *p8; + goto end_test8; + +check8: + __CPROVER_assume(*p8 != 2); + goto divide8; + +end_test8: + + // Should work (value-set filtered by confluence of if and else): + int *p9 = ptr_to_a_or_b; + if(argv[1][0] == '0') + __CPROVER_assume(p9 != &a); + else + __CPROVER_assume(p9 != &a); + int c9 = *p9; + + int *p10 = ptr_to_a_or_b; + if(argv[2][0] == '0') + __CPROVER_assume(*p10 != 2); + else + __CPROVER_assume(*p10 != 2); + int c10 = *p10; + + // Should work (value-set filtered by assume, write through an unrelated + // pointer has no effect): + int c = 0; + int *ptr_to_c = &c; + + int *p11 = ptr_to_a_or_b; + __CPROVER_assume(p11 != &a); + *ptr_to_c = 3; + int c11 = *p11; + + int *p12 = ptr_to_a_or_b; + __CPROVER_assume(*p12 != 2); + *ptr_to_c = 4; + int c12 = *p12; + + // Should work (value-set filtered by assume, function call has no effect): + int *p13 = ptr_to_a_or_b; + __CPROVER_assume(p13 != &a); + noop(); + int c13 = *p13; + + int *p14 = ptr_to_a_or_b; + __CPROVER_assume(*p14 != 2); + noop(); + int c14 = *p14; + + // Shouldn't work (unsafe as value-set only filtered by if on one branch): + int *p15 = ptr_to_a_or_b; + if(argv[3][0] == '0') + __CPROVER_assume(p15 != &a); + else + { + } + int c15 = *p15; + + int *p16 = ptr_to_a_or_b; + if(argv[4][0] == '0') + __CPROVER_assume(*p16 != 2); + else + { + } + int c16 = *p16; + + assert(0); +} diff --git a/regression/cbmc/symex_should_filter_value_sets/test.desc b/regression/cbmc/symex_should_filter_value_sets/test.desc new file mode 100644 index 00000000000..84401510cc7 --- /dev/null +++ b/regression/cbmc/symex_should_filter_value_sets/test.desc @@ -0,0 +1,29 @@ +CORE paths-lifo-expected-failure +main.c +--show-vcc +^EXIT=0$ +^SIGNAL=0$ +main::1::c1!0@1#. = 1 +main::1::c2!0@1#. = 1 +main::1::c3!0@1#. = 1 +main::1::c4!0@1#. = 1 +main::1::c5!0@1#. = 1 +main::1::c6!0@1#. = 1 +main::1::c7!0@1#. = 1 +main::1::c8!0@1#. = 1 +main::1::c9!0@1#. = 1 +main::1::c10!0@1#. = 1 +main::1::c11!0@1#. = 1 +main::1::c12!0@1#. = 1 +main::1::c13!0@1#. = 1 +main::1::c14!0@1#. = 1 +-- +^warning: ignoring +main::1::c15!0@1#. = 1 +main::1::c16!0@1#. = 1 +-- +This test does not work in single-path mode as it relies on convergence +behaviour (p15 and p16 are restricted on only one branch of an if-else diamond). + +See comments in main.c for explanations on why each of the assignments +is or isn't expected to appear in the output. diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index 6871883e452..3b75c5c8e98 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -315,6 +315,47 @@ class goto_symext virtual void symex_other(statet &state); void symex_assert(const goto_programt::instructiont &, statet &); + + /// Propagate constants and points-to information implied by a GOTO condition. + /// See \ref goto_statet::apply_condition for aspects of this which are common + /// to GOTO and ASSUME instructions. + /// \param current_state: state prior to the GOTO instruction + /// \param jump_taken_state: state following taking the GOTO + /// \param jump_not_taken_state: fall-through state + /// \param original_guard: the original GOTO condition + /// \param new_guard: GOTO condition, L2 renamed and simplified + /// \param ns: global namespace + void apply_goto_condition( + goto_symex_statet ¤t_state, + goto_statet &jump_taken_state, + goto_statet &jump_not_taken_state, + const exprt &original_guard, + const exprt &new_guard, + const namespacet &ns); + + /// Try to filter value sets based on whether possible values of a + /// pointer-typed symbol make the condition true or false. We only do this + /// when there is only one pointer-typed symbol in \p condition. + /// \param state: The current state + /// \param condition: The condition which is being evaluated, which it expects + /// will not have been cleaned or renamed. In practice, it's fine if it has + /// been cleaned and renamed up to level L1. + /// \param original_value_set: The value set we will read from + /// \param jump_taken_value_set: The value set that will be used when the + /// condition is true, so we remove any elements which we can tell will + /// make the condition false, or nullptr if this shouldn't be done + /// \param jump_not_taken_value_set: The value set that will be used when the + /// condition is false, so we remove any elements which we can tell will + /// make the condition true, or nullptr if this shouldn't be done + /// \param ns: A namespace + void try_filter_value_sets( + goto_symex_statet &state, + exprt condition, + const value_sett &original_value_set, + value_sett *jump_taken_value_set, + value_sett *jump_not_taken_value_set, + const namespacet &ns); + virtual void vcc( const exprt &, const std::string &msg, diff --git a/src/goto-symex/symex_goto.cpp b/src/goto-symex/symex_goto.cpp index 45d0529b1a6..667c911c12f 100644 --- a/src/goto-symex/symex_goto.cpp +++ b/src/goto-symex/symex_goto.cpp @@ -21,21 +21,35 @@ Author: Daniel Kroening, kroening@kroening.com #include -/// Propagate constants and points-to information implied by a GOTO condition. -/// See \ref goto_statet::apply_condition for aspects of this which are common -/// to GOTO and ASSUME instructions. -/// \param current_state: state prior to the GOTO instruction -/// \param jump_taken_state: state following taking the GOTO -/// \param jump_not_taken_state: fall-through state -/// \param new_guard: GOTO condition, L2 renamed and simplified -/// \param ns: global namespace -static void apply_goto_condition( - const goto_symex_statet ¤t_state, +void goto_symext::apply_goto_condition( + goto_symex_statet ¤t_state, goto_statet &jump_taken_state, goto_statet &jump_not_taken_state, + const exprt &original_guard, const exprt &new_guard, const namespacet &ns) { + // It would be better to call try_filter_value_sets after apply_condition, + // and pass nullptr for value sets which apply_condition successfully updated + // already. However, try_filter_value_sets calls rename to do constant + // propagation, and apply_condition can update the constant propagator. Since + // apply condition will never succeed on both jump_taken_state and + // jump_not_taken_state, it should be possible to pass a reference to an + // unmodified goto_statet to use for renaming. But renaming needs a + // goto_symex_statet, not just a goto_statet, and we only have access to one + // of those. A future improvement would be to split rename into two parts: + // one part on goto_symex_statet which is non-const and deals with + // l2 thread reads, and one part on goto_statet which is const and could be + // used in try_filter_value_sets. + + try_filter_value_sets( + current_state, + original_guard, + jump_taken_state.value_set, + &jump_taken_state.value_set, + &jump_not_taken_state.value_set, + ns); + jump_taken_state.apply_condition(new_guard, current_state, ns); // Try to find a negative condition that implies an equality constraint on @@ -269,7 +283,13 @@ void goto_symext::symex_goto(statet &state) auto &taken_state = backward ? state : goto_state_list.back().second; auto ¬_taken_state = backward ? goto_state_list.back().second : state; - apply_goto_condition(state, taken_state, not_taken_state, new_guard, ns); + apply_goto_condition( + state, + taken_state, + not_taken_state, + instruction.get_condition(), + new_guard, + ns); } // produce new guard symbol diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index cef3bf55539..3755bcc5ca4 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -14,7 +14,10 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include + #include +#include #include #include #include @@ -128,6 +131,17 @@ void goto_symext::symex_assume(statet &state, const exprt &cond) simplified_cond = state.rename(std::move(simplified_cond), ns).get(); do_simplify(simplified_cond); + // It would be better to call try_filter_value_sets after apply_condition, + // but it is not currently possible. See the comment at the beginning of + // \ref apply_goto_condition for more information. + + try_filter_value_sets( + state, cond, state.value_set, &state.value_set, nullptr, ns); + + // apply_condition must come after rename because it might change the + // constant propagator and the value-set and we read from those in rename + state.apply_condition(simplified_cond, state, ns); + symex_assume_l2(state, simplified_cond); } @@ -158,8 +172,6 @@ void goto_symext::symex_assume_l2(statet &state, const exprt &cond) if(state.atomic_section_id!=0 && state.guard.is_false()) symex_atomic_end(state); - - state.apply_condition(cond, state, ns); } void goto_symext::rewrite_quantifiers(exprt &expr, statet &state) @@ -541,3 +553,139 @@ void goto_symext::symex_step( UNREACHABLE; } } + +/// Check if an expression only contains one unique symbol (possibly repeated +/// multiple times) +/// \param expr: The expression to examine +/// \return If only one unique symbol occurs in \p expr then return it; +/// otherwise return an empty optionalt +static optionalt +find_unique_pointer_typed_symbol(const exprt &expr) +{ + optionalt return_value; + for(auto it = expr.depth_cbegin(); it != expr.depth_cend(); ++it) + { + const symbol_exprt *symbol_expr = expr_try_dynamic_cast(*it); + if(symbol_expr && can_cast_type(symbol_expr->type())) + { + // If we already have a potential return value, check if it is the same + // symbol, and return an empty optionalt if not + if(return_value && *symbol_expr != *return_value) + { + return {}; + } + return_value = *symbol_expr; + } + } + + // Either expr contains no pointer-typed symbols or it contains one unique + // pointer-typed symbol, possibly repeated multiple times + return return_value; +} + +void goto_symext::try_filter_value_sets( + goto_symex_statet &state, + exprt condition, + const value_sett &original_value_set, + value_sett *jump_taken_value_set, + value_sett *jump_not_taken_value_set, + const namespacet &ns) +{ + condition = state.rename(std::move(condition), ns).get(); + + optionalt symbol_expr = + find_unique_pointer_typed_symbol(condition); + + if(!symbol_expr) + { + return; + } + + const pointer_typet &symbol_type = to_pointer_type(symbol_expr->type()); + + value_setst::valuest value_set_elements; + original_value_set.get_value_set(*symbol_expr, value_set_elements, ns); + + std::unordered_set erase_from_jump_taken_value_set; + std::unordered_set erase_from_jump_not_taken_value_set; + erase_from_jump_taken_value_set.reserve(value_set_elements.size()); + erase_from_jump_not_taken_value_set.reserve(value_set_elements.size()); + + // Try evaluating the condition with the symbol replaced by a pointer to each + // one of its possible values in turn. If that leads to a true for some + // value_set_element then we can delete it from the value set that will be + // used if the condition is false, and vice versa. + for(const auto &value_set_element : value_set_elements) + { + if(value_set_element.id() == ID_unknown) + { + continue; + } + + const bool exclude_null_derefs = false; + value_set_dereferencet::valuet possible_value = + value_set_dereferencet::build_reference_to( + value_set_element, + *symbol_expr, + exclude_null_derefs, + language_mode, + ns); + + if(possible_value.ignore) + { + continue; + } + + exprt replacement_expr = + possible_value.value.is_nil() + ? static_cast(null_pointer_exprt{symbol_type}) + : static_cast(address_of_exprt{possible_value.value}); + + exprt modified_condition(condition); + + address_of_aware_replace_symbolt replace_symbol{}; + replace_symbol.insert(*symbol_expr, replacement_expr); + replace_symbol(modified_condition); + + // This do_simplify() is needed for the following reason: if `condition` is + // `*p == a` and we replace `p` with `&a` then we get `*&a == a`. Suppose + // our constant propagation knows that `a` is `1`. Without this call to + // do_simplify(), state.rename() turns this into `*&a == 1` (because + // rename() doesn't do constant propagation inside addresses), which + // do_simplify() turns into `a == 1`, which cannot be evaluated as true + // without another round of constant propagation. + // It would be sufficient to replace this call to do_simplify() with + // something that just replaces `*&x` with `x` whenever it finds it. + do_simplify(modified_condition); + + const bool record_events = state.record_events; + state.record_events = false; + modified_condition = state.rename(std::move(modified_condition), ns).get(); + state.record_events = record_events; + + do_simplify(modified_condition); + + if(jump_taken_value_set && modified_condition.is_false()) + { + erase_from_jump_taken_value_set.insert(value_set_element); + } + else if(jump_not_taken_value_set && modified_condition.is_true()) + { + erase_from_jump_not_taken_value_set.insert(value_set_element); + } + } + if(jump_taken_value_set && !erase_from_jump_taken_value_set.empty()) + { + value_sett::entryt *entry = jump_taken_value_set->get_entry_for_symbol( + symbol_expr->get_identifier(), symbol_type, "", ns); + jump_taken_value_set->erase_values_from_entry( + *entry, erase_from_jump_taken_value_set); + } + if(jump_not_taken_value_set && !erase_from_jump_not_taken_value_set.empty()) + { + value_sett::entryt *entry = jump_not_taken_value_set->get_entry_for_symbol( + symbol_expr->get_identifier(), symbol_type, "", ns); + jump_not_taken_value_set->erase_values_from_entry( + *entry, erase_from_jump_not_taken_value_set); + } +} diff --git a/src/pointer-analysis/Makefile b/src/pointer-analysis/Makefile index edbb7ba6a43..3fd0d40712f 100644 --- a/src/pointer-analysis/Makefile +++ b/src/pointer-analysis/Makefile @@ -1,6 +1,5 @@ SRC = add_failed_symbols.cpp \ goto_program_dereference.cpp \ - pointer_offset_sum.cpp \ rewrite_index.cpp \ show_value_sets.cpp \ value_set.cpp \ diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index a312a963b3f..cb8934722f5 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -48,8 +48,14 @@ bool value_sett::field_sensitive(const irep_idt &id, const typet &type) return type.id() == ID_struct || type.id() == ID_struct_tag; } -const value_sett::entryt *value_sett::find_entry(const value_sett::idt &id) - const +value_sett::entryt *value_sett::find_entry(const value_sett::idt &id) +{ + auto found = values.find(id); + return found == values.end() ? nullptr : &found->second; +} + +const value_sett::entryt * +value_sett::find_entry(const value_sett::idt &id) const { auto found = values.find(id); return found == values.end() ? nullptr : &found->second; @@ -366,6 +372,79 @@ static std::string strip_first_field_from_suffix( return suffix.substr(field.length() + 1); } +template +auto value_sett::get_entry_for_symbol( + maybe_const_value_sett &value_set, + const irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns) -> + typename std::conditional::value, + const value_sett::entryt *, + value_sett::entryt *>::type +{ + if( + type.id() != ID_pointer && type.id() != ID_signedbv && + type.id() != ID_unsignedbv && type.id() != ID_array && + type.id() != ID_struct && type.id() != ID_struct_tag && + type.id() != ID_union && type.id() != ID_union_tag) + { + return nullptr; + } + + const typet &followed_type = type.id() == ID_struct_tag + ? ns.follow_tag(to_struct_tag_type(type)) + : type.id() == ID_union_tag + ? ns.follow_tag(to_union_tag_type(type)) + : type; + + // look it up + auto *entry = value_set.find_entry(id2string(identifier) + suffix); + + // try first component name as suffix if not yet found + if( + !entry && + (followed_type.id() == ID_struct || followed_type.id() == ID_union)) + { + const struct_union_typet &struct_union_type = + to_struct_union_type(followed_type); + + const irep_idt &first_component_name = + struct_union_type.components().front().get_name(); + + entry = value_set.find_entry( + id2string(identifier) + "." + id2string(first_component_name) + suffix); + } + + if(!entry) + { + // not found? try without suffix + entry = value_set.find_entry(identifier); + } + + return entry; +} + +// Explicitly instantiate the two possible versions of the method above: + +value_sett::entryt *value_sett::get_entry_for_symbol( + irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns) +{ + return get_entry_for_symbol(*this, identifier, type, suffix, ns); +} + +const value_sett::entryt *value_sett::get_entry_for_symbol( + irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns) const +{ + return get_entry_for_symbol(*this, identifier, type, suffix, ns); +} + void value_sett::get_value_set_rec( const exprt &expr, object_mapt &dest, @@ -413,43 +492,11 @@ void value_sett::get_value_set_rec( } else if(expr.id()==ID_symbol) { - irep_idt identifier=to_symbol_expr(expr).get_identifier(); - - // is it a pointer, integer, array or struct? - if(expr_type.id()==ID_pointer || - expr_type.id()==ID_signedbv || - expr_type.id()==ID_unsignedbv || - expr_type.id()==ID_struct || - expr_type.id()==ID_union || - expr_type.id()==ID_array) - { - // look it up - const entryt *entry = - find_entry(id2string(identifier) + suffix); + const entryt *entry = get_entry_for_symbol( + to_symbol_expr(expr).get_identifier(), expr_type, suffix, ns); - // try first component name as suffix if not yet found - if(!entry && (expr_type.id() == ID_struct || expr_type.id() == ID_union)) - { - const struct_union_typet &struct_union_type= - to_struct_union_type(expr_type); - - const irep_idt &first_component_name = - struct_union_type.components().front().get_name(); - - entry = find_entry( - id2string(identifier) + "." + id2string(first_component_name) + - suffix); - } - - // not found? try without suffix - if(!entry) - entry = find_entry(identifier); - - if(entry) - make_union(dest, entry->object_map); - else - insert(dest, exprt(ID_unknown, original_type)); - } + if(entry) + make_union(dest, entry->object_map); else insert(dest, exprt(ID_unknown, original_type)); } @@ -1592,3 +1639,29 @@ void value_sett::guard( assign(expr.op0(), address_of, ns, false, false); } } + +void value_sett::erase_values_from_entry( + entryt &entry, + const std::unordered_set &values_to_erase) +{ + std::vector keys_to_erase; + + for(auto &key_value : entry.object_map.read()) + { + const auto &rhs_object = to_expr(key_value); + if(values_to_erase.count(rhs_object)) + { + keys_to_erase.emplace_back(key_value.first); + } + } + + DATA_INVARIANT( + keys_to_erase.size() == values_to_erase.size(), + "value_sett::erase_value_from_entry() should erase exactly one value for " + "each element in the set it is given"); + + for(const auto &key_to_erase : keys_to_erase) + { + entry.object_map.write().erase(key_to_erase); + } +} diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index df479262cde..5138f0c0e79 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -13,6 +13,8 @@ Author: Daniel Kroening, kroening@kroening.com #define CPROVER_POINTER_ANALYSIS_VALUE_SET_H #include +#include +#include #include #include @@ -328,6 +330,9 @@ class value_sett /// \return a constant pointer to an entry if found, or null otherwise. /// Note the pointer may be invalidated by insert operations, including /// get_entry. + entryt *find_entry(const idt &id); + + /// Const version of \ref find_entry const entryt *find_entry(const idt &id) const; /// Gets or inserts an entry in this value-set. @@ -460,6 +465,44 @@ class value_sett exprt &expr, const namespacet &ns) const; +private: + /// Helper method for \ref get_entry_for_symbol + template + static auto get_entry_for_symbol( + maybe_const_value_sett &value_set, + irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns) -> + typename std::conditional::value, + const value_sett::entryt *, + value_sett::entryt *>::type; + +public: + /// Get the entry for the symbol and suffix + /// \param identifier: The identifier for the symbol + /// \param type: The type of the symbol + /// \param suffix: The suffix for the entry + /// \param ns: The global namespace, for following \p type if it is a + /// struct tag type or a union tag type + /// \return The entry for the symbol and suffix + value_sett::entryt *get_entry_for_symbol( + irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns); + + /// const version of /ref get_entry_for_symbol + const value_sett::entryt *get_entry_for_symbol( + irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns) const; + + void erase_values_from_entry( + entryt &entry, + const std::unordered_set &values_to_erase); + protected: /// Reads the set of objects pointed to by `expr`, including making /// recursive lookups for dereference operations etc. diff --git a/src/pointer-analysis/value_set_dereference.cpp b/src/pointer-analysis/value_set_dereference.cpp index 223b930cded..68c75184cef 100644 --- a/src/pointer-analysis/value_set_dereference.cpp +++ b/src/pointer-analysis/value_set_dereference.cpp @@ -74,7 +74,8 @@ exprt value_set_dereferencet::dereference(const exprt &pointer) it!=points_to_set.end(); it++) { - valuet value = build_reference_to(*it, pointer); + valuet value = + build_reference_to(*it, pointer, exclude_null_derefs, language_mode, ns); #if 0 std::cout << "V: " << format(value.pointer_guard) << " --> "; @@ -179,7 +180,8 @@ exprt value_set_dereferencet::dereference(const exprt &pointer) /// - object_type=(int *), dereference_type=(void **) is not ok; bool value_set_dereferencet::dereference_type_compare( const typet &object_type, - const typet &dereference_type) const + const typet &dereference_type, + const namespacet &ns) { const typet *object_unwrapped = &object_type; const typet *dereference_unwrapped = &dereference_type; @@ -245,6 +247,11 @@ bool value_set_dereferencet::dereference_type_compare( /// ID_unknown, ID_invalid, or an object_descriptor_exprt giving a referred /// object and offset. /// \param pointer_expr: pointer expression that may point to `what` +/// \param exclude_null_derefs: Ignore value-set entries that indicate a +/// given dereference may follow a null pointer +/// \param language_mode: Mode for any new symbols created to represent a +/// dereference failure +/// \param ns: A namespace /// \return a `valuet` object containing `guard`, `value` and `ignore` fields. /// The `ignore` field is true for a `null` object when `exclude_null_derefs` /// is true (set by our creator when they know \p what cannot be null) @@ -258,7 +265,10 @@ bool value_set_dereferencet::dereference_type_compare( /// .ignore = false}` value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( const exprt &what, - const exprt &pointer_expr) + const exprt &pointer_expr, + const bool exclude_null_derefs, + const irep_idt language_mode, + const namespacet &ns) { const typet &dereference_type = pointer_expr.type().subtype(); @@ -324,8 +334,9 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( result.value=index_expr; } - else if(dereference_type_compare( - memory_symbol.type.subtype(), dereference_type)) + else if( + dereference_type_compare( + memory_symbol.type.subtype(), dereference_type, ns)) { const index_exprt index_expr( symbol_expr, @@ -372,18 +383,19 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( exprt root_object_subexpression=root_object; - if(dereference_type_compare(object_type, dereference_type) && - o.offset().is_zero()) + if( + dereference_type_compare(object_type, dereference_type, ns) && + o.offset().is_zero()) { // The simplest case: types match, and offset is zero! // This is great, we are almost done. result.value = typecast_exprt::conditional_cast(object, dereference_type); } - else if(root_object_type.id()==ID_array && - dereference_type_compare( - root_object_type.subtype(), - dereference_type)) + else if( + root_object_type.id() == ID_array && + dereference_type_compare( + root_object_type.subtype(), dereference_type, ns)) { // We have an array with a subtype that matches // the dereferencing type. @@ -451,7 +463,7 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( else offset=o.offset(); - if(memory_model(result.value, dereference_type, offset)) + if(memory_model(result.value, dereference_type, offset, ns)) { // ok, done } @@ -486,7 +498,8 @@ static bool is_a_bv_type(const typet &type) bool value_set_dereferencet::memory_model( exprt &value, const typet &to_type, - const exprt &offset) + const exprt &offset, + const namespacet &ns) { // we will allow more or less arbitrary pointer type cast @@ -518,7 +531,7 @@ bool value_set_dereferencet::memory_model( // otherwise, we will stitch it together from bytes - return memory_model_bytes(value, to_type, offset); + return memory_model_bytes(value, to_type, offset, ns); } /// Replace `value` by an expression of type `to_type` corresponding to the @@ -532,7 +545,8 @@ bool value_set_dereferencet::memory_model( bool value_set_dereferencet::memory_model_bytes( exprt &value, const typet &to_type, - const exprt &offset) + const exprt &offset, + const namespacet &ns) { const typet from_type=value.type(); diff --git a/src/pointer-analysis/value_set_dereference.h b/src/pointer-analysis/value_set_dereference.h index 531030cf1ef..2ba5eed81f4 100644 --- a/src/pointer-analysis/value_set_dereference.h +++ b/src/pointer-analysis/value_set_dereference.h @@ -54,21 +54,6 @@ class value_set_dereferencet final /// \param pointer: A pointer-typed expression, to be dereferenced. exprt dereference(const exprt &pointer); -private: - const namespacet &ns; - symbol_tablet &new_symbol_table; - dereference_callbackt &dereference_callback; - /// language_mode: ID_java, ID_C or another language identifier - /// if we know the source language in use, irep_idt() otherwise. - const irep_idt language_mode; - /// Flag indicating whether `value_set_dereferencet::dereference` should - /// disregard an apparent attempt to dereference NULL - const bool exclude_null_derefs; - - bool dereference_type_compare( - const typet &object_type, - const typet &dereference_type) const; - /// Return value for `build_reference_to`; see that method for documentation. class valuet { @@ -82,14 +67,40 @@ class value_set_dereferencet final } }; - valuet build_reference_to(const exprt &what, const exprt &pointer); + static valuet build_reference_to( + const exprt &what, + const exprt &pointer, + bool exclude_null_derefs, + irep_idt language_mode, + const namespacet &ns); + + static bool dereference_type_compare( + const typet &object_type, + const typet &dereference_type, + const namespacet &ns); - bool memory_model(exprt &value, const typet &type, const exprt &offset); + static bool memory_model( + exprt &value, + const typet &type, + const exprt &offset, + const namespacet &ns); - bool memory_model_bytes( + static bool memory_model_bytes( exprt &value, const typet &type, - const exprt &offset); + const exprt &offset, + const namespacet &ns); + +private: + const namespacet &ns; + symbol_tablet &new_symbol_table; + dereference_callbackt &dereference_callback; + /// language_mode: ID_java, ID_C or another language identifier + /// if we know the source language in use, irep_idt() otherwise. + const irep_idt language_mode; + /// Flag indicating whether `value_set_dereferencet::dereference` should + /// disregard an apparent attempt to dereference NULL + const bool exclude_null_derefs; }; #endif // CPROVER_POINTER_ANALYSIS_VALUE_SET_DEREFERENCE_H diff --git a/src/util/Makefile b/src/util/Makefile index 8c4c6b5beb4..ebb2a015b19 100644 --- a/src/util/Makefile +++ b/src/util/Makefile @@ -53,6 +53,7 @@ SRC = allocate_objects.cpp \ parse_options.cpp \ parser.cpp \ pointer_offset_size.cpp \ + pointer_offset_sum.cpp \ pointer_predicates.cpp \ rational.cpp \ rational_tools.cpp \ diff --git a/src/pointer-analysis/pointer_offset_sum.cpp b/src/util/pointer_offset_sum.cpp similarity index 86% rename from src/pointer-analysis/pointer_offset_sum.cpp rename to src/util/pointer_offset_sum.cpp index 19baae143d2..dc34d0eb0ce 100644 --- a/src/pointer-analysis/pointer_offset_sum.cpp +++ b/src/util/pointer_offset_sum.cpp @@ -11,13 +11,13 @@ Author: Daniel Kroening, kroening@kroening.com #include "pointer_offset_sum.h" -#include +#include "std_expr.h" exprt pointer_offset_sum(const exprt &a, const exprt &b) { - if(a.id()==ID_unknown) + if(a.id() == ID_unknown) return a; - else if(b.id()==ID_unknown) + else if(b.id() == ID_unknown) return b; else if(a.is_zero()) return b; diff --git a/src/pointer-analysis/pointer_offset_sum.h b/src/util/pointer_offset_sum.h similarity index 63% rename from src/pointer-analysis/pointer_offset_sum.h rename to src/util/pointer_offset_sum.h index e4e6dc238ff..128151930cb 100644 --- a/src/pointer-analysis/pointer_offset_sum.h +++ b/src/util/pointer_offset_sum.h @@ -9,11 +9,11 @@ Author: Daniel Kroening, kroening@kroening.com /// \file /// Pointer Dereferencing -#ifndef CPROVER_POINTER_ANALYSIS_POINTER_OFFSET_SUM_H -#define CPROVER_POINTER_ANALYSIS_POINTER_OFFSET_SUM_H +#ifndef CPROVER_UTIL_POINTER_OFFSET_SUM_H +#define CPROVER_UTIL_POINTER_OFFSET_SUM_H -#include +#include "expr.h" exprt pointer_offset_sum(const exprt &a, const exprt &b); -#endif // CPROVER_POINTER_ANALYSIS_POINTER_OFFSET_SUM_H +#endif // CPROVER_UTIL_POINTER_OFFSET_SUM_H diff --git a/src/util/simplify_expr.cpp b/src/util/simplify_expr.cpp index b2275cb421b..eed4a723368 100644 --- a/src/util/simplify_expr.cpp +++ b/src/util/simplify_expr.cpp @@ -21,6 +21,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "mathematical_expr.h" #include "namespace.h" #include "pointer_offset_size.h" +#include "pointer_offset_sum.h" #include "rational.h" #include "rational_tools.h" #include "simplify_utils.h" @@ -904,7 +905,7 @@ bool simplify_exprt::simplify_dereference(exprt &expr) { index_exprt idx( old.array(), - plus_exprt(old.index(), pointer.op1()), + pointer_offset_sum(old.index(), pointer.op1()), old.array().type().subtype()); simplify_rec(idx); expr.swap(idx);