diff --git a/src/analyses/constant_propagator.cpp b/src/analyses/constant_propagator.cpp index f8b469784ae..24901da0a91 100644 --- a/src/analyses/constant_propagator.cpp +++ b/src/analyses/constant_propagator.cpp @@ -18,6 +18,7 @@ Author: Peter Schrammel #include #include +#include #include #include #include @@ -29,40 +30,64 @@ Author: Peter Schrammel #include #include +/// Assign value `rhs` to `lhs`, recording any newly-known constants in +/// `dest_values`. +/// \param [out] dest_values: results of the assignment are recorded here. We +/// might add extra entries (if we determine some symbol is constant), or +/// might remove existing ones (if the lhs expression is unknown), except if +/// `is_assignment` is false, in which case only the former is done. +/// \param lhs: lhs expression to assign +/// \param rhs: rhs expression to assign to lhs +/// \param ns: namespace, used to check for type mismatches +/// \param cp: owning constant propagator instance, used to filter out symbols +/// that the user doesn't want tracked +/// \param is_assignment: if true, assign_rec may remove entries from +/// dest_values when a constant assignment cannot be determined. This is used +/// when an actual assignment instruction is processed. If false, new entries +/// can be added but existing ones will not be removed; this is used when the +/// "assignment" is actually implied by a read-only operation, such as passing +/// "IF x == y" -- if we know what 'y' is that tells us the value for x, but +/// if we don't there is no reason to discard pre-existing knowledge about x. void constant_propagator_domaint::assign_rec( valuest &dest_values, const exprt &lhs, const exprt &rhs, const namespacet &ns, - const constant_propagator_ait *cp) + const constant_propagator_ait *cp, + bool is_assignment) { if(lhs.id() == ID_dereference) { exprt eval_lhs = lhs; if(partial_evaluate(dest_values, eval_lhs, ns)) { - const bool have_dirty = (cp != nullptr); + if(is_assignment) + { + const bool have_dirty = (cp != nullptr); - if(have_dirty) - dest_values.set_dirty_to_top(cp->dirty, ns); - else - dest_values.set_to_top(); + if(have_dirty) + dest_values.set_dirty_to_top(cp->dirty, ns); + else + dest_values.set_to_top(); + } + // Otherwise disregard this unknown deref in a read-only context. } else - assign_rec(dest_values, eval_lhs, rhs, ns, cp); + assign_rec(dest_values, eval_lhs, rhs, ns, cp, is_assignment); } else if(lhs.id() == ID_index) { const index_exprt &index_expr = to_index_expr(lhs); with_exprt new_rhs(index_expr.array(), index_expr.index(), rhs); - assign_rec(dest_values, index_expr.array(), new_rhs, ns, cp); + assign_rec(dest_values, index_expr.array(), new_rhs, ns, cp, is_assignment); } else if(lhs.id() == ID_member) { const member_exprt &member_expr = to_member_expr(lhs); with_exprt new_rhs(member_expr.compound(), exprt(ID_member_name), rhs); new_rhs.where().set(ID_component_name, member_expr.get_component_name()); - assign_rec(dest_values, member_expr.compound(), new_rhs, ns, cp); + assign_rec( + dest_values, member_expr.compound(), new_rhs, ns, cp, is_assignment); } else if(lhs.id() == ID_symbol) { @@ -82,13 +107,15 @@ void constant_propagator_domaint::assign_rec( dest_values.set_to(s, tmp); } else - dest_values.set_to_top(s); + { + if(is_assignment) + dest_values.set_to_top(s); + } } - else + else if(is_assignment) { // it's an assignment, but we don't really know what object is being written - // to on the left-hand side - bail and set all values to top to be on the - // safe side in terms of soundness + // to: assume it may write to anything. dest_values.set_to_top(); } } @@ -137,7 +164,7 @@ void constant_propagator_domaint::transform( const code_assignt &assignment=to_code_assign(from->code); const exprt &lhs=assignment.lhs(); const exprt &rhs=assignment.rhs(); - assign_rec(values, lhs, rhs, ns, cp); + assign_rec(values, lhs, rhs, ns, cp, true); } else if(from->is_assume()) { @@ -157,15 +184,7 @@ void constant_propagator_domaint::transform( if(g.is_false()) values.set_to_bottom(); else - { two_way_propagate_rec(g, ns, cp); - // If two way propagate is enabled then it may be possible to detect - // that the branch condition is infeasible and thus the domain should - // be set to bottom. Without it the domain will only be set to bottom - // if the guard expression is trivially (i.e. without context) false. - INVARIANT(!values.is_bottom, - "Without two-way propagation this should be impossible."); - } } else if(from->is_dead()) { @@ -223,7 +242,7 @@ void constant_propagator_domaint::transform( break; const symbol_exprt parameter_expr(p_it->get_identifier(), arg.type()); - assign_rec(values, parameter_expr, arg, ns, cp); + assign_rec(values, parameter_expr, arg, ns, cp, true); ++p_it; } @@ -264,6 +283,26 @@ void constant_propagator_domaint::transform( #endif } +static void +replace_typecast_of_bool(exprt &lhs, exprt &rhs, const namespacet &ns) +{ + // (int)var xx 0 ==> var xx (_Bool)0 or similar (xx is == or !=) + // Note this is restricted to bools because in general turning a widening + // into a narrowing typecast is not correct. + if(lhs.id() != ID_typecast) + return; + + const exprt &lhs_underlying = to_typecast_expr(lhs).op(); + if( + lhs_underlying.type() == bool_typet() || + lhs_underlying.type() == c_bool_type()) + { + rhs = typecast_exprt(rhs, lhs_underlying.type()); + simplify(rhs, ns); + + lhs = lhs_underlying; + } +} /// handles equalities and conjunctions containing equalities bool constant_propagator_domaint::two_way_propagate_rec( @@ -280,26 +319,81 @@ bool constant_propagator_domaint::two_way_propagate_rec( if(expr.id()==ID_and) { // need a fixed point here to get the most out of it + bool change_this_time; do { - change = false; + change_this_time = false; forall_operands(it, expr) - if(two_way_propagate_rec(*it, ns, cp)) - change=true; + { + change_this_time |= two_way_propagate_rec(*it, ns, cp); + if(change_this_time) + change = true; + } + } while(change_this_time); + } + else if(expr.id() == ID_not) + { + if(expr.op0().id() == ID_equal || expr.op0().id() == ID_notequal) + { + exprt subexpr = expr.op0(); + subexpr.id(subexpr.id() == ID_equal ? ID_notequal : ID_equal); + change = two_way_propagate_rec(subexpr, ns, cp); + } + else if(expr.op0().id() == ID_symbol && expr.type() == bool_typet()) + { + // Treat `IF !x` like `IF x == FALSE`: + change = + two_way_propagate_rec(equal_exprt(expr.op0(), false_exprt()), ns, cp); + } + } + else if(expr.id() == ID_symbol) + { + if(expr.type() == bool_typet()) + { + // Treat `IF x` like `IF x == TRUE`: + change = two_way_propagate_rec(equal_exprt(expr, true_exprt()), ns, cp); } - while(change); } - else if(expr.id()==ID_equal) + else if(expr.id() == ID_notequal) { - const exprt &lhs=expr.op0(); - const exprt &rhs=expr.op1(); + // Treat "symbol != constant" like "symbol == !constant" when the constant + // is a boolean. + exprt lhs = expr.op0(); + exprt rhs = expr.op1(); + + if(lhs.is_constant() && !rhs.is_constant()) + std::swap(lhs, rhs); + + if(lhs.is_constant() || !rhs.is_constant()) + return false; + + replace_typecast_of_bool(lhs, rhs, ns); + + if(lhs.type() != bool_typet() && lhs.type() != c_bool_type()) + return false; + + // x != FALSE ==> x == TRUE + + if(rhs.is_zero() || rhs.is_false()) + rhs = from_integer(1, rhs.type()); + else + rhs = from_integer(0, rhs.type()); + + change = two_way_propagate_rec(equal_exprt(lhs, rhs), ns, cp); + } + else if(expr.id() == ID_equal) + { + exprt lhs = expr.op0(); + exprt rhs = expr.op1(); + + replace_typecast_of_bool(lhs, rhs, ns); // two-way propagation valuest copy_values=values; - assign_rec(copy_values, lhs, rhs, ns, cp); + assign_rec(copy_values, lhs, rhs, ns, cp, false); if(!values.is_constant(rhs) || values.is_constant(lhs)) - assign_rec(values, rhs, lhs, ns, cp); + assign_rec(values, rhs, lhs, ns, cp, false); change = values.meet(copy_values, ns); } diff --git a/src/analyses/constant_propagator.h b/src/analyses/constant_propagator.h index c5bf6c55f1f..2d995efb984 100644 --- a/src/analyses/constant_propagator.h +++ b/src/analyses/constant_propagator.h @@ -146,7 +146,8 @@ class constant_propagator_domaint:public ai_domain_baset const exprt &lhs, const exprt &rhs, const namespacet &ns, - const constant_propagator_ait *cp); + const constant_propagator_ait *cp, + bool is_assignment); bool two_way_propagate_rec( const exprt &expr, diff --git a/unit/analyses/constant_propagator.cpp b/unit/analyses/constant_propagator.cpp index 37f68599595..46972e42974 100644 --- a/unit/analyses/constant_propagator.cpp +++ b/unit/analyses/constant_propagator.cpp @@ -11,10 +11,11 @@ Author: Diffblue Ltd #include -#include - #include +#include +#include + static bool starts_with_x(const exprt &e, const namespacet &) { if(e.id() != ID_symbol) @@ -42,19 +43,17 @@ SCENARIO("constant_propagator", "[core][analyses][constant_propagator]") local_y.type = integer_typet(); local_y.mode = ID_C; - code_blockt code; - code.copy_to_operands(code_declt(local_x.symbol_expr())); - code.copy_to_operands(code_declt(local_y.symbol_expr())); - code.copy_to_operands( - code_assignt( - local_x.symbol_expr(), constant_exprt("1", integer_typet()))); - code.copy_to_operands( - code_assignt( - local_y.symbol_expr(), constant_exprt("2", integer_typet()))); + code_blockt code( + {code_declt(local_x.symbol_expr()), + code_declt(local_y.symbol_expr()), + code_assignt( + local_x.symbol_expr(), constant_exprt("1", integer_typet())), + code_assignt( + local_y.symbol_expr(), constant_exprt("2", integer_typet()))}); symbolt main_function_symbol; main_function_symbol.name = "main"; - main_function_symbol.type = code_typet(); + main_function_symbol.type = code_typet({}, void_typet()); main_function_symbol.value = code; main_function_symbol.mode = ID_C; @@ -108,4 +107,258 @@ SCENARIO("constant_propagator", "[core][analyses][constant_propagator]") } } } + + GIVEN("A GOTO program featuring a condition over a boolean") + { + // Create a program like: + // bool b; + // if(!b) + // b = true; + // Repeat this using bool_typet and c_bool_typet for "bool". + + goto_modelt goto_model; + namespacet ns(goto_model.symbol_table); + + symbolt bool_local; + symbolt c_bool_local; + bool_local.name = "bool_local"; + bool_local.type = bool_typet(); + bool_local.mode = ID_C; + c_bool_local.name = "c_bool_local"; + c_bool_local.type = c_bool_typet(8); + c_bool_local.mode = ID_C; + + code_blockt code({code_declt(bool_local.symbol_expr()), + code_declt(c_bool_local.symbol_expr())}); + + code_ifthenelset bool_cond_block( + not_exprt(bool_local.symbol_expr()), + code_assignt(bool_local.symbol_expr(), true_exprt())); + + const exprt c_bool_true = from_integer(1, c_bool_typet(8)); + code_ifthenelset c_bool_cond_block( + notequal_exprt(c_bool_local.symbol_expr(), c_bool_true), + code_assignt(c_bool_local.symbol_expr(), c_bool_true)); + + code.add(std::move(bool_cond_block)); + code.add(std::move(c_bool_cond_block)); + + symbolt main_function_symbol; + main_function_symbol.name = "main"; + main_function_symbol.type = code_typet({}, void_typet()); + main_function_symbol.value = code; + main_function_symbol.mode = ID_C; + + goto_model.symbol_table.add(bool_local); + goto_model.symbol_table.add(c_bool_local); + goto_model.symbol_table.add(main_function_symbol); + + goto_convert(goto_model, null_message_handler); + + const goto_functiont &main_function = goto_model.get_goto_function("main"); + + // Find the first DEAD instruction -- we will test our results there, after + // the function body but before the exit sequence. + goto_programt::const_targett test_instruction = + main_function.body.instructions.begin(); + while(test_instruction != main_function.body.instructions.end() && + !test_instruction->is_dead()) + { + ++test_instruction; + } + + REQUIRE(test_instruction != main_function.body.instructions.end()); + + WHEN("Constant propagation is performed") + { + constant_propagator_ait constant_propagator(main_function); + constant_propagator(main_function_symbol.name, main_function, ns); + + THEN( + "The propagator should conclude that both booleans are true at the " + "end of the function") + { + const auto &final_domain = constant_propagator[test_instruction]; + + REQUIRE(final_domain.values.is_constant(bool_local.symbol_expr())); + REQUIRE(final_domain.values.is_constant(c_bool_local.symbol_expr())); + } + } + } + + GIVEN("A GOTO program testing ways of expressing boolean tests") + { + // Create a program like: + // bool b0, b1, b2, ...; + // int marker; + // if(b0) + // if(!b1) + // if(b2 && b3) + // if(b4 == TRUE) + // if(b5 == FALSE) + // if(b6 != TRUE) + // if(b7 != FALSE) + // if((int)b8 == 0) + // if((char)b9 == '\1') + // marker = 1234; + + // At the marker assignment we should have: + // b0, !b1, b2, b3, b4, !b5, !b6, b7, !b8, b9 all known. + // Then repeat the whole thing with C_bools instead of plain bools, + // except for the first two (b0 and !b1), which can't be done with C_bool. + + std::vector bool_locals; + std::vector c_bool_locals; + const size_t n_bool_locals = 10; + const size_t n_c_bool_locals = 8; + + for(size_t i = 0; i < n_bool_locals; ++i) + { + symbolt bool_local; + bool_local.name = "b" + std::to_string(i); + bool_local.type = bool_typet(); + bool_local.mode = ID_C; + bool_locals.push_back(bool_local); + } + + for(size_t i = 0; i < n_c_bool_locals; ++i) + { + symbolt c_bool_local; + c_bool_local.name = "cb" + std::to_string(i); + c_bool_local.type = c_bool_typet(8); + c_bool_local.mode = ID_C; + c_bool_locals.push_back(c_bool_local); + } + + const exprt bool_tests[] = { + bool_locals.at(0).symbol_expr(), + not_exprt(bool_locals.at(1).symbol_expr()), + and_exprt( + bool_locals.at(2).symbol_expr(), bool_locals.at(3).symbol_expr()), + equal_exprt(bool_locals.at(4).symbol_expr(), true_exprt()), + equal_exprt(bool_locals.at(5).symbol_expr(), false_exprt()), + notequal_exprt(bool_locals.at(6).symbol_expr(), true_exprt()), + notequal_exprt(bool_locals.at(7).symbol_expr(), false_exprt()), + equal_exprt( + typecast_exprt(bool_locals.at(8).symbol_expr(), signedbv_typet(32)), + from_integer(0, signedbv_typet(32))), + equal_exprt( + typecast_exprt(bool_locals.at(9).symbol_expr(), unsignedbv_typet(8)), + from_integer(1, unsignedbv_typet(8)))}; + + const exprt c_bool_false = from_integer(0, c_bool_typet(8)); + const exprt c_bool_true = from_integer(1, c_bool_typet(8)); + + const exprt c_bool_tests[] = { + and_exprt( + equal_exprt(c_bool_locals.at(0).symbol_expr(), c_bool_true), + equal_exprt(c_bool_locals.at(1).symbol_expr(), c_bool_true)), + equal_exprt(c_bool_locals.at(2).symbol_expr(), c_bool_true), + equal_exprt(c_bool_locals.at(3).symbol_expr(), c_bool_false), + notequal_exprt(c_bool_locals.at(4).symbol_expr(), c_bool_true), + notequal_exprt(c_bool_locals.at(5).symbol_expr(), c_bool_false), + equal_exprt( + typecast_exprt(c_bool_locals.at(6).symbol_expr(), signedbv_typet(32)), + from_integer(0, signedbv_typet(32))), + equal_exprt( + typecast_exprt(c_bool_locals.at(7).symbol_expr(), unsignedbv_typet(8)), + from_integer(1, unsignedbv_typet(8)))}; + + const bool bool_expectations[n_bool_locals] = { + true, false, true, true, true, false, false, true, false, true}; + + const bool c_bool_expectations[n_c_bool_locals] = { + true, true, true, false, false, true, false, true}; + + symbolt marker_symbol; + marker_symbol.type = signedbv_typet(32); + marker_symbol.name = "marker"; + marker_symbol.mode = ID_C; + + codet program = code_assignt( + marker_symbol.symbol_expr(), from_integer(1234, marker_symbol.type)); + + // Build a big nested-if around the marker assignment: + for(const exprt &test : bool_tests) + program = code_ifthenelset(test, program); + for(const exprt &test : c_bool_tests) + program = code_ifthenelset(test, program); + + goto_modelt goto_model; + namespacet ns(goto_model.symbol_table); + + for(const symbolt &symbol : bool_locals) + goto_model.symbol_table.add(symbol); + for(const symbolt &symbol : c_bool_locals) + goto_model.symbol_table.add(symbol); + + symbolt main_function_symbol; + main_function_symbol.name = "main"; + main_function_symbol.type = code_typet({}, void_typet()); + main_function_symbol.value = program; + main_function_symbol.mode = ID_C; + + goto_model.symbol_table.add(marker_symbol); + goto_model.symbol_table.add(main_function_symbol); + + goto_convert(goto_model, null_message_handler); + + const goto_functiont &main_function = goto_model.get_goto_function("main"); + + // Find the marker assignment: we will check that the correct constants + // have been propagated once we reach it. + goto_programt::const_targett test_instruction = + main_function.body.instructions.begin(); + while(test_instruction != main_function.body.instructions.end() && + !(test_instruction->is_assign() && + to_code_assign(test_instruction->code).lhs() == + marker_symbol.symbol_expr())) + { + ++test_instruction; + } + + REQUIRE(test_instruction != main_function.body.instructions.end()); + + WHEN("Constant propagation is performed") + { + constant_propagator_ait constant_propagator(main_function); + constant_propagator(main_function_symbol.name, main_function, ns); + + THEN("The propagator should match our expectations") + { + const auto &final_domain = constant_propagator[test_instruction]; + + for(size_t i = 0; i < n_bool_locals; ++i) + { + exprt bool_local = bool_locals[i].symbol_expr(); + + REQUIRE(final_domain.values.is_constant(bool_local)); + + final_domain.values.replace_const.replace(bool_local); + + exprt expected; + if(bool_expectations[i]) + expected = true_exprt(); + else + expected = false_exprt(); + + REQUIRE(bool_local == expected); + } + + for(size_t i = 0; i < n_c_bool_locals; ++i) + { + exprt c_bool_local = c_bool_locals[i].symbol_expr(); + + REQUIRE(final_domain.values.is_constant(c_bool_local)); + + final_domain.values.replace_const.replace(c_bool_local); + + const exprt expected = + c_bool_expectations[i] ? c_bool_true : c_bool_false; + + REQUIRE(c_bool_local == expected); + } + } + } + } }