From 69528ad6e19ca788fd83935cdc0aa3b36845f49c Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Apr 2019 14:43:07 +0100 Subject: [PATCH 01/10] Update java-models-library for diffblue/java-models-library#20 https://github.com/diffblue/java-models-library/pull/20 --- jbmc/lib/java-models-library | 2 +- .../java_string_library_preprocess.cpp | 205 +++++++----------- .../java_string_library_preprocess.h | 32 ++- .../string_constraint_generator_format.cpp | 71 ++---- 4 files changed, 115 insertions(+), 195 deletions(-) diff --git a/jbmc/lib/java-models-library b/jbmc/lib/java-models-library index d92026cd8dd..287f0b7ef3c 160000 --- a/jbmc/lib/java-models-library +++ b/jbmc/lib/java-models-library @@ -1 +1 @@ -Subproject commit d92026cd8dd8f1e769f5b11831e831fefdc4c080 +Subproject commit 287f0b7ef3cba9b3779bccc70b6009e5e611d6fa diff --git a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp index 34e27ba0333..e3e6df57001 100644 --- a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp +++ b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp @@ -1123,57 +1123,37 @@ code_blockt java_string_library_preprocesst::make_assign_function_from_call( /// primitive value contained in the given object. optionalt java_string_library_preprocesst::get_primitive_value_of_object( - const exprt &object, - irep_idt type_name, - const source_locationt &loc, - const irep_idt &function_id, - symbol_table_baset &symbol_table, - code_blockt &code) -{ + const exprt &object, irep_idt type_name, const source_locationt &loc, + const irep_idt &function_id, symbol_table_baset &symbol_table, + code_blockt &code) { optionalt object_type; typet value_type; - if(type_name==ID_boolean) - { - value_type=java_boolean_type(); + if (type_name == ID_boolean) { + value_type = java_boolean_type(); object_type = struct_tag_typet("java::java.lang.Boolean"); - } - else if(type_name==ID_char) - { - value_type=java_char_type(); + } else if (type_name == ID_char) { + value_type = java_char_type(); object_type = struct_tag_typet("java::java.lang.Character"); - } - else if(type_name==ID_byte) - { - value_type=java_byte_type(); + } else if (type_name == ID_byte) { + value_type = java_byte_type(); object_type = struct_tag_typet("java::java.lang.Byte"); - } - else if(type_name==ID_short) - { - value_type=java_short_type(); + } else if (type_name == ID_short) { + value_type = java_short_type(); object_type = struct_tag_typet("java::java.lang.Short"); - } - else if(type_name==ID_int) - { - value_type=java_int_type(); + } else if (type_name == ID_int) { + value_type = java_int_type(); object_type = struct_tag_typet("java::java.lang.Integer"); - } - else if(type_name==ID_long) - { - value_type=java_long_type(); + } else if (type_name == ID_long) { + value_type = java_long_type(); object_type = struct_tag_typet("java::java.lang.Long"); - } - else if(type_name==ID_float) - { - value_type=java_float_type(); + } else if (type_name == ID_float) { + value_type = java_float_type(); object_type = struct_tag_typet("java::java.lang.Float"); - } - else if(type_name==ID_double) - { - value_type=java_double_type(); + } else if (type_name == ID_double) { + value_type = java_double_type(); object_type = struct_tag_typet("java::java.lang.Double"); - } - else if(type_name==ID_void) + } else if (type_name == ID_void) return {}; else UNREACHABLE; @@ -1183,26 +1163,23 @@ java_string_library_preprocesst::get_primitive_value_of_object( // declare tmp_type_name to hold the value const std::string aux_name = "tmp_" + id2string(type_name); const symbolt &symbol = - fresh_java_symbol(value_type, aux_name, loc, function_id, symbol_table); + fresh_java_symbol(value_type, aux_name, loc, function_id, symbol_table); const auto value = symbol.symbol_expr(); // Check that the type of the object is in the symbol table, // otherwise there is no safe way of finding its value. - if( - const auto maybe_symbol = - symbol_table.lookup(object_type->get_identifier())) - { + if (const auto maybe_symbol = + symbol_table.lookup(object_type->get_identifier())) { const struct_typet &struct_type = to_struct_type(maybe_symbol->type); // Check that the type has a value field const struct_union_typet::componentt &value_comp = - struct_type.get_component("value"); - if(!value_comp.is_nil()) - { + struct_type.get_component("value"); + if (!value_comp.is_nil()) { const pointer_typet struct_pointer_type = pointer_type(struct_type); dereference_exprt deref{typecast_exprt(object, struct_pointer_type), struct_pointer_type.subtype()}; - const member_exprt deref_value{ - std::move(deref), value_comp.get_name(), value_comp.type()}; + const member_exprt deref_value{std::move(deref), value_comp.get_name(), + value_comp.type()}; code.add(code_assignt(value, deref_value), loc); return value; } @@ -1222,16 +1199,15 @@ java_string_library_preprocesst::get_primitive_value_of_object( /// \param argv: reference to an array of references /// \param index: index of the desired object /// \return An expression representing the object at position `index` of `argv`. -dereference_exprt java_string_library_preprocesst::get_object_at_index( - const exprt &argv, - std::size_t index) -{ +dereference_exprt +java_string_library_preprocesst::get_object_at_index(const exprt &argv, + std::size_t index) { const dereference_exprt deref_objs{argv, argv.type().subtype()}; const pointer_typet empty_pointer = pointer_type(java_void_type()); const pointer_typet pointer_of_pointer = pointer_type(empty_pointer); const member_exprt data_member{deref_objs, "data", pointer_of_pointer}; const plus_exprt data_pointer_plus_index{ - data_member, from_integer(index, java_int_type()), data_member.type()}; + data_member, from_integer(index, java_int_type()), data_member.type()}; return dereference_exprt{data_pointer_plus_index, data_pointer_plus_index.type().subtype()}; } @@ -1268,35 +1244,27 @@ dereference_exprt java_string_library_preprocesst::get_object_at_index( /// \return An expression of type `structured_type` representing the possible /// values of the argument at position `index` of `argv`. struct_exprt java_string_library_preprocesst::make_argument_for_format( - const exprt &argv, - std::size_t index, - const struct_typet &structured_type, - const source_locationt &loc, - const irep_idt &function_id, - symbol_table_baset &symbol_table, - code_blockt &code) -{ + const exprt &argv, std::size_t index, const struct_typet &structured_type, + const source_locationt &loc, const irep_idt &function_id, + symbol_table_baset &symbol_table, code_blockt &code) { // Declarations of the fields of arg_i_struct // arg_i_struct is { arg_i_string_expr, tmp_int, tmp_char, ... } struct_exprt arg_i_struct({}, structured_type); std::list field_exprs; - for(const auto & comp : structured_type.components()) - { - const irep_idt &name=comp.get_name(); - const typet &type=comp.type(); + for (const auto &comp : structured_type.components()) { + const irep_idt &name = comp.get_name(); + const typet &type = comp.type(); exprt field_expr; - if(name!="string_expr") - { - std::string tmp_name="tmp_"+id2string(name); + if (name != "string_expr") { + std::string tmp_name = "tmp_" + id2string(name); const symbolt &field_symbol = - fresh_java_symbol(type, tmp_name, loc, function_id, symbol_table); + fresh_java_symbol(type, tmp_name, loc, function_id, symbol_table); auto field_symbol_expr = field_symbol.symbol_expr(); field_expr = field_symbol_expr; code.add(code_declt(field_symbol_expr), loc); - } - else + } else field_expr = - make_nondet_string_expr(loc, function_id, symbol_table, code); + make_nondet_string_expr(loc, function_id, symbol_table, code); field_exprs.push_back(field_expr); arg_i_struct.copy_to_operands(field_expr); @@ -1305,7 +1273,7 @@ struct_exprt java_string_library_preprocesst::make_argument_for_format( // arg_i = argv[index] const exprt obj = get_object_at_index(argv, index); const symbolt &object_symbol = fresh_java_symbol( - obj.type(), "tmp_format_obj", loc, function_id, symbol_table); + obj.type(), "tmp_format_obj", loc, function_id, symbol_table); const symbol_exprt arg_i = object_symbol.symbol_expr(); allocate_objectst allocate_objects(ID_java, loc, function_id, symbol_table); @@ -1316,44 +1284,34 @@ struct_exprt java_string_library_preprocesst::make_argument_for_format( code.append(tmp); code.add(code_assignt(arg_i, obj), loc); - code.add( - code_assumet( - not_exprt( - equal_exprt(arg_i, null_pointer_exprt(to_pointer_type(arg_i.type()))))), - loc); + code.add(code_assumet(not_exprt(equal_exprt( + arg_i, null_pointer_exprt(to_pointer_type(arg_i.type()))))), + loc); code_blockt code_not_null; // Assigning all the fields of arg_i_struct - for(const auto &comp : structured_type.components()) - { - const irep_idt &name=comp.get_name(); - exprt field_expr=field_exprs.front(); + for (const auto &comp : structured_type.components()) { + const irep_idt &name = comp.get_name(); + exprt field_expr = field_exprs.front(); field_exprs.pop_front(); - if(name=="string_expr") - { + if (name == "string_expr") { const pointer_typet string_pointer = - java_reference_type(struct_tag_typet("java::java.lang.String")); + java_reference_type(struct_tag_typet("java::java.lang.String")); const typecast_exprt arg_i_as_string{arg_i, string_pointer}; - code_assign_java_string_to_string_expr( - to_string_expr(field_expr), - arg_i_as_string, - loc, - symbol_table, - code_not_null); - } - else if(name==ID_int || name==ID_float || name==ID_char || name==ID_boolean) - { + code_assign_java_string_to_string_expr(to_string_expr(field_expr), + arg_i_as_string, loc, symbol_table, + code_not_null); + } else if (name == ID_int || name == ID_float || name == ID_char || + name == ID_boolean) { const auto value = get_primitive_value_of_object( - arg_i, name, loc, function_id, symbol_table, code_not_null); - if(value.has_value()) + arg_i, name, loc, function_id, symbol_table, code_not_null); + if (value.has_value()) code_not_null.add(code_assignt(field_expr, *value), loc); else code_not_null.add(code_assignt(field_expr, nil_exprt()), loc); - } - else - { + } else { // TODO: date_time and hash_code not implemented } } @@ -1374,16 +1332,13 @@ struct_exprt java_string_library_preprocesst::make_argument_for_format( /// class of the arguments is not known, we give as argument to the internal // format function a structure containing the different possible types. code_blockt java_string_library_preprocesst::make_string_format_code( - const java_method_typet &type, - const source_locationt &loc, - const irep_idt &function_id, - symbol_table_baset &symbol_table) -{ - PRECONDITION(type.parameters().size()==2); + const java_method_typet &type, const source_locationt &loc, + const irep_idt &function_id, symbol_table_baset &symbol_table) { + PRECONDITION(type.parameters().size() == 2); code_blockt code; - exprt::operandst args = - process_parameters(type.parameters(), loc, function_id, symbol_table, code); - INVARIANT(args.size()==2, "String.format should have two arguments"); + exprt::operandst args = process_parameters(type.parameters(), loc, + function_id, symbol_table, code); + INVARIANT(args.size() == 2, "String.format should have two arguments"); // The argument can be: // a string, an integer, a floating point, a character, a boolean, @@ -1403,19 +1358,17 @@ code_blockt java_string_library_preprocesst::make_string_format_code( // containing each possible type used in format specifiers. std::vector processed_args; processed_args.push_back(args[0]); - for(std::size_t i=0; i get_primitive_value_of_object( - const exprt &object, - irep_idt type_name, - const source_locationt &loc, - const irep_idt &function_id, - symbol_table_baset &symbol_table, - code_blockt &code); + const exprt &object, irep_idt type_name, const source_locationt &loc, + const irep_idt &function_id, symbol_table_baset &symbol_table, + code_blockt &code); dereference_exprt get_object_at_index(const exprt &argv, std::size_t index); }; diff --git a/src/solvers/strings/string_constraint_generator_format.cpp b/src/solvers/strings/string_constraint_generator_format.cpp index 6e96d974074..04c91fb0f49 100644 --- a/src/solvers/strings/string_constraint_generator_format.cpp +++ b/src/solvers/strings/string_constraint_generator_format.cpp @@ -243,9 +243,8 @@ static std::vector parse_format_string(std::string s) /// \param expr: a structured expression /// \param component_name: name of the desired component /// \return Expression in the component of `expr` named `component_name`. -static exprt -get_component_in_struct(const struct_exprt &expr, irep_idt component_name) -{ +static exprt get_component_in_struct(const struct_exprt &expr, + irep_idt component_name) { const struct_typet &type = to_struct_type(expr.type()); std::size_t number = type.component_number(component_name); return expr.operands()[number]; @@ -266,16 +265,12 @@ get_component_in_struct(const struct_exprt &expr, irep_idt component_name) /// \param ns: namespace /// \return String expression representing the output of String.format. static std::pair -add_axioms_for_format_specifier( - symbol_generatort &fresh_symbol, - const format_specifiert &fs, - const struct_exprt &arg, - const typet &index_type, - const typet &char_type, - array_poolt &array_pool, - const messaget &message, - const namespacet &ns) -{ +add_axioms_for_format_specifier(symbol_generatort &fresh_symbol, + const format_specifiert &fs, + const struct_exprt &arg, + const typet &index_type, const typet &char_type, + array_poolt &array_pool, + const messaget &message, const namespacet &ns) { string_constraintst constraints; const array_string_exprt res = array_pool.fresh_string(index_type, char_type); std::pair return_code; @@ -283,45 +278,39 @@ add_axioms_for_format_specifier( { case format_specifiert::DECIMAL_INTEGER: return_code = add_axioms_for_string_of_int( - res, get_component_in_struct(arg, ID_int), 0, ns); + res, get_component_in_struct(arg, ID_int), 0, ns); return {res, std::move(return_code.second)}; case format_specifiert::HEXADECIMAL_INTEGER: return_code = - add_axioms_from_int_hex(res, get_component_in_struct(arg, ID_int)); + add_axioms_from_int_hex(res, get_component_in_struct(arg, ID_int)); return {res, std::move(return_code.second)}; case format_specifiert::SCIENTIFIC: return_code = add_axioms_from_float_scientific_notation( - fresh_symbol, - res, - get_component_in_struct(arg, ID_float), - array_pool, - ns); + fresh_symbol, res, get_component_in_struct(arg, ID_float), array_pool, + ns); return {res, std::move(return_code.second)}; case format_specifiert::DECIMAL_FLOAT: return_code = add_axioms_for_string_of_float( - fresh_symbol, - res, - get_component_in_struct(arg, ID_float), - array_pool, - ns); + fresh_symbol, res, get_component_in_struct(arg, ID_float), array_pool, + ns); return {res, std::move(return_code.second)}; case format_specifiert::CHARACTER: return_code = - add_axioms_from_char(res, get_component_in_struct(arg, ID_char)); + add_axioms_from_char(res, get_component_in_struct(arg, ID_char)); return {res, std::move(return_code.second)}; case format_specifiert::BOOLEAN: return_code = - add_axioms_from_bool(res, get_component_in_struct(arg, ID_boolean)); + add_axioms_from_bool(res, get_component_in_struct(arg, ID_boolean)); return {res, std::move(return_code.second)}; case format_specifiert::STRING: { - auto string_expr = - get_string_expr(array_pool, get_component_in_struct(arg, "string_expr")); + auto string_expr = get_string_expr( + array_pool, get_component_in_struct(arg, "string_expr")); return {std::move(string_expr), {}}; } case format_specifiert::HASHCODE: return_code = add_axioms_for_string_of_int( - res, get_component_in_struct(arg, "hashcode"), 0, ns); + res, get_component_in_struct(arg, "hashcode"), 0, ns); return {res, std::move(return_code.second)}; case format_specifiert::LINE_SEPARATOR: // TODO: the constant should depend on the system: System.lineSeparator() @@ -341,15 +330,9 @@ add_axioms_for_format_specifier( { format_specifiert fs_lower = fs; fs_lower.conversion = tolower(fs.conversion); - auto format_specifier_result = add_axioms_for_format_specifier( - fresh_symbol, - fs_lower, - arg, - index_type, - char_type, - array_pool, - message, - ns); + auto format_specifier_result = + add_axioms_for_format_specifier(fresh_symbol, fs_lower, arg, index_type, + char_type, array_pool, message, ns); const exprt return_code_upper_case = fresh_symbol("return_code_upper_case", get_return_code_type()); @@ -434,14 +417,8 @@ std::pair add_axioms_for_format( } auto result = add_axioms_for_format_specifier( - fresh_symbol, - fs, - to_struct_expr(arg), - index_type, - char_type, - array_pool, - message, - ns); + fresh_symbol, fs, to_struct_expr(arg), index_type, char_type, + array_pool, message, ns); merge(constraints, std::move(result.second)); intermediary_strings.push_back(result.first); } From 9899d25d1a540d1eea7116250d5d159341699c82 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Apr 2019 14:43:08 +0100 Subject: [PATCH 02/10] Add preprocessing for CProverString.format This makes the link between the CProverString.format signature from java and the ID_cprover_string_format_func from the solver. --- jbmc/src/java_bytecode/java_string_library_preprocess.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp index e3e6df57001..11c72687a29 100644 --- a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp +++ b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp @@ -1807,6 +1807,14 @@ void java_string_library_preprocesst::initialize_conversion_table() std::bind(&java_string_library_preprocesst::make_string_format_code, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); + + std::string format_signature = "java::org.cprover.CProverString.format:("; + for(std::size_t i = 0; i < MAX_FORMAT_ARGS + 1; ++i) + format_signature += "Ljava/lang/String;"; + format_signature += ")Ljava/lang/String;"; + cprover_equivalent_to_java_string_returning_function[format_signature] = + ID_cprover_string_format_func; + cprover_equivalent_to_java_function ["java::java.lang.String.hashCode:()I"]= ID_cprover_string_hash_code_func; From 7e36dc20440804b4fcb6bfa3e481353c979eb932 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Apr 2019 14:43:10 +0100 Subject: [PATCH 03/10] Refactor add_axioms_for_format_specifier This makes it accept a function as argument, which gives for possible types the corresponding value of the argument. This will allow us to adapt more easily to a change in the way the arguments of format are given: they are now given as a struct but we want to have the possibility to give them as a string. --- .../string_constraint_generator_format.cpp | 67 +++++++++++-------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/src/solvers/strings/string_constraint_generator_format.cpp b/src/solvers/strings/string_constraint_generator_format.cpp index 04c91fb0f49..18db2a44b56 100644 --- a/src/solvers/strings/string_constraint_generator_format.cpp +++ b/src/solvers/strings/string_constraint_generator_format.cpp @@ -257,7 +257,8 @@ static exprt get_component_in_struct(const struct_exprt &expr, /// specifier. /// \param fresh_symbol: generator of fresh symbols /// \param fs: a format specifier -/// \param arg: a struct containing the possible value of the argument to format +/// \param get_arg: a function returning the possible value of the argument to +/// format given their type. /// \param index_type: type for indexes in strings /// \param char_type: type of characters /// \param array_pool: pool of arrays representing strings @@ -265,52 +266,48 @@ static exprt get_component_in_struct(const struct_exprt &expr, /// \param ns: namespace /// \return String expression representing the output of String.format. static std::pair -add_axioms_for_format_specifier(symbol_generatort &fresh_symbol, - const format_specifiert &fs, - const struct_exprt &arg, - const typet &index_type, const typet &char_type, - array_poolt &array_pool, - const messaget &message, const namespacet &ns) { +add_axioms_for_format_specifier( + symbol_generatort &fresh_symbol, + const format_specifiert &fs, + const std::function &get_arg, + const typet &index_type, + const typet &char_type, + array_poolt &array_pool, + const messaget &message, + const namespacet &ns) +{ string_constraintst constraints; const array_string_exprt res = array_pool.fresh_string(index_type, char_type); std::pair return_code; switch(fs.conversion) { case format_specifiert::DECIMAL_INTEGER: - return_code = add_axioms_for_string_of_int( - res, get_component_in_struct(arg, ID_int), 0, ns); + return_code = add_axioms_for_string_of_int(res, get_arg(ID_int), 0, ns); return {res, std::move(return_code.second)}; case format_specifiert::HEXADECIMAL_INTEGER: - return_code = - add_axioms_from_int_hex(res, get_component_in_struct(arg, ID_int)); + return_code = add_axioms_from_int_hex(res, get_arg(ID_int)); return {res, std::move(return_code.second)}; case format_specifiert::SCIENTIFIC: return_code = add_axioms_from_float_scientific_notation( - fresh_symbol, res, get_component_in_struct(arg, ID_float), array_pool, - ns); + fresh_symbol, res, get_arg(ID_float), array_pool, ns); return {res, std::move(return_code.second)}; case format_specifiert::DECIMAL_FLOAT: return_code = add_axioms_for_string_of_float( - fresh_symbol, res, get_component_in_struct(arg, ID_float), array_pool, - ns); + fresh_symbol, res, get_arg(ID_float), array_pool, ns); return {res, std::move(return_code.second)}; case format_specifiert::CHARACTER: - return_code = - add_axioms_from_char(res, get_component_in_struct(arg, ID_char)); + return_code = add_axioms_from_char(res, get_arg(ID_char)); return {res, std::move(return_code.second)}; case format_specifiert::BOOLEAN: - return_code = - add_axioms_from_bool(res, get_component_in_struct(arg, ID_boolean)); + return_code = add_axioms_from_bool(res, get_arg(ID_boolean)); return {res, std::move(return_code.second)}; case format_specifiert::STRING: { - auto string_expr = get_string_expr( - array_pool, get_component_in_struct(arg, "string_expr")); + auto string_expr = get_string_expr(array_pool, get_arg("string_expr")); return {std::move(string_expr), {}}; } case format_specifiert::HASHCODE: - return_code = add_axioms_for_string_of_int( - res, get_component_in_struct(arg, "hashcode"), 0, ns); + return_code = add_axioms_for_string_of_int(res, get_arg("hashcode"), 0, ns); return {res, std::move(return_code.second)}; case format_specifiert::LINE_SEPARATOR: // TODO: the constant should depend on the system: System.lineSeparator() @@ -330,9 +327,15 @@ add_axioms_for_format_specifier(symbol_generatort &fresh_symbol, { format_specifiert fs_lower = fs; fs_lower.conversion = tolower(fs.conversion); - auto format_specifier_result = - add_axioms_for_format_specifier(fresh_symbol, fs_lower, arg, index_type, - char_type, array_pool, message, ns); + auto format_specifier_result = add_axioms_for_format_specifier( + fresh_symbol, + fs_lower, + get_arg, + index_type, + char_type, + array_pool, + message, + ns); const exprt return_code_upper_case = fresh_symbol("return_code_upper_case", get_return_code_type()); @@ -417,8 +420,16 @@ std::pair add_axioms_for_format( } auto result = add_axioms_for_format_specifier( - fresh_symbol, fs, to_struct_expr(arg), index_type, char_type, - array_pool, message, ns); + fresh_symbol, + fs, + [&](const irep_idt &id) { + return get_component_in_struct(to_struct_expr(arg), id); + }, + index_type, + char_type, + array_pool, + message, + ns); merge(constraints, std::move(result.second)); intermediary_strings.push_back(result.first); } From 8cd312cb4ac5c6217fb7bc16e358c258d9141316 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Apr 2019 14:43:11 +0100 Subject: [PATCH 04/10] Make add_axioms_for_format accept string arguments Add the possibility to pass argument to the builtin format function as strings instead of structs with fields. --- .../string_constraint_generator_format.cpp | 76 +++++++++++++++++-- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/src/solvers/strings/string_constraint_generator_format.cpp b/src/solvers/strings/string_constraint_generator_format.cpp index 18db2a44b56..9e07e703a02 100644 --- a/src/solvers/strings/string_constraint_generator_format.cpp +++ b/src/solvers/strings/string_constraint_generator_format.cpp @@ -303,7 +303,8 @@ add_axioms_for_format_specifier( return {res, std::move(return_code.second)}; case format_specifiert::STRING: { - auto string_expr = get_string_expr(array_pool, get_arg("string_expr")); + const exprt arg_string = get_arg("string_expr"); + const array_string_exprt string_expr = to_array_string_expr(arg_string); return {std::move(string_expr), {}}; } case format_specifiert::HASHCODE: @@ -365,6 +366,55 @@ add_axioms_for_format_specifier( } } +/// Deserialize an argument for format from \p string. +/// \p id should be one of: string_expr, int, char, boolean, float. +/// The primitive values are expected to all be encoded using 4 characters. +static exprt format_arg_from_string( + const array_string_exprt &string, const irep_idt &id) +{ + if(id == "string_expr") + return string; + if(id == ID_int) + { + // Assume the string has length 4 + // (int64)string.content[0] << 48 | (int64) string.content[1] << 32 | + // (int64)string.content[2] << 16 | (int64) string.content[3] + const signedbv_typet type{64}; + return bitor_exprt{ + bitor_exprt{ + shl_exprt{typecast_exprt{string[0], type}, 48}, + shl_exprt{typecast_exprt{string[1], type}, 32}}, + bitor_exprt{ + shl_exprt{typecast_exprt{string[2], type}, 16}, + typecast_exprt{string[3], type}}}; + } + if(id == ID_char) + { + // We assume the string has length exactly 4 and ignore the first 3 + // (unsigned16)string.content[3] + const unsignedbv_typet type{16}; + return typecast_exprt{ + index_exprt{string.content(), from_integer(3, string.length().type())}, + type}; + } + if(id == ID_boolean) + { + // We assume the string has length exactly 4 and ignore the first 3 + // (bool)string.content[3] + const c_bool_typet type{8}; + return typecast_exprt{ + index_exprt{string.content(), from_integer(3, string.length().type())}, + type}; + } + if(id == ID_float) + { + // Deserialize an int and cast to float + const exprt as_int = format_arg_from_string(string, ID_int); + return typecast_exprt{as_int, floatbv_typet{}}; + } + UNHANDLED_CASE; +} + /// Parse `s` and add axioms ensuring the output corresponds to the output of /// String.format. /// \param fresh_symbol: generator of fresh symbols @@ -406,7 +456,7 @@ std::pair add_axioms_for_format( { INVARIANT( arg_count < args.size(), "number of format must match specifiers"); - arg = to_struct_expr(args[arg_count++]); + arg = args[arg_count++]; } else { @@ -419,12 +469,28 @@ std::pair add_axioms_for_format( arg = to_struct_expr(args[fs.index - 1]); } + std::function get_arg; + if(is_refined_string_type(arg.type())) + { + const array_string_exprt string_arg = + get_string_expr(array_pool, arg); + get_arg = [string_arg](const irep_idt &id) { + return format_arg_from_string(string_arg, id); + }; + } + else + { + INVARIANT( + arg.id() == ID_struct, + "format argument should be a string or a struct"); + get_arg = [&](const irep_idt &id) { + return get_component_in_struct(to_struct_expr(arg), id); + }; + } auto result = add_axioms_for_format_specifier( fresh_symbol, fs, - [&](const irep_idt &id) { - return get_component_in_struct(to_struct_expr(arg), id); - }, + get_arg, index_type, char_type, array_pool, From 526746def28499943920a92f9d01dea2d6dcbbb4 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Apr 2019 14:43:12 +0100 Subject: [PATCH 05/10] Use non-deprecated function for hex printing add_axioms_from_int_hex is deprecated, we use add_axioms_for_string_of_int_with_radix instead. --- src/solvers/strings/string_constraint_generator_format.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/solvers/strings/string_constraint_generator_format.cpp b/src/solvers/strings/string_constraint_generator_format.cpp index 9e07e703a02..8c2b5d9588c 100644 --- a/src/solvers/strings/string_constraint_generator_format.cpp +++ b/src/solvers/strings/string_constraint_generator_format.cpp @@ -285,7 +285,8 @@ add_axioms_for_format_specifier( return_code = add_axioms_for_string_of_int(res, get_arg(ID_int), 0, ns); return {res, std::move(return_code.second)}; case format_specifiert::HEXADECIMAL_INTEGER: - return_code = add_axioms_from_int_hex(res, get_arg(ID_int)); + return_code = add_axioms_for_string_of_int_with_radix( + res, get_arg(ID_int), from_integer(16, index_type), 16, ns); return {res, std::move(return_code.second)}; case format_specifiert::SCIENTIFIC: return_code = add_axioms_from_float_scientific_notation( From 5a3772ba2bfa3027557493f9b7f56a15e8a6efb7 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Apr 2019 14:43:14 +0100 Subject: [PATCH 06/10] Remove preprocessing for String.format This should now be in the model, and the implementation of models can use CProverString.format. --- jbmc/src/java_bytecode/java_string_library_preprocess.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp index 11c72687a29..3f9a1e73652 100644 --- a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp +++ b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp @@ -1801,12 +1801,6 @@ void java_string_library_preprocesst::initialize_conversion_table() cprover_equivalent_to_java_function ["java::java.lang.String.equalsIgnoreCase:(Ljava/lang/String;)Z"]= ID_cprover_string_equals_ignore_case_func; - conversion_table - ["java::java.lang.String.format:(Ljava/lang/String;[Ljava/lang/Object;)" - "Ljava/lang/String;"] = - std::bind(&java_string_library_preprocesst::make_string_format_code, - this, std::placeholders::_1, std::placeholders::_2, - std::placeholders::_3, std::placeholders::_4); std::string format_signature = "java::org.cprover.CProverString.format:("; for(std::size_t i = 0; i < MAX_FORMAT_ARGS + 1; ++i) From 10924e17177dafd24dbd7ad41308e9f15326af72 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Apr 2019 14:43:15 +0100 Subject: [PATCH 07/10] Add tests for String.format This tests the new implementation of String.format using the java models. --- .../jbmc-strings/StringFormat/Test.class | Bin 0 -> 1843 bytes .../jbmc-strings/StringFormat/Test.java | 52 ++++++++++++++++++ .../jbmc-strings/StringFormat/test-hex1.desc | 6 ++ .../jbmc-strings/StringFormat/test-hex2.desc | 6 ++ .../jbmc-strings/StringFormat/test-hex3.desc | 6 ++ .../jbmc-strings/StringFormat/test-int1.desc | 6 ++ .../jbmc-strings/StringFormat/test-int2.desc | 6 ++ .../jbmc-strings/StringFormat/test-int3.desc | 6 ++ .../StringFormat/test-string1.desc | 8 +++ .../StringFormat/test-string2.desc | 6 ++ .../StringFormat/test-string3.desc | 6 ++ 11 files changed, 108 insertions(+) create mode 100644 jbmc/regression/jbmc-strings/StringFormat/Test.class create mode 100644 jbmc/regression/jbmc-strings/StringFormat/Test.java create mode 100644 jbmc/regression/jbmc-strings/StringFormat/test-hex1.desc create mode 100644 jbmc/regression/jbmc-strings/StringFormat/test-hex2.desc create mode 100644 jbmc/regression/jbmc-strings/StringFormat/test-hex3.desc create mode 100644 jbmc/regression/jbmc-strings/StringFormat/test-int1.desc create mode 100644 jbmc/regression/jbmc-strings/StringFormat/test-int2.desc create mode 100644 jbmc/regression/jbmc-strings/StringFormat/test-int3.desc create mode 100644 jbmc/regression/jbmc-strings/StringFormat/test-string1.desc create mode 100644 jbmc/regression/jbmc-strings/StringFormat/test-string2.desc create mode 100644 jbmc/regression/jbmc-strings/StringFormat/test-string3.desc diff --git a/jbmc/regression/jbmc-strings/StringFormat/Test.class b/jbmc/regression/jbmc-strings/StringFormat/Test.class new file mode 100644 index 0000000000000000000000000000000000000000..4620e0f976fb37d65c49e7a91c0022597442f9de GIT binary patch literal 1843 zcmb7_-BTN76vm%7U%OeBLQ+7WrKU<~z68=QTcIDAwh{})8f*>Ki%qi7txHmOH?{Zv z0ba{Eqj%n-&d`pI?VZl}H+3AJce6>TjWc$b+4t+5_x+vooD=@~``2dx&f;?oVJxUv zR8Z7#8fP?ao>OsNj_<}XjSC7cYKTjt_vGf1ie(j-R9seZMa5MWD=OYsQBrYD#dQ@o zq}2!Vv#Ox1;X~XMI67;%uIYGItLa{|T%%^24FRzw5L>pImUmeooXOr1h?H6lQ($Dx zYMQs&yEW6KsCbUm+*uMBu6RcMi*@6n@1bBV za4I)F&RpNDC@`0K)p_83fy-%u3jT=xbA}1jx=C(m$|IpH=GrcMaW)anfC)M+3XMo)i7PlF&jPISkzvd z2fSblC)to;Hm(?*luC1^K**}$H^tQqN5L_gdjjzjCxR|O%-^IscnfcHp5op&h$6&s zBN>YPj&LK+z6dD!D%qOOEa*6CK7}kiFz!z@x8~y*cIsnDTR{ z;24pPQ|B2J_&rCxOU&_Y$|wn0{6EtQcenrm literal 0 HcmV?d00001 diff --git a/jbmc/regression/jbmc-strings/StringFormat/Test.java b/jbmc/regression/jbmc-strings/StringFormat/Test.java new file mode 100644 index 00000000000..8dd33a0e98f --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/Test.java @@ -0,0 +1,52 @@ +public class Test { + public static String testHex(int i) { + String u = String.format("di%xlue", i); + if (u.equals("diffblue")) + assert(false); + else if (u.startsWith("di")) + assert(false); + else + assert(false); + return u; + } + + public static String testInt(int i) { + String u = String.format("Hello %d !", i); + if (u.equals("Hello 10 !")) + assert(false); + else if (!u.startsWith("Hello")) + assert(false); + else + assert(false); + return u; + } + + public static String string1(String s) { + if (s == null) + return "null!"; + String u = String.format("Hello %s !", s); + if (u.equals("Hello world !")) + assert(false); + else if (u.startsWith("impossible!")) + assert(false); + else + assert(false); + return u; + } + + public static String string2(String s, String t) { + if (s == null || t == null) + return "null null"; + String u = String.format("%s %s", s, t); + assert(!u.equals("ab")); + return u; + } + + public static String string3(String s, String t) { + if (s == null || t == null) + return "null null"; + String u = String.format("%s%s%s", s, t, s); + assert(s.length() != 1 || u.charAt(0) == u.charAt(u.length() - 1)); + return u; + } +} diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-hex1.desc b/jbmc/regression/jbmc-strings/StringFormat/test-hex1.desc new file mode 100644 index 00000000000..c7b7eaa5ab7 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-hex1.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.testHex --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testHex:(I)Ljava/lang/String;.assertion.1" +^EXIT=10$ +^SIGNAL=0$ +line 5 assertion at file Test.java .*: FAILURE diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-hex2.desc b/jbmc/regression/jbmc-strings/StringFormat/test-hex2.desc new file mode 100644 index 00000000000..6f2fcde2b15 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-hex2.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.testHex --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testHex:(I)Ljava/lang/String;.assertion.2" +^EXIT=10$ +^SIGNAL=0$ +line 7 assertion at file Test.java .*: FAILURE diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-hex3.desc b/jbmc/regression/jbmc-strings/StringFormat/test-hex3.desc new file mode 100644 index 00000000000..bb03f1ef3e3 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-hex3.desc @@ -0,0 +1,6 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--function Test.testHex --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testHex:(I)Ljava/lang/String;.assertion.3" +^EXIT=0$ +^SIGNAL=0$ +line 9 assertion at file Test.java .*: SUCCESS diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-int1.desc b/jbmc/regression/jbmc-strings/StringFormat/test-int1.desc new file mode 100644 index 00000000000..a537361564f --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-int1.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.testInt --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testInt:(I)Ljava/lang/String;.assertion.1" +^EXIT=10$ +^SIGNAL=0$ +line 16 assertion at file Test.java .*: FAILURE diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-int2.desc b/jbmc/regression/jbmc-strings/StringFormat/test-int2.desc new file mode 100644 index 00000000000..b789c1bdd4c --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-int2.desc @@ -0,0 +1,6 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--function Test.testInt --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testInt:(I)Ljava/lang/String;.assertion.2" +^EXIT=0$ +^SIGNAL=0$ +line 18 assertion at file Test.java .*: SUCCESS diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-int3.desc b/jbmc/regression/jbmc-strings/StringFormat/test-int3.desc new file mode 100644 index 00000000000..804c87f7a1a --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-int3.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.testInt --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testInt:(I)Ljava/lang/String;.assertion.3" +^EXIT=10$ +^SIGNAL=0$ +line 20 assertion at file Test.java .*: FAILURE diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-string1.desc b/jbmc/regression/jbmc-strings/StringFormat/test-string1.desc new file mode 100644 index 00000000000..89e02c5988d --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-string1.desc @@ -0,0 +1,8 @@ +CORE +Test.class +--function Test.string1 --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` +^EXIT=10$ +^SIGNAL=0$ +line 29 assertion at file Test.java .*: FAILURE +line 31 assertion at file Test.java .*: SUCCESS +line 33 assertion at file Test.java .*: FAILURE diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-string2.desc b/jbmc/regression/jbmc-strings/StringFormat/test-string2.desc new file mode 100644 index 00000000000..0c16d19f565 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-string2.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.string2 --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --max-nondet-string-length 20 --property "java::Test.string2:(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;.assertion.1" +^EXIT=0$ +^SIGNAL=0$ +line 41 assertion at file Test.java .*: SUCCESS diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-string3.desc b/jbmc/regression/jbmc-strings/StringFormat/test-string3.desc new file mode 100644 index 00000000000..7503c75a23b --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-string3.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.string3 --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --max-nondet-string-length 5 --property "java::Test.string3:(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;.assertion.1" +^EXIT=0$ +^SIGNAL=0$ +line 49 assertion at file Test.java .*: SUCCESS From c24de414c811300ce141fa1db2cc2af88a6e5704 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Apr 2019 14:43:17 +0100 Subject: [PATCH 08/10] Remove code that is now unreachable To use the builtin format function, the java entry point is now CProverString.format, so we can remove the legacy code which was handling String.format directly. --- .../java_string_library_preprocess.cpp | 272 ------------------ .../java_string_library_preprocess.h | 19 -- .../string_constraint_generator_format.cpp | 62 ++-- 3 files changed, 22 insertions(+), 331 deletions(-) diff --git a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp index 3f9a1e73652..98867804712 100644 --- a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp +++ b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp @@ -1101,278 +1101,6 @@ code_blockt java_string_library_preprocesst::make_assign_function_from_call( function_id, type, loc, symbol_table, false); } -/// Adds to the code an assignment of the form -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// type_name tmp_type_name -/// tmp_type_name = ((Classname*)arg_i)->value -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// and returns `tmp_typename`. -/// In case the class corresponding to `type_name` is not available in -/// `symbol_table`, the variable is declared but not assigned. -/// Used to access the values of the arguments of `String.format`. -/// \param object: an expression representing a reference to an object -/// \param type_name: name of the corresponding primitive type, this can be -/// one of the following: ID_boolean, ID_char, ID_byte, ID_short, ID_int, -/// ID_long, ID_float, ID_double, ID_void -/// \param loc: a location in the source -/// \param function_id: the name of the function -/// \param symbol_table: the symbol table -/// \param code: code block to which we are adding some assignments -/// \return An expression containing a symbol `tmp_type_name` where `type_name` -/// is the given argument (ie. boolean, char etc.). Which represents the -/// primitive value contained in the given object. -optionalt -java_string_library_preprocesst::get_primitive_value_of_object( - const exprt &object, irep_idt type_name, const source_locationt &loc, - const irep_idt &function_id, symbol_table_baset &symbol_table, - code_blockt &code) { - optionalt object_type; - - typet value_type; - if (type_name == ID_boolean) { - value_type = java_boolean_type(); - object_type = struct_tag_typet("java::java.lang.Boolean"); - } else if (type_name == ID_char) { - value_type = java_char_type(); - object_type = struct_tag_typet("java::java.lang.Character"); - } else if (type_name == ID_byte) { - value_type = java_byte_type(); - object_type = struct_tag_typet("java::java.lang.Byte"); - } else if (type_name == ID_short) { - value_type = java_short_type(); - object_type = struct_tag_typet("java::java.lang.Short"); - } else if (type_name == ID_int) { - value_type = java_int_type(); - object_type = struct_tag_typet("java::java.lang.Integer"); - } else if (type_name == ID_long) { - value_type = java_long_type(); - object_type = struct_tag_typet("java::java.lang.Long"); - } else if (type_name == ID_float) { - value_type = java_float_type(); - object_type = struct_tag_typet("java::java.lang.Float"); - } else if (type_name == ID_double) { - value_type = java_double_type(); - object_type = struct_tag_typet("java::java.lang.Double"); - } else if (type_name == ID_void) - return {}; - else - UNREACHABLE; - - DATA_INVARIANT(object_type.has_value(), "must have symbol for primitive"); - - // declare tmp_type_name to hold the value - const std::string aux_name = "tmp_" + id2string(type_name); - const symbolt &symbol = - fresh_java_symbol(value_type, aux_name, loc, function_id, symbol_table); - const auto value = symbol.symbol_expr(); - - // Check that the type of the object is in the symbol table, - // otherwise there is no safe way of finding its value. - if (const auto maybe_symbol = - symbol_table.lookup(object_type->get_identifier())) { - const struct_typet &struct_type = to_struct_type(maybe_symbol->type); - // Check that the type has a value field - const struct_union_typet::componentt &value_comp = - struct_type.get_component("value"); - if (!value_comp.is_nil()) { - const pointer_typet struct_pointer_type = pointer_type(struct_type); - dereference_exprt deref{typecast_exprt(object, struct_pointer_type), - struct_pointer_type.subtype()}; - const member_exprt deref_value{std::move(deref), value_comp.get_name(), - value_comp.type()}; - code.add(code_assignt(value, deref_value), loc); - return value; - } - } - - warning() << object_type->get_identifier() - << " not available to format function" << eom; - code.add(code_declt(value), loc); - return value; -} - -/// Helper for format function. Returns the expression: -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// *((void**)(argv->data)+index ) -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// which corresponds to the object at position `index` in `argv`. -/// \param argv: reference to an array of references -/// \param index: index of the desired object -/// \return An expression representing the object at position `index` of `argv`. -dereference_exprt -java_string_library_preprocesst::get_object_at_index(const exprt &argv, - std::size_t index) { - const dereference_exprt deref_objs{argv, argv.type().subtype()}; - const pointer_typet empty_pointer = pointer_type(java_void_type()); - const pointer_typet pointer_of_pointer = pointer_type(empty_pointer); - const member_exprt data_member{deref_objs, "data", pointer_of_pointer}; - const plus_exprt data_pointer_plus_index{ - data_member, from_integer(index, java_int_type()), data_member.type()}; - return dereference_exprt{data_pointer_plus_index, - data_pointer_plus_index.type().subtype()}; -} - -/// Helper for format function. Adds code: -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// string_expr arg_i_string_expr; -/// int tmp_int; -/// float tmp_float; -/// char tmp_char; -/// boolean tmp_boolean; -/// Object* arg_i=get_object_at_index(argv, index) -/// if(arg_i.@class_identifier=="java::java.lang.String") -/// { -/// arg_i_string_expr = (string_expr)((String*)arg_i_as_string) -/// } -/// tmp_int=((Integer)arg_i)->value -/// tmp_float=((Float)arg_i)->value -/// tmp_char=((Char)arg_i)->value -/// tmp_boolean=((Boolean)arg_i)->value -/// arg_i_struct = { string_expr=arg_i_string_expr; -/// integer=tmp_int; float=tmp_float; char=tmp_char; boolean=tmp_boolean} -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// and returns `arg_i_struct`. -/// -/// TODO: date_time and hash code are not implemented -/// \param argv: reference to an array of references -/// \param index: index of the desired argument -/// \param structured_type: type for arguments of the internal format function -/// \param loc: a location in the source -/// \param function_id: function the generated expression will be used in -/// \param symbol_table: the symbol table -/// \param code: code block to which we are adding some assignments -/// \return An expression of type `structured_type` representing the possible -/// values of the argument at position `index` of `argv`. -struct_exprt java_string_library_preprocesst::make_argument_for_format( - const exprt &argv, std::size_t index, const struct_typet &structured_type, - const source_locationt &loc, const irep_idt &function_id, - symbol_table_baset &symbol_table, code_blockt &code) { - // Declarations of the fields of arg_i_struct - // arg_i_struct is { arg_i_string_expr, tmp_int, tmp_char, ... } - struct_exprt arg_i_struct({}, structured_type); - std::list field_exprs; - for (const auto &comp : structured_type.components()) { - const irep_idt &name = comp.get_name(); - const typet &type = comp.type(); - exprt field_expr; - if (name != "string_expr") { - std::string tmp_name = "tmp_" + id2string(name); - const symbolt &field_symbol = - fresh_java_symbol(type, tmp_name, loc, function_id, symbol_table); - auto field_symbol_expr = field_symbol.symbol_expr(); - field_expr = field_symbol_expr; - code.add(code_declt(field_symbol_expr), loc); - } else - field_expr = - make_nondet_string_expr(loc, function_id, symbol_table, code); - - field_exprs.push_back(field_expr); - arg_i_struct.copy_to_operands(field_expr); - } - - // arg_i = argv[index] - const exprt obj = get_object_at_index(argv, index); - const symbolt &object_symbol = fresh_java_symbol( - obj.type(), "tmp_format_obj", loc, function_id, symbol_table); - const symbol_exprt arg_i = object_symbol.symbol_expr(); - - allocate_objectst allocate_objects(ID_java, loc, function_id, symbol_table); - - code_blockt tmp; - allocate_objects.allocate_dynamic_object(tmp, arg_i, arg_i.type().subtype()); - allocate_objects.declare_created_symbols(code); - code.append(tmp); - - code.add(code_assignt(arg_i, obj), loc); - code.add(code_assumet(not_exprt(equal_exprt( - arg_i, null_pointer_exprt(to_pointer_type(arg_i.type()))))), - loc); - - code_blockt code_not_null; - - // Assigning all the fields of arg_i_struct - for (const auto &comp : structured_type.components()) { - const irep_idt &name = comp.get_name(); - exprt field_expr = field_exprs.front(); - field_exprs.pop_front(); - - if (name == "string_expr") { - const pointer_typet string_pointer = - java_reference_type(struct_tag_typet("java::java.lang.String")); - const typecast_exprt arg_i_as_string{arg_i, string_pointer}; - code_assign_java_string_to_string_expr(to_string_expr(field_expr), - arg_i_as_string, loc, symbol_table, - code_not_null); - } else if (name == ID_int || name == ID_float || name == ID_char || - name == ID_boolean) { - const auto value = get_primitive_value_of_object( - arg_i, name, loc, function_id, symbol_table, code_not_null); - if (value.has_value()) - code_not_null.add(code_assignt(field_expr, *value), loc); - else - code_not_null.add(code_assignt(field_expr, nil_exprt()), loc); - } else { - // TODO: date_time and hash_code not implemented - } - } - - code.add(code_not_null, loc); - return arg_i_struct; -} - -/// Used to provide code for the Java String.format function. -/// -/// TODO: date_time and hash code are not implemented, and we set a limit of -/// 10 arguments -/// \param type: type of the function call -/// \param loc: location in the program_invocation_name -/// \param function_id: function the generated code will be used in -/// \param symbol_table: symbol table -/// \return Code implementing the Java String.format function. Since the exact -/// class of the arguments is not known, we give as argument to the internal -// format function a structure containing the different possible types. -code_blockt java_string_library_preprocesst::make_string_format_code( - const java_method_typet &type, const source_locationt &loc, - const irep_idt &function_id, symbol_table_baset &symbol_table) { - PRECONDITION(type.parameters().size() == 2); - code_blockt code; - exprt::operandst args = process_parameters(type.parameters(), loc, - function_id, symbol_table, code); - INVARIANT(args.size() == 2, "String.format should have two arguments"); - - // The argument can be: - // a string, an integer, a floating point, a character, a boolean, - // an object of which we take the hash code, or a date/time - struct_typet structured_type({{"string_expr", refined_string_type}, - {ID_int, java_int_type()}, - {ID_float, java_float_type()}, - {ID_char, java_char_type()}, - {ID_boolean, java_boolean_type()}, - // TODO: hash_code not implemented for now - {"hashcode", java_int_type()}, - // TODO: date_time type not implemented for now - {"date_time", java_int_type()}}); - structured_type.set_tag(CPROVER_PREFIX "string_formatter_variant"); - - // We will process arguments so that each is converted to a `struct_exprt` - // containing each possible type used in format specifiers. - std::vector processed_args; - processed_args.push_back(args[0]); - for (std::size_t i = 0; i < MAX_FORMAT_ARGS; ++i) - processed_args.push_back(make_argument_for_format( - args[1], i, structured_type, loc, function_id, symbol_table, code)); - - const refined_string_exprt string_expr = string_expr_of_function( - ID_cprover_string_format_func, processed_args, loc, symbol_table, code); - const exprt java_string = allocate_fresh_string( - type.return_type(), loc, function_id, symbol_table, code); - code.add(code_assign_string_expr_to_java_string(java_string, string_expr, - symbol_table, true), - loc); - code.add(code_returnt(java_string), loc); - return code; -} - /// Used to provide our own implementation of the java.lang.Object.getClass() /// function. /// \param type: type of the function called diff --git a/jbmc/src/java_bytecode/java_string_library_preprocess.h b/jbmc/src/java_bytecode/java_string_library_preprocess.h index b463c06e924..19035070128 100644 --- a/jbmc/src/java_bytecode/java_string_library_preprocess.h +++ b/jbmc/src/java_bytecode/java_string_library_preprocess.h @@ -151,11 +151,6 @@ class java_string_library_preprocesst:public messaget const irep_idt &function_id, symbol_table_baset &symbol_table); - code_blockt make_string_format_code(const java_method_typet &type, - const source_locationt &loc, - const irep_idt &function_id, - symbol_table_baset &symbol_table); - code_blockt make_copy_string_code( const java_method_typet &type, const source_locationt &loc, @@ -312,20 +307,6 @@ class java_string_library_preprocesst:public messaget const java_method_typet &type, const source_locationt &loc, symbol_table_baset &symbol_table); - - struct_exprt make_argument_for_format(const exprt &argv, std::size_t index, - const struct_typet &structured_type, - const source_locationt &loc, - const irep_idt &function_id, - symbol_table_baset &symbol_table, - code_blockt &code); - - optionalt get_primitive_value_of_object( - const exprt &object, irep_idt type_name, const source_locationt &loc, - const irep_idt &function_id, symbol_table_baset &symbol_table, - code_blockt &code); - - dereference_exprt get_object_at_index(const exprt &argv, std::size_t index); }; exprt make_nondet_infinite_char_array( diff --git a/src/solvers/strings/string_constraint_generator_format.cpp b/src/solvers/strings/string_constraint_generator_format.cpp index 8c2b5d9588c..6c7239fc9bb 100644 --- a/src/solvers/strings/string_constraint_generator_format.cpp +++ b/src/solvers/strings/string_constraint_generator_format.cpp @@ -239,17 +239,6 @@ static std::vector parse_format_string(std::string s) return al; } -/// Helper for add_axioms_for_format_specifier -/// \param expr: a structured expression -/// \param component_name: name of the desired component -/// \return Expression in the component of `expr` named `component_name`. -static exprt get_component_in_struct(const struct_exprt &expr, - irep_idt component_name) { - const struct_typet &type = to_struct_type(expr.type()); - std::size_t number = type.component_number(component_name); - return expr.operands()[number]; -} - /// Parse `s` and add axioms ensuring the output corresponds to the output of /// String.format. Assumes the argument is a structured expression which /// contains the fields: string expr, int, float, char, boolean, hashcode, @@ -447,6 +436,9 @@ std::pair add_axioms_for_format( if(fe.is_format_specifier()) { const format_specifiert &fs = fe.get_format_specifier(); + std::function get_arg = + [](const irep_idt &) -> exprt { UNREACHABLE; }; + if( fs.conversion != format_specifiert::PERCENT_SIGN && fs.conversion != format_specifiert::LINE_SEPARATOR) @@ -470,36 +462,26 @@ std::pair add_axioms_for_format( arg = to_struct_expr(args[fs.index - 1]); } - std::function get_arg; - if(is_refined_string_type(arg.type())) - { - const array_string_exprt string_arg = - get_string_expr(array_pool, arg); - get_arg = [string_arg](const irep_idt &id) { - return format_arg_from_string(string_arg, id); - }; - } - else - { - INVARIANT( - arg.id() == ID_struct, - "format argument should be a string or a struct"); - get_arg = [&](const irep_idt &id) { - return get_component_in_struct(to_struct_expr(arg), id); - }; - } - auto result = add_axioms_for_format_specifier( - fresh_symbol, - fs, - get_arg, - index_type, - char_type, - array_pool, - message, - ns); - merge(constraints, std::move(result.second)); - intermediary_strings.push_back(result.first); + INVARIANT( + is_refined_string_type(arg.type()), + "arguments of format should be strings"); + const array_string_exprt string_arg = get_string_expr(array_pool, arg); + get_arg = [string_arg](const irep_idt &id) { + return format_arg_from_string(string_arg, id); + }; } + + auto result = add_axioms_for_format_specifier( + fresh_symbol, + fs, + get_arg, + index_type, + char_type, + array_pool, + message, + ns); + merge(constraints, std::move(result.second)); + intermediary_strings.push_back(result.first); } else { From 8fc7988d8828bafdf87563b4fca6474b6a1e630f Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Apr 2019 14:43:18 +0100 Subject: [PATCH 09/10] Update add_axioms_for_format_specifier documentation Reflects the new way the argument are passed to the format builtin function. --- .../strings/string_constraint_generator_format.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/solvers/strings/string_constraint_generator_format.cpp b/src/solvers/strings/string_constraint_generator_format.cpp index 6c7239fc9bb..ae6da1a8969 100644 --- a/src/solvers/strings/string_constraint_generator_format.cpp +++ b/src/solvers/strings/string_constraint_generator_format.cpp @@ -240,10 +240,10 @@ static std::vector parse_format_string(std::string s) } /// Parse `s` and add axioms ensuring the output corresponds to the output of -/// String.format. Assumes the argument is a structured expression which -/// contains the fields: string expr, int, float, char, boolean, hashcode, -/// date_time. The correct component will be fetched depending on the format -/// specifier. +/// String.format. Assumes the argument is a string representing one of: +/// string expr, int, float, char, boolean, hashcode, date_time. +/// The correct type will be retrieved by calling get_arg with an id depending +/// on the format specifier. /// \param fresh_symbol: generator of fresh symbols /// \param fs: a format specifier /// \param get_arg: a function returning the possible value of the argument to From 7c34ea4258d32854669aa3dc94084ae37c93fd1e Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 2 Apr 2019 14:43:20 +0100 Subject: [PATCH 10/10] Handle the case of null arguments for format --- .../string_constraint_generator_format.cpp | 57 ++++++++++++++----- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/src/solvers/strings/string_constraint_generator_format.cpp b/src/solvers/strings/string_constraint_generator_format.cpp index ae6da1a8969..d13092a3d59 100644 --- a/src/solvers/strings/string_constraint_generator_format.cpp +++ b/src/solvers/strings/string_constraint_generator_format.cpp @@ -239,6 +239,18 @@ static std::vector parse_format_string(std::string s) return al; } +/// Expression which is true when the string is equal to the literal "null" +static exprt is_null(const array_string_exprt &string) +{ + return and_exprt{ + equal_exprt{string.length(), from_integer(4, string.length().type())}, + and_exprt{ + equal_exprt{string[0], from_integer('n', string[0].type())}, + equal_exprt{string[1], from_integer('u', string[0].type())}, + equal_exprt{string[2], from_integer('l', string[0].type())}, + equal_exprt{string[3], from_integer('l', string[0].type())}}}; +} + /// Parse `s` and add axioms ensuring the output corresponds to the output of /// String.format. Assumes the argument is a string representing one of: /// string expr, int, float, char, boolean, hashcode, date_time. @@ -286,8 +298,30 @@ add_axioms_for_format_specifier( fresh_symbol, res, get_arg(ID_float), array_pool, ns); return {res, std::move(return_code.second)}; case format_specifiert::CHARACTER: - return_code = add_axioms_from_char(res, get_arg(ID_char)); - return {res, std::move(return_code.second)}; + { + exprt arg_string = get_arg(ID_char); + array_string_exprt &string_expr = to_array_string_expr(arg_string); + // In the case the arg is null, the result will be equal to "null" and + // and otherwise we just take the 4th character of the string. + const exprt is_null_literal = is_null(string_expr); + constraints.existential.push_back(equal_exprt{ + res.length(), + if_exprt{ + is_null_literal, + from_integer(4, index_type), + from_integer(1, index_type)}}); + constraints.existential.push_back(implies_exprt{ + is_null_literal, + and_exprt{ + equal_exprt{res[0], from_integer('n', char_type)}, + equal_exprt{res[1], from_integer('u', char_type)}, + equal_exprt{res[2], from_integer('l', char_type)}, + equal_exprt{res[3], from_integer('l', char_type)}}}); + constraints.existential.push_back(implies_exprt{ + not_exprt{is_null_literal}, + equal_exprt{res[0], typecast_exprt{string_expr[3], char_type}}}); + return {res, constraints}; + } case format_specifiert::BOOLEAN: return_code = add_axioms_from_bool(res, get_arg(ID_boolean)); return {res, std::move(return_code.second)}; @@ -378,23 +412,18 @@ static exprt format_arg_from_string( shl_exprt{typecast_exprt{string[2], type}, 16}, typecast_exprt{string[3], type}}}; } + if(id == ID_char) { - // We assume the string has length exactly 4 and ignore the first 3 - // (unsigned16)string.content[3] - const unsignedbv_typet type{16}; - return typecast_exprt{ - index_exprt{string.content(), from_integer(3, string.length().type())}, - type}; + // Leave the string unchanged as the "null" string is used to represent null + return string; } if(id == ID_boolean) { - // We assume the string has length exactly 4 and ignore the first 3 - // (bool)string.content[3] - const c_bool_typet type{8}; - return typecast_exprt{ - index_exprt{string.content(), from_integer(3, string.length().type())}, - type}; + // We assume the string has length exactly 4, if it is "null" return false + // and otherwise ignore the first 3 and return (bool)string.content[3] + return if_exprt{ + is_null(string), false_exprt{}, typecast_exprt{string[3], bool_typet()}}; } if(id == ID_float) {