diff --git a/jbmc/lib/java-models-library b/jbmc/lib/java-models-library index d92026cd8dd..287f0b7ef3c 160000 --- a/jbmc/lib/java-models-library +++ b/jbmc/lib/java-models-library @@ -1 +1 @@ -Subproject commit d92026cd8dd8f1e769f5b11831e831fefdc4c080 +Subproject commit 287f0b7ef3cba9b3779bccc70b6009e5e611d6fa diff --git a/jbmc/regression/jbmc-strings/StringFormat/Test.class b/jbmc/regression/jbmc-strings/StringFormat/Test.class new file mode 100644 index 00000000000..4620e0f976f Binary files /dev/null and b/jbmc/regression/jbmc-strings/StringFormat/Test.class differ diff --git a/jbmc/regression/jbmc-strings/StringFormat/Test.java b/jbmc/regression/jbmc-strings/StringFormat/Test.java new file mode 100644 index 00000000000..8dd33a0e98f --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/Test.java @@ -0,0 +1,52 @@ +public class Test { + public static String testHex(int i) { + String u = String.format("di%xlue", i); + if (u.equals("diffblue")) + assert(false); + else if (u.startsWith("di")) + assert(false); + else + assert(false); + return u; + } + + public static String testInt(int i) { + String u = String.format("Hello %d !", i); + if (u.equals("Hello 10 !")) + assert(false); + else if (!u.startsWith("Hello")) + assert(false); + else + assert(false); + return u; + } + + public static String string1(String s) { + if (s == null) + return "null!"; + String u = String.format("Hello %s !", s); + if (u.equals("Hello world !")) + assert(false); + else if (u.startsWith("impossible!")) + assert(false); + else + assert(false); + return u; + } + + public static String string2(String s, String t) { + if (s == null || t == null) + return "null null"; + String u = String.format("%s %s", s, t); + assert(!u.equals("ab")); + return u; + } + + public static String string3(String s, String t) { + if (s == null || t == null) + return "null null"; + String u = String.format("%s%s%s", s, t, s); + assert(s.length() != 1 || u.charAt(0) == u.charAt(u.length() - 1)); + return u; + } +} diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-hex1.desc b/jbmc/regression/jbmc-strings/StringFormat/test-hex1.desc new file mode 100644 index 00000000000..c7b7eaa5ab7 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-hex1.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.testHex --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testHex:(I)Ljava/lang/String;.assertion.1" +^EXIT=10$ +^SIGNAL=0$ +line 5 assertion at file Test.java .*: FAILURE diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-hex2.desc b/jbmc/regression/jbmc-strings/StringFormat/test-hex2.desc new file mode 100644 index 00000000000..6f2fcde2b15 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-hex2.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.testHex --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testHex:(I)Ljava/lang/String;.assertion.2" +^EXIT=10$ +^SIGNAL=0$ +line 7 assertion at file Test.java .*: FAILURE diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-hex3.desc b/jbmc/regression/jbmc-strings/StringFormat/test-hex3.desc new file mode 100644 index 00000000000..bb03f1ef3e3 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-hex3.desc @@ -0,0 +1,6 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--function Test.testHex --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testHex:(I)Ljava/lang/String;.assertion.3" +^EXIT=0$ +^SIGNAL=0$ +line 9 assertion at file Test.java .*: SUCCESS diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-int1.desc b/jbmc/regression/jbmc-strings/StringFormat/test-int1.desc new file mode 100644 index 00000000000..a537361564f --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-int1.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.testInt --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testInt:(I)Ljava/lang/String;.assertion.1" +^EXIT=10$ +^SIGNAL=0$ +line 16 assertion at file Test.java .*: FAILURE diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-int2.desc b/jbmc/regression/jbmc-strings/StringFormat/test-int2.desc new file mode 100644 index 00000000000..b789c1bdd4c --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-int2.desc @@ -0,0 +1,6 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--function Test.testInt --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testInt:(I)Ljava/lang/String;.assertion.2" +^EXIT=0$ +^SIGNAL=0$ +line 18 assertion at file Test.java .*: SUCCESS diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-int3.desc b/jbmc/regression/jbmc-strings/StringFormat/test-int3.desc new file mode 100644 index 00000000000..804c87f7a1a --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-int3.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.testInt --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --property "java::Test.testInt:(I)Ljava/lang/String;.assertion.3" +^EXIT=10$ +^SIGNAL=0$ +line 20 assertion at file Test.java .*: FAILURE diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-string1.desc b/jbmc/regression/jbmc-strings/StringFormat/test-string1.desc new file mode 100644 index 00000000000..89e02c5988d --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-string1.desc @@ -0,0 +1,8 @@ +CORE +Test.class +--function Test.string1 --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` +^EXIT=10$ +^SIGNAL=0$ +line 29 assertion at file Test.java .*: FAILURE +line 31 assertion at file Test.java .*: SUCCESS +line 33 assertion at file Test.java .*: FAILURE diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-string2.desc b/jbmc/regression/jbmc-strings/StringFormat/test-string2.desc new file mode 100644 index 00000000000..0c16d19f565 --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-string2.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.string2 --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --max-nondet-string-length 20 --property "java::Test.string2:(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;.assertion.1" +^EXIT=0$ +^SIGNAL=0$ +line 41 assertion at file Test.java .*: SUCCESS diff --git a/jbmc/regression/jbmc-strings/StringFormat/test-string3.desc b/jbmc/regression/jbmc-strings/StringFormat/test-string3.desc new file mode 100644 index 00000000000..7503c75a23b --- /dev/null +++ b/jbmc/regression/jbmc-strings/StringFormat/test-string3.desc @@ -0,0 +1,6 @@ +CORE +Test.class +--function Test.string3 --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar` --max-nondet-string-length 5 --property "java::Test.string3:(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;.assertion.1" +^EXIT=0$ +^SIGNAL=0$ +line 49 assertion at file Test.java .*: SUCCESS diff --git a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp index 34e27ba0333..98867804712 100644 --- a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp +++ b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp @@ -1101,325 +1101,6 @@ code_blockt java_string_library_preprocesst::make_assign_function_from_call( function_id, type, loc, symbol_table, false); } -/// Adds to the code an assignment of the form -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// type_name tmp_type_name -/// tmp_type_name = ((Classname*)arg_i)->value -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// and returns `tmp_typename`. -/// In case the class corresponding to `type_name` is not available in -/// `symbol_table`, the variable is declared but not assigned. -/// Used to access the values of the arguments of `String.format`. -/// \param object: an expression representing a reference to an object -/// \param type_name: name of the corresponding primitive type, this can be -/// one of the following: ID_boolean, ID_char, ID_byte, ID_short, ID_int, -/// ID_long, ID_float, ID_double, ID_void -/// \param loc: a location in the source -/// \param function_id: the name of the function -/// \param symbol_table: the symbol table -/// \param code: code block to which we are adding some assignments -/// \return An expression containing a symbol `tmp_type_name` where `type_name` -/// is the given argument (ie. boolean, char etc.). Which represents the -/// primitive value contained in the given object. -optionalt -java_string_library_preprocesst::get_primitive_value_of_object( - const exprt &object, - irep_idt type_name, - const source_locationt &loc, - const irep_idt &function_id, - symbol_table_baset &symbol_table, - code_blockt &code) -{ - optionalt object_type; - - typet value_type; - if(type_name==ID_boolean) - { - value_type=java_boolean_type(); - object_type = struct_tag_typet("java::java.lang.Boolean"); - } - else if(type_name==ID_char) - { - value_type=java_char_type(); - object_type = struct_tag_typet("java::java.lang.Character"); - } - else if(type_name==ID_byte) - { - value_type=java_byte_type(); - object_type = struct_tag_typet("java::java.lang.Byte"); - } - else if(type_name==ID_short) - { - value_type=java_short_type(); - object_type = struct_tag_typet("java::java.lang.Short"); - } - else if(type_name==ID_int) - { - value_type=java_int_type(); - object_type = struct_tag_typet("java::java.lang.Integer"); - } - else if(type_name==ID_long) - { - value_type=java_long_type(); - object_type = struct_tag_typet("java::java.lang.Long"); - } - else if(type_name==ID_float) - { - value_type=java_float_type(); - object_type = struct_tag_typet("java::java.lang.Float"); - } - else if(type_name==ID_double) - { - value_type=java_double_type(); - object_type = struct_tag_typet("java::java.lang.Double"); - } - else if(type_name==ID_void) - return {}; - else - UNREACHABLE; - - DATA_INVARIANT(object_type.has_value(), "must have symbol for primitive"); - - // declare tmp_type_name to hold the value - const std::string aux_name = "tmp_" + id2string(type_name); - const symbolt &symbol = - fresh_java_symbol(value_type, aux_name, loc, function_id, symbol_table); - const auto value = symbol.symbol_expr(); - - // Check that the type of the object is in the symbol table, - // otherwise there is no safe way of finding its value. - if( - const auto maybe_symbol = - symbol_table.lookup(object_type->get_identifier())) - { - const struct_typet &struct_type = to_struct_type(maybe_symbol->type); - // Check that the type has a value field - const struct_union_typet::componentt &value_comp = - struct_type.get_component("value"); - if(!value_comp.is_nil()) - { - const pointer_typet struct_pointer_type = pointer_type(struct_type); - dereference_exprt deref{typecast_exprt(object, struct_pointer_type), - struct_pointer_type.subtype()}; - const member_exprt deref_value{ - std::move(deref), value_comp.get_name(), value_comp.type()}; - code.add(code_assignt(value, deref_value), loc); - return value; - } - } - - warning() << object_type->get_identifier() - << " not available to format function" << eom; - code.add(code_declt(value), loc); - return value; -} - -/// Helper for format function. Returns the expression: -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// *((void**)(argv->data)+index ) -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// which corresponds to the object at position `index` in `argv`. -/// \param argv: reference to an array of references -/// \param index: index of the desired object -/// \return An expression representing the object at position `index` of `argv`. -dereference_exprt java_string_library_preprocesst::get_object_at_index( - const exprt &argv, - std::size_t index) -{ - const dereference_exprt deref_objs{argv, argv.type().subtype()}; - const pointer_typet empty_pointer = pointer_type(java_void_type()); - const pointer_typet pointer_of_pointer = pointer_type(empty_pointer); - const member_exprt data_member{deref_objs, "data", pointer_of_pointer}; - const plus_exprt data_pointer_plus_index{ - data_member, from_integer(index, java_int_type()), data_member.type()}; - return dereference_exprt{data_pointer_plus_index, - data_pointer_plus_index.type().subtype()}; -} - -/// Helper for format function. Adds code: -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// string_expr arg_i_string_expr; -/// int tmp_int; -/// float tmp_float; -/// char tmp_char; -/// boolean tmp_boolean; -/// Object* arg_i=get_object_at_index(argv, index) -/// if(arg_i.@class_identifier=="java::java.lang.String") -/// { -/// arg_i_string_expr = (string_expr)((String*)arg_i_as_string) -/// } -/// tmp_int=((Integer)arg_i)->value -/// tmp_float=((Float)arg_i)->value -/// tmp_char=((Char)arg_i)->value -/// tmp_boolean=((Boolean)arg_i)->value -/// arg_i_struct = { string_expr=arg_i_string_expr; -/// integer=tmp_int; float=tmp_float; char=tmp_char; boolean=tmp_boolean} -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// and returns `arg_i_struct`. -/// -/// TODO: date_time and hash code are not implemented -/// \param argv: reference to an array of references -/// \param index: index of the desired argument -/// \param structured_type: type for arguments of the internal format function -/// \param loc: a location in the source -/// \param function_id: function the generated expression will be used in -/// \param symbol_table: the symbol table -/// \param code: code block to which we are adding some assignments -/// \return An expression of type `structured_type` representing the possible -/// values of the argument at position `index` of `argv`. -struct_exprt java_string_library_preprocesst::make_argument_for_format( - const exprt &argv, - std::size_t index, - const struct_typet &structured_type, - const source_locationt &loc, - const irep_idt &function_id, - symbol_table_baset &symbol_table, - code_blockt &code) -{ - // Declarations of the fields of arg_i_struct - // arg_i_struct is { arg_i_string_expr, tmp_int, tmp_char, ... } - struct_exprt arg_i_struct({}, structured_type); - std::list field_exprs; - for(const auto & comp : structured_type.components()) - { - const irep_idt &name=comp.get_name(); - const typet &type=comp.type(); - exprt field_expr; - if(name!="string_expr") - { - std::string tmp_name="tmp_"+id2string(name); - const symbolt &field_symbol = - fresh_java_symbol(type, tmp_name, loc, function_id, symbol_table); - auto field_symbol_expr = field_symbol.symbol_expr(); - field_expr = field_symbol_expr; - code.add(code_declt(field_symbol_expr), loc); - } - else - field_expr = - make_nondet_string_expr(loc, function_id, symbol_table, code); - - field_exprs.push_back(field_expr); - arg_i_struct.copy_to_operands(field_expr); - } - - // arg_i = argv[index] - const exprt obj = get_object_at_index(argv, index); - const symbolt &object_symbol = fresh_java_symbol( - obj.type(), "tmp_format_obj", loc, function_id, symbol_table); - const symbol_exprt arg_i = object_symbol.symbol_expr(); - - allocate_objectst allocate_objects(ID_java, loc, function_id, symbol_table); - - code_blockt tmp; - allocate_objects.allocate_dynamic_object(tmp, arg_i, arg_i.type().subtype()); - allocate_objects.declare_created_symbols(code); - code.append(tmp); - - code.add(code_assignt(arg_i, obj), loc); - code.add( - code_assumet( - not_exprt( - equal_exprt(arg_i, null_pointer_exprt(to_pointer_type(arg_i.type()))))), - loc); - - code_blockt code_not_null; - - // Assigning all the fields of arg_i_struct - for(const auto &comp : structured_type.components()) - { - const irep_idt &name=comp.get_name(); - exprt field_expr=field_exprs.front(); - field_exprs.pop_front(); - - if(name=="string_expr") - { - const pointer_typet string_pointer = - java_reference_type(struct_tag_typet("java::java.lang.String")); - const typecast_exprt arg_i_as_string{arg_i, string_pointer}; - code_assign_java_string_to_string_expr( - to_string_expr(field_expr), - arg_i_as_string, - loc, - symbol_table, - code_not_null); - } - else if(name==ID_int || name==ID_float || name==ID_char || name==ID_boolean) - { - const auto value = get_primitive_value_of_object( - arg_i, name, loc, function_id, symbol_table, code_not_null); - if(value.has_value()) - code_not_null.add(code_assignt(field_expr, *value), loc); - else - code_not_null.add(code_assignt(field_expr, nil_exprt()), loc); - } - else - { - // TODO: date_time and hash_code not implemented - } - } - - code.add(code_not_null, loc); - return arg_i_struct; -} - -/// Used to provide code for the Java String.format function. -/// -/// TODO: date_time and hash code are not implemented, and we set a limit of -/// 10 arguments -/// \param type: type of the function call -/// \param loc: location in the program_invocation_name -/// \param function_id: function the generated code will be used in -/// \param symbol_table: symbol table -/// \return Code implementing the Java String.format function. Since the exact -/// class of the arguments is not known, we give as argument to the internal -// format function a structure containing the different possible types. -code_blockt java_string_library_preprocesst::make_string_format_code( - const java_method_typet &type, - const source_locationt &loc, - const irep_idt &function_id, - symbol_table_baset &symbol_table) -{ - PRECONDITION(type.parameters().size()==2); - code_blockt code; - exprt::operandst args = - process_parameters(type.parameters(), loc, function_id, symbol_table, code); - INVARIANT(args.size()==2, "String.format should have two arguments"); - - // The argument can be: - // a string, an integer, a floating point, a character, a boolean, - // an object of which we take the hash code, or a date/time - struct_typet structured_type({{"string_expr", refined_string_type}, - {ID_int, java_int_type()}, - {ID_float, java_float_type()}, - {ID_char, java_char_type()}, - {ID_boolean, java_boolean_type()}, - // TODO: hash_code not implemented for now - {"hashcode", java_int_type()}, - // TODO: date_time type not implemented for now - {"date_time", java_int_type()}}); - structured_type.set_tag(CPROVER_PREFIX "string_formatter_variant"); - - // We will process arguments so that each is converted to a `struct_exprt` - // containing each possible type used in format specifiers. - std::vector processed_args; - processed_args.push_back(args[0]); - for(std::size_t i=0; i get_primitive_value_of_object( - const exprt &object, - irep_idt type_name, - const source_locationt &loc, - const irep_idt &function_id, - symbol_table_baset &symbol_table, - code_blockt &code); - - dereference_exprt get_object_at_index(const exprt &argv, std::size_t index); }; exprt make_nondet_infinite_char_array( diff --git a/src/solvers/strings/string_constraint_generator_format.cpp b/src/solvers/strings/string_constraint_generator_format.cpp index 6e96d974074..d13092a3d59 100644 --- a/src/solvers/strings/string_constraint_generator_format.cpp +++ b/src/solvers/strings/string_constraint_generator_format.cpp @@ -239,26 +239,27 @@ static std::vector parse_format_string(std::string s) return al; } -/// Helper for add_axioms_for_format_specifier -/// \param expr: a structured expression -/// \param component_name: name of the desired component -/// \return Expression in the component of `expr` named `component_name`. -static exprt -get_component_in_struct(const struct_exprt &expr, irep_idt component_name) +/// Expression which is true when the string is equal to the literal "null" +static exprt is_null(const array_string_exprt &string) { - const struct_typet &type = to_struct_type(expr.type()); - std::size_t number = type.component_number(component_name); - return expr.operands()[number]; + return and_exprt{ + equal_exprt{string.length(), from_integer(4, string.length().type())}, + and_exprt{ + equal_exprt{string[0], from_integer('n', string[0].type())}, + equal_exprt{string[1], from_integer('u', string[0].type())}, + equal_exprt{string[2], from_integer('l', string[0].type())}, + equal_exprt{string[3], from_integer('l', string[0].type())}}}; } /// Parse `s` and add axioms ensuring the output corresponds to the output of -/// String.format. Assumes the argument is a structured expression which -/// contains the fields: string expr, int, float, char, boolean, hashcode, -/// date_time. The correct component will be fetched depending on the format -/// specifier. +/// String.format. Assumes the argument is a string representing one of: +/// string expr, int, float, char, boolean, hashcode, date_time. +/// The correct type will be retrieved by calling get_arg with an id depending +/// on the format specifier. /// \param fresh_symbol: generator of fresh symbols /// \param fs: a format specifier -/// \param arg: a struct containing the possible value of the argument to format +/// \param get_arg: a function returning the possible value of the argument to +/// format given their type. /// \param index_type: type for indexes in strings /// \param char_type: type of characters /// \param array_pool: pool of arrays representing strings @@ -269,7 +270,7 @@ static std::pair add_axioms_for_format_specifier( symbol_generatort &fresh_symbol, const format_specifiert &fs, - const struct_exprt &arg, + const std::function &get_arg, const typet &index_type, const typet &char_type, array_poolt &array_pool, @@ -282,46 +283,56 @@ add_axioms_for_format_specifier( switch(fs.conversion) { case format_specifiert::DECIMAL_INTEGER: - return_code = add_axioms_for_string_of_int( - res, get_component_in_struct(arg, ID_int), 0, ns); + return_code = add_axioms_for_string_of_int(res, get_arg(ID_int), 0, ns); return {res, std::move(return_code.second)}; case format_specifiert::HEXADECIMAL_INTEGER: - return_code = - add_axioms_from_int_hex(res, get_component_in_struct(arg, ID_int)); + return_code = add_axioms_for_string_of_int_with_radix( + res, get_arg(ID_int), from_integer(16, index_type), 16, ns); return {res, std::move(return_code.second)}; case format_specifiert::SCIENTIFIC: return_code = add_axioms_from_float_scientific_notation( - fresh_symbol, - res, - get_component_in_struct(arg, ID_float), - array_pool, - ns); + fresh_symbol, res, get_arg(ID_float), array_pool, ns); return {res, std::move(return_code.second)}; case format_specifiert::DECIMAL_FLOAT: return_code = add_axioms_for_string_of_float( - fresh_symbol, - res, - get_component_in_struct(arg, ID_float), - array_pool, - ns); + fresh_symbol, res, get_arg(ID_float), array_pool, ns); return {res, std::move(return_code.second)}; case format_specifiert::CHARACTER: - return_code = - add_axioms_from_char(res, get_component_in_struct(arg, ID_char)); - return {res, std::move(return_code.second)}; + { + exprt arg_string = get_arg(ID_char); + array_string_exprt &string_expr = to_array_string_expr(arg_string); + // In the case the arg is null, the result will be equal to "null" and + // and otherwise we just take the 4th character of the string. + const exprt is_null_literal = is_null(string_expr); + constraints.existential.push_back(equal_exprt{ + res.length(), + if_exprt{ + is_null_literal, + from_integer(4, index_type), + from_integer(1, index_type)}}); + constraints.existential.push_back(implies_exprt{ + is_null_literal, + and_exprt{ + equal_exprt{res[0], from_integer('n', char_type)}, + equal_exprt{res[1], from_integer('u', char_type)}, + equal_exprt{res[2], from_integer('l', char_type)}, + equal_exprt{res[3], from_integer('l', char_type)}}}); + constraints.existential.push_back(implies_exprt{ + not_exprt{is_null_literal}, + equal_exprt{res[0], typecast_exprt{string_expr[3], char_type}}}); + return {res, constraints}; + } case format_specifiert::BOOLEAN: - return_code = - add_axioms_from_bool(res, get_component_in_struct(arg, ID_boolean)); + return_code = add_axioms_from_bool(res, get_arg(ID_boolean)); return {res, std::move(return_code.second)}; case format_specifiert::STRING: { - auto string_expr = - get_string_expr(array_pool, get_component_in_struct(arg, "string_expr")); + const exprt arg_string = get_arg("string_expr"); + const array_string_exprt string_expr = to_array_string_expr(arg_string); return {std::move(string_expr), {}}; } case format_specifiert::HASHCODE: - return_code = add_axioms_for_string_of_int( - res, get_component_in_struct(arg, "hashcode"), 0, ns); + return_code = add_axioms_for_string_of_int(res, get_arg("hashcode"), 0, ns); return {res, std::move(return_code.second)}; case format_specifiert::LINE_SEPARATOR: // TODO: the constant should depend on the system: System.lineSeparator() @@ -344,7 +355,7 @@ add_axioms_for_format_specifier( auto format_specifier_result = add_axioms_for_format_specifier( fresh_symbol, fs_lower, - arg, + get_arg, index_type, char_type, array_pool, @@ -379,6 +390,50 @@ add_axioms_for_format_specifier( } } +/// Deserialize an argument for format from \p string. +/// \p id should be one of: string_expr, int, char, boolean, float. +/// The primitive values are expected to all be encoded using 4 characters. +static exprt format_arg_from_string( + const array_string_exprt &string, const irep_idt &id) +{ + if(id == "string_expr") + return string; + if(id == ID_int) + { + // Assume the string has length 4 + // (int64)string.content[0] << 48 | (int64) string.content[1] << 32 | + // (int64)string.content[2] << 16 | (int64) string.content[3] + const signedbv_typet type{64}; + return bitor_exprt{ + bitor_exprt{ + shl_exprt{typecast_exprt{string[0], type}, 48}, + shl_exprt{typecast_exprt{string[1], type}, 32}}, + bitor_exprt{ + shl_exprt{typecast_exprt{string[2], type}, 16}, + typecast_exprt{string[3], type}}}; + } + + if(id == ID_char) + { + // Leave the string unchanged as the "null" string is used to represent null + return string; + } + if(id == ID_boolean) + { + // We assume the string has length exactly 4, if it is "null" return false + // and otherwise ignore the first 3 and return (bool)string.content[3] + return if_exprt{ + is_null(string), false_exprt{}, typecast_exprt{string[3], bool_typet()}}; + } + if(id == ID_float) + { + // Deserialize an int and cast to float + const exprt as_int = format_arg_from_string(string, ID_int); + return typecast_exprt{as_int, floatbv_typet{}}; + } + UNHANDLED_CASE; +} + /// Parse `s` and add axioms ensuring the output corresponds to the output of /// String.format. /// \param fresh_symbol: generator of fresh symbols @@ -410,6 +465,9 @@ std::pair add_axioms_for_format( if(fe.is_format_specifier()) { const format_specifiert &fs = fe.get_format_specifier(); + std::function get_arg = + [](const irep_idt &) -> exprt { UNREACHABLE; }; + if( fs.conversion != format_specifiert::PERCENT_SIGN && fs.conversion != format_specifiert::LINE_SEPARATOR) @@ -420,7 +478,7 @@ std::pair add_axioms_for_format( { INVARIANT( arg_count < args.size(), "number of format must match specifiers"); - arg = to_struct_expr(args[arg_count++]); + arg = args[arg_count++]; } else { @@ -433,18 +491,26 @@ std::pair add_axioms_for_format( arg = to_struct_expr(args[fs.index - 1]); } - auto result = add_axioms_for_format_specifier( - fresh_symbol, - fs, - to_struct_expr(arg), - index_type, - char_type, - array_pool, - message, - ns); - merge(constraints, std::move(result.second)); - intermediary_strings.push_back(result.first); + INVARIANT( + is_refined_string_type(arg.type()), + "arguments of format should be strings"); + const array_string_exprt string_arg = get_string_expr(array_pool, arg); + get_arg = [string_arg](const irep_idt &id) { + return format_arg_from_string(string_arg, id); + }; } + + auto result = add_axioms_for_format_specifier( + fresh_symbol, + fs, + get_arg, + index_type, + char_type, + array_pool, + message, + ns); + merge(constraints, std::move(result.second)); + intermediary_strings.push_back(result.first); } else {