diff --git a/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/Test.class b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/Test.class new file mode 100644 index 00000000000..a41151d6a03 Binary files /dev/null and b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/Test.class differ diff --git a/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/Test.java b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/Test.java new file mode 100644 index 00000000000..0d4ff9303aa --- /dev/null +++ b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/Test.java @@ -0,0 +1,36 @@ +class Util { + public static char[] chars(int count) { + char result[] = new char[count]; + for (int i = 0; i < count; ++i) { + result[i] = (char)((int)'a' + i); + } + return result; + } +} + +public class Test { + // static field which is initialized to a non-empty value + public static char[] staticCharArray = Util.chars(26); + // reference to the same array + public static char[] staticCharArrayRef = staticCharArray; + + public static char[] charArray() { + staticCharArray[0] = staticCharArray[3]; + staticCharArray[1] = 'e'; + assert staticCharArray[0] == 'd'; + assert staticCharArray[0] == 'A'; + return staticCharArray; + } + + public static char[] charArrayRef() { + staticCharArrayRef[0] = staticCharArray[3]; + staticCharArray[1] = 'e'; + if (staticCharArray[0] == 'A') { + assert false; + } else if (staticCharArray[0] == 'd') { + assert false; + } + + return staticCharArray; + } +} diff --git a/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/Util.class b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/Util.class new file mode 100644 index 00000000000..4eeec110121 Binary files /dev/null and b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/Util.class differ diff --git a/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/static-values.json b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/static-values.json new file mode 100644 index 00000000000..9607ce108af --- /dev/null +++ b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/static-values.json @@ -0,0 +1,17 @@ +{ + "@type":"com.diffblue.retriever.StaticFieldMap", + "Test":{ + "@type":"com.diffblue.retriever.StaticFieldMap", + "staticCharArrayRef":{ + "@type":"[C", + "@ref": "id1" + }, + "staticCharArray":{ + "@type":"[C", + "@id": "id1", + "@items":[ + "abcAefghijklmnopqrstuvwxyz" + ] + } + } +} diff --git a/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array-ref-static-values.desc b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array-ref-static-values.desc new file mode 100644 index 00000000000..a7bb9e69ada --- /dev/null +++ b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array-ref-static-values.desc @@ -0,0 +1,13 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--function Test.charArrayRef --property "java::Test.charArrayRef:()[C.assertion.2" --static-values static-values.json +Generated 0 VCC[(]s[)], 0 remaining after simplification +assertion at file Test.java line 31 .*: SUCCESS +^EXIT=0$ +^SIGNAL=0$ +-- +-- +Not that the json file has been modified on purpose so that this checks +that it is actually loaded and not retrieved by execution of the Util function. +This test does not work with symex-driven-lazy-loading because this option is +not compatible with --property. diff --git a/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array-ref.desc b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array-ref.desc new file mode 100644 index 00000000000..b1a08a7af10 --- /dev/null +++ b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array-ref.desc @@ -0,0 +1,13 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--function Test.charArrayRef --property "java::Test.charArrayRef:()[C.assertion.1" +Generated 0 VCC[(]s[)], 0 remaining after simplification +assertion at file Test.java line 29 .*: SUCCESS +^EXIT=0$ +^SIGNAL=0$ +-- +-- +Note that the json file has been modified on purpose so that this checks +that it is actually loaded and not retrieved by execution of the Util function. +This test does not work with symex-driven-lazy-loading because this option is +not compatible with --property. diff --git a/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array-static-values.desc b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array-static-values.desc new file mode 100644 index 00000000000..d00d568de89 --- /dev/null +++ b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array-static-values.desc @@ -0,0 +1,13 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--function Test.charArray --property "java::Test.charArray:()[C.assertion.2" --static-values static-values.json +Generated 0 VCC[(]s[)], 0 remaining after simplification +assertion at file Test.java line 21 .*: SUCCESS +^EXIT=0$ +^SIGNAL=0$ +-- +-- +Note that the json file has been modified on purpose so that this checks +that it is actually loaded and not retrieved by execution of the Util function. +This test does not work with symex-driven-lazy-loading because this option is +not compatible with --property. diff --git a/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array1.desc b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array1.desc new file mode 100644 index 00000000000..e515604e4ab --- /dev/null +++ b/jbmc/regression/jbmc/array-cell-sensitivity-static-fields/test-char-array1.desc @@ -0,0 +1,12 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--function Test.charArray --property "java::Test.charArray:()[C.assertion.1" +Generated 0 VCC[(]s[)], 0 remaining after simplification +assertion at file Test.java line 20 .*: SUCCESS +^EXIT=0$ +^SIGNAL=0$ +-- +-- +Checks that constant propagation happens through static arrays. +This test does not work with symex-driven-lazy-loading because this option is +not compatible with --property. diff --git a/jbmc/src/java_bytecode/Makefile b/jbmc/src/java_bytecode/Makefile index 1e99b02d42f..7a8a4bfb670 100644 --- a/jbmc/src/java_bytecode/Makefile +++ b/jbmc/src/java_bytecode/Makefile @@ -3,6 +3,7 @@ SRC = assignments_from_json.cpp \ character_refine_preprocess.cpp \ ci_lazy_methods.cpp \ ci_lazy_methods_needed.cpp \ + code_with_references.cpp \ convert_java_nondet.cpp \ create_array_with_type_intrinsic.cpp \ expr2java.cpp \ diff --git a/jbmc/src/java_bytecode/assignments_from_json.cpp b/jbmc/src/java_bytecode/assignments_from_json.cpp index e0440f6ab17..1ebe0f7b032 100644 --- a/jbmc/src/java_bytecode/assignments_from_json.cpp +++ b/jbmc/src/java_bytecode/assignments_from_json.cpp @@ -9,6 +9,7 @@ Author: Diffblue Ltd. #include "assignments_from_json.h" #include "ci_lazy_methods_needed.h" +#include "code_with_references.h" #include "java_static_initializers.h" #include "java_string_library_preprocess.h" #include "java_string_literals.h" @@ -27,9 +28,6 @@ Author: Diffblue Ltd. /// ones that are not marked `const` may be mutated. struct object_creation_infot { - /// Code block to append all new code to for the deterministic assignments. - code_blockt █ - /// Handles allocation of new symbols, adds them to its symbol table (which /// will usually be the same as the `symbol_table` of this struct) and keeps /// track of them so declarations for them can be added by the caller before @@ -326,7 +324,7 @@ static optionalt element_type_from_array_type( return {}; } -void assign_from_json_rec( +code_with_references_listt assign_from_json_rec( const exprt &expr, const jsont &json, const optionalt &type_from_array, @@ -335,60 +333,57 @@ void assign_from_json_rec( /// One of the base cases (primitive case) of the recursion. /// For characters, the encoding in \p json is assumed to be UTF-8. /// See \ref assign_from_json_rec. -static void assign_primitive_from_json( - const exprt &expr, - const jsont &json, - code_blockt &block) +static code_with_references_listt +assign_primitive_from_json(const exprt &expr, const jsont &json) { + code_with_references_listt result; if(json.is_null()) // field is not mentioned in json, leave as default value - return; + return result; if(expr.type() == java_boolean_type()) { - if(json.is_true()) - block.add(code_assignt{expr, true_exprt{}}); - else - block.add(code_assignt{expr, false_exprt{}}); + result.add(code_assignt{ + expr, json.is_true() ? (exprt)true_exprt{} : (exprt)false_exprt{}}); } else if( expr.type() == java_int_type() || expr.type() == java_byte_type() || expr.type() == java_short_type() || expr.type() == java_long_type()) { - block.add( + result.add( code_assignt{expr, from_integer(std::stoll(json.value), expr.type())}); } else if(expr.type() == java_double_type()) { ieee_floatt ieee_float(to_floatbv_type(expr.type())); ieee_float.from_double(std::stod(json.value)); - block.add(code_assignt{expr, ieee_float.to_expr()}); + result.add(code_assignt{expr, ieee_float.to_expr()}); } else if(expr.type() == java_float_type()) { ieee_floatt ieee_float(to_floatbv_type(expr.type())); ieee_float.from_float(std::stof(json.value)); - block.add(code_assignt{expr, ieee_float.to_expr()}); + result.add(code_assignt{expr, ieee_float.to_expr()}); } else if(expr.type() == java_char_type()) { const std::wstring wide_value = utf8_to_utf16_native_endian(json.value); PRECONDITION(wide_value.length() == 1); - block.add( + result.add( code_assignt{expr, from_integer(wide_value.front(), expr.type())}); } + return result; } /// One of the base cases of the recursive algorithm. See /// \ref assign_from_json_rec. -static void assign_null(const exprt &expr, code_blockt &block) +static code_assignt assign_null(const exprt &expr) { - block.add( - code_assignt{expr, null_pointer_exprt{to_pointer_type(expr.type())}}); + return code_assignt{expr, null_pointer_exprt{to_pointer_type(expr.type())}}; } /// In the case of an assignment of an array given a JSON representation, this /// function assigns the data component of the array, which contains the array /// elements. \p expr is a pointer to the array containing the component. -static void assign_array_data_component_from_json( +static code_with_references_listt assign_array_data_component_from_json( const exprt &expr, const jsont &json, const optionalt &type_from_array, @@ -405,8 +400,8 @@ static void assign_array_data_component_from_json( const symbol_exprt &array_init_data = info.allocate_objects.allocate_automatic_local_object( data_member_expr.type(), "user_specified_array_data_init"); - const code_assignt data_assign{array_init_data, data_member_expr, info.loc}; - info.block.add(data_assign); + code_with_references_listt result; + result.add(code_assignt{array_init_data, data_member_expr, info.loc}); size_t index = 0; const optionalt inferred_element_type = @@ -416,25 +411,10 @@ static void assign_array_data_component_from_json( { const dereference_exprt element_at_index = array_element_from_pointer( array_init_data, from_integer(index, java_int_type())); - assign_from_json_rec(element_at_index, *it, inferred_element_type, info); + result.append( + assign_from_json_rec(element_at_index, *it, inferred_element_type, info)); } -} - -/// Allocate a fresh array of length \p array_length_expr and assigns \p expr -/// to it. -static void allocate_array( - const exprt &expr, - const exprt &array_length_expr, - object_creation_infot &info) -{ - const pointer_typet &pointer_type = to_pointer_type(expr.type()); - const auto &element_type = - java_array_element_type(to_struct_tag_type(pointer_type.subtype())); - side_effect_exprt java_new_array{ID_java_new_array, pointer_type}; - java_new_array.copy_to_operands(array_length_expr); - java_new_array.type().subtype().set(ID_element_type, element_type); - code_assignt assign{expr, java_new_array, info.loc}; - info.block.add(assign); + return result; } /// Declare a non-deterministic length expression @@ -442,18 +422,17 @@ static void allocate_array( /// \param[out] code: code block to which code will get appended /// \param loc: location for the created code /// \return the length expression that has been non-deterministically created -static symbol_exprt nondet_length( - allocate_objectst &allocate, - code_blockt &code, - source_locationt loc) +static std::pair +nondet_length(allocate_objectst &allocate, source_locationt loc) { - const symbol_exprt length_expr = allocate.allocate_automatic_local_object( + symbol_exprt length_expr = allocate.allocate_automatic_local_object( java_int_type(), "user_specified_array_length"); - code.add(code_assignt{ - length_expr, side_effect_expr_nondett{java_int_type(), std::move(loc)}}); + code_with_references_listt code; + code.add( + code_assignt{length_expr, side_effect_expr_nondett{java_int_type(), loc}}); code.add(code_assumet{binary_predicate_exprt{ length_expr, ID_ge, from_integer(0, java_int_type())}}); - return length_expr; + return std::make_pair(length_expr, std::move(code)); } /// One of the cases in the recursive algorithm: the case where \p expr @@ -466,10 +445,11 @@ static symbol_exprt nondet_length( /// For the assignment of the array elements, see /// \ref assign_array_data_component_from_json. /// For the overall algorithm, see \ref assign_from_json_rec. -static void assign_det_length_array_from_json( +/// \return code for the assignment and length of the created array. +static std::pair +assign_det_length_array_from_json( const exprt &expr, const jsont &json, - const exprt &given_length_expr, const optionalt &type_from_array, object_creation_infot &info) { @@ -479,9 +459,9 @@ static void assign_det_length_array_from_json( const json_arrayt json_array = get_untyped_array(json, element_type); const auto number_of_elements = from_integer(json_array.size(), java_int_type()); - info.block.add( - code_assumet{equal_exprt{given_length_expr, number_of_elements}}); - assign_array_data_component_from_json(expr, json, type_from_array, info); + return { + assign_array_data_component_from_json(expr, json, type_from_array, info), + number_of_elements}; } /// One of the cases in the recursive algorithm: the case where \p expr @@ -494,53 +474,57 @@ static void assign_det_length_array_from_json( /// For the assignment of the array elements, see /// \ref assign_array_data_component_from_json. /// For the overall algorithm, see \ref assign_from_json_rec. -static void assign_nondet_length_array_from_json( - const exprt &expr, +/// \return code for the assignment and length of the created array. +static code_with_references_listt assign_nondet_length_array_from_json( + const exprt &array, const jsont &json, const exprt &given_length_expr, const optionalt &type_from_array, object_creation_infot &info) { - PRECONDITION(is_java_array_type(expr.type())); + PRECONDITION(is_java_array_type(array.type())); const auto &element_type = - java_array_element_type(to_struct_tag_type(expr.type().subtype())); + java_array_element_type(to_struct_tag_type(array.type().subtype())); const json_arrayt json_array = get_untyped_array(json, element_type); - const auto number_of_elements = - from_integer(json_array.size(), java_int_type()); - info.block.add(code_assumet{and_exprt{ + code_with_references_listt result; + exprt number_of_elements = from_integer(json_array.size(), java_int_type()); + result.add(code_assumet{and_exprt{ binary_predicate_exprt{given_length_expr, ID_ge, number_of_elements}, binary_predicate_exprt{ given_length_expr, ID_le, from_integer(info.max_user_array_length, java_int_type())}}}); - assign_array_data_component_from_json(expr, json, type_from_array, info); + result.append( + assign_array_data_component_from_json(array, json, type_from_array, info)); + return result; } /// One of the cases in the recursive algorithm: the case where \p expr /// represents a string. /// See \ref assign_from_json_rec. -static void assign_string_from_json( +static code_assignt assign_string_from_json( const jsont &json, const exprt &expr, object_creation_infot &info) { const auto json_string = get_untyped_string(json); PRECONDITION(json_string.is_string()); - info.block.add(code_assignt{expr, - get_or_create_string_literal_symbol( - json_string.value, info.symbol_table, true)}); + return code_assignt{expr, + get_or_create_string_literal_symbol( + json_string.value, info.symbol_table, true)}; } /// Helper function for \ref assign_struct_from_json which recursively assigns /// values to all of the fields of the Java object represented by \p expr (the /// components of its type and all of its parent types). -static void assign_struct_components_from_json( +static code_with_references_listt assign_struct_components_from_json( const exprt &expr, const jsont &json, object_creation_infot &info) { const java_class_typet &java_class_type = to_java_class_type(namespacet{info.symbol_table}.follow(expr.type())); + code_with_references_listt result; for(const auto &component : java_class_type.components()) { const irep_idt &component_name = component.get_name(); @@ -552,7 +536,10 @@ static void assign_struct_components_from_json( } member_exprt member_expr{expr, component_name, component.type()}; if(component_name[0] == '@') // component is parent struct type - assign_struct_components_from_json(member_expr, json, info); + { + result.append( + assign_struct_components_from_json(member_expr, json, info)); + } else // component is class field (pointer to struct) { const auto member_json = [&]() -> jsont { @@ -565,16 +552,17 @@ static void assign_struct_components_from_json( } return json[id2string(component_name)]; }(); - assign_from_json_rec(member_expr, member_json, {}, info); + result.append(assign_from_json_rec(member_expr, member_json, {}, info)); } } + return result; } /// One of the cases in the recursive algorithm: the case where \p expr is a /// struct, which is the result of dereferencing a pointer that corresponds to /// the Java object described in \p json. /// See \ref assign_from_json_rec. -static void assign_struct_from_json( +static code_with_references_listt assign_struct_from_json( const exprt &expr, const jsont &json, object_creation_infot &info) @@ -582,8 +570,11 @@ static void assign_struct_from_json( const namespacet ns{info.symbol_table}; const java_class_typet &java_class_type = to_java_class_type(ns.follow(expr.type())); + code_with_references_listt result; if(is_java_string_type(java_class_type)) - assign_string_from_json(json, expr, info); + { + result.add(assign_string_from_json(json, expr, info)); + } else { auto initial_object = zero_initializer(expr.type(), info.loc, ns); @@ -592,21 +583,27 @@ static void assign_struct_from_json( to_struct_expr(*initial_object), ns, struct_tag_typet("java::" + id2string(java_class_type.get_tag()))); - info.block.add(code_assignt{expr, *initial_object}); - assign_struct_components_from_json(expr, json, info); + result.add(code_assignt{expr, *initial_object}); + result.append(assign_struct_components_from_json(expr, json, info)); } + return result; } /// Same as \ref assign_pointer_from_json without special cases (enums). -static void assign_non_enum_pointer_from_json( +static code_with_references_listt assign_non_enum_pointer_from_json( const exprt &expr, const jsont &json, object_creation_infot &info) { + code_with_references_listt result; + code_blockt output_code; exprt dereferenced_symbol_expr = info.allocate_objects.allocate_dynamic_object( - info.block, expr, to_pointer_type(expr.type()).subtype()); - assign_struct_from_json(dereferenced_symbol_expr, json, info); + output_code, expr, to_pointer_type(expr.type()).subtype()); + for(codet &code : output_code.statements()) + result.add(std::move(code)); + result.append(assign_struct_from_json(dereferenced_symbol_expr, json, info)); + return result; } /// One of the cases in the recursive algorithm: the case where the expression @@ -616,22 +613,23 @@ static void assign_non_enum_pointer_from_json( /// the recursion.) /// Once reference-equality of fields in different classes is supported, this /// function can be removed. -static void assign_enum_from_json( +static code_with_references_listt assign_enum_from_json( const exprt &expr, const jsont &json, object_creation_infot &info) { const auto &java_class_type = followed_class_type(expr, info.symbol_table); const std::string &enum_name = id2string(java_class_type.get_name()); + code_with_references_listt result; if(const auto func = info.symbol_table.lookup(clinit_wrapper_name(enum_name))) - info.block.add(code_function_callt{func->symbol_expr()}); + result.add(code_function_callt{func->symbol_expr()}); const irep_idt values_name = enum_name + ".$VALUES"; if(!info.symbol_table.has_symbol(values_name)) { // Fallback: generate a new enum instance instead of getting it from $VALUES - assign_non_enum_pointer_from_json(expr, json, info); - return; + result.append(assign_non_enum_pointer_from_json(expr, json, info)); + return result; } dereference_exprt values_struct{ @@ -645,17 +643,18 @@ static void assign_enum_from_json( const exprt ordinal_expr = from_integer(std::stoi(json["ordinal"].value), java_int_type()); - info.block.add(code_assignt{ + result.add(code_assignt{ expr, typecast_exprt::conditional_cast( array_element_from_pointer(values_data, ordinal_expr), expr.type())}); + return result; } /// One of the cases in the recursive algorithm: the case where \p expr is a /// pointer to a struct, whose type is the same as the runtime-type of the /// corresponding Java object. /// See \ref assign_from_json_rec. -static void assign_pointer_from_json( +static code_with_references_listt assign_pointer_from_json( const exprt &expr, const jsont &json, object_creation_infot &info) @@ -663,9 +662,9 @@ static void assign_pointer_from_json( // This check can be removed when tracking reference-equal objects across // different classes has been implemented. if(has_enum_type(expr, info.symbol_table)) - assign_enum_from_json(expr, json, info); + return assign_enum_from_json(expr, json, info); else - assign_non_enum_pointer_from_json(expr, json, info); + return assign_non_enum_pointer_from_json(expr, json, info); } /// One of the cases in the recursive algorithm: the case where \p expr is a @@ -673,7 +672,7 @@ static void assign_pointer_from_json( /// corresponding Java object, which may be more specific than the type pointed /// to by `expr.type()` (the compile-time type of the object). /// See \ref assign_from_json_rec. -static void assign_pointer_with_given_type_from_json( +static code_with_references_listt assign_pointer_with_given_type_from_json( const exprt &expr, const jsont &json, const java_class_typet &runtime_type, @@ -693,14 +692,26 @@ static void assign_pointer_with_given_type_from_json( replacement_pointer_type); } - assign_pointer_from_json(new_symbol, json, info); - info.block.add( - code_assignt{expr, typecast_exprt{new_symbol, pointer_type}}); + auto result = assign_pointer_from_json(new_symbol, json, info); + result.add(code_assignt{expr, typecast_exprt{new_symbol, pointer_type}}); + return result; } else - assign_pointer_from_json(expr, json, info); + return assign_pointer_from_json(expr, json, info); } +struct get_or_create_reference_resultt +{ + /// true if a new symbol was allocated for + /// the given ID and false if the ID was found in the reference map + bool newly_allocated; + /// symbol expression(s) for the given ID. + std::unordered_map::iterator + reference; + /// initialization code for the reference + code_with_references_listt code; +}; + /// Helper function for \ref assign_reference_from_json. /// Look up the given \p id in the reference map and gets or creates the symbol /// for it. @@ -716,7 +727,7 @@ static void assign_pointer_with_given_type_from_json( /// \return a pair: the first element is true if a new symbol was allocated for /// the given ID and false if the ID was found in the reference map. The /// second element has the symbol expression(s) for this ID. -static std::pair get_or_create_reference( +static get_or_create_reference_resultt get_or_create_reference( const exprt &expr, const std::string &id, object_creation_infot &info) @@ -725,6 +736,7 @@ static std::pair get_or_create_reference( const auto id_it = info.references.find(id); if(id_it == info.references.end()) { + code_with_references_listt code; object_creation_referencet reference; if(is_java_array_type(expr.type())) { @@ -733,22 +745,19 @@ static std::pair get_or_create_reference( reference.array_length = info.allocate_objects.allocate_automatic_local_object( java_int_type(), "user_specified_array_length"); - info.block.add(code_assignt{*reference.array_length, - side_effect_expr_nondett{java_int_type()}}); - info.block.add(code_assumet{binary_predicate_exprt{ - *reference.array_length, ID_ge, from_integer(0, java_int_type())}}); - allocate_array(reference.expr, *reference.array_length, info); - info.references.insert({id, reference}); + code.add(reference_allocationt{id, info.loc}); } else { + code_blockt block; reference.expr = info.allocate_objects.allocate_dynamic_object_symbol( - info.block, expr, pointer_type.subtype()); - info.references.insert({id, reference}); + block, expr, pointer_type.subtype()); + code.add(block); } - return {true, reference}; + auto iterator_inserted_pair = info.references.insert({id, reference}); + return {iterator_inserted_pair.second, iterator_inserted_pair.first, code}; } - return {false, id_it->second}; + return {false, id_it, {}}; } /// One of the cases in the recursive algorithm: the case where \p expr @@ -773,7 +782,7 @@ static std::pair get_or_create_reference( /// several different classes (e.g.\ as soon as we find a `@ref` for the first /// time we might want to search the whole initial JSON file for the /// corresponding `@id`). -static void assign_reference_from_json( +static code_with_references_listt assign_reference_from_json( const exprt &expr, const jsont &json, const optionalt &type_from_array, @@ -782,9 +791,11 @@ static void assign_reference_from_json( const std::string &id = has_enum_type(expr, info.symbol_table) ? get_enum_id(expr, json, info.symbol_table) : get_id_or_reference_value(json); - const auto bool_reference_pair = get_or_create_reference(expr, id, info); - const bool is_new_id = bool_reference_pair.first; - const object_creation_referencet &reference = bool_reference_pair.second; + auto get_reference_result = get_or_create_reference(expr, id, info); + const bool is_new_id = get_reference_result.newly_allocated; + object_creation_referencet &reference = + get_reference_result.reference->second; + code_with_references_listt result = std::move(get_reference_result.code); if(has_id(json) || (is_new_id && has_enum_type(expr, info.symbol_table))) { if(is_java_array_type(expr.type())) @@ -793,20 +804,30 @@ static void assign_reference_from_json( reference.array_length, "an array reference should store its length"); if(has_nondet_length(json)) { - assign_nondet_length_array_from_json( - reference.expr, json, *reference.array_length, type_from_array, info); + result.append(assign_nondet_length_array_from_json( + reference.expr, + json, + *reference.array_length, + type_from_array, + info)); } else { - assign_det_length_array_from_json( - reference.expr, json, *reference.array_length, type_from_array, info); + auto code_length_pair = assign_det_length_array_from_json( + reference.expr, json, type_from_array, info); + result.append(std::move(code_length_pair.first)); + reference.array_length = std::move(code_length_pair.second); } } else - assign_struct_from_json(dereference_exprt(reference.expr), json, info); + { + result.append( + assign_struct_from_json(dereference_exprt(reference.expr), json, info)); + } } - info.block.add(code_assignt{ + result.add(code_assignt{ expr, typecast_exprt::conditional_cast(reference.expr, expr.type())}); + return result; } /// Entry point of the recursive deterministic assignment algorithm. @@ -817,16 +838,20 @@ static void assign_reference_from_json( /// \param type_from_array: if `expr` was found as an element of an array, /// the element type of this array. /// \param info: references used throughout the recursive algorithm. -void assign_from_json_rec( +code_with_references_listt assign_from_json_rec( const exprt &expr, const jsont &json, const optionalt &type_from_array, object_creation_infot &info) { + code_with_references_listt result; if(can_cast_type(expr.type())) { if(json.is_null()) - assign_null(expr, info.block); + { + result.add(assign_null(expr)); + return result; + } else if( is_reference(json) || has_id(json) || is_enum_with_type_equal_to_declaring_type( @@ -835,17 +860,18 @@ void assign_from_json_rec( // has_enum_type(expr, info.symbol_table) once tracking reference-equality // across different classes has been implemented. { - assign_reference_from_json(expr, json, type_from_array, info); + return assign_reference_from_json(expr, json, type_from_array, info); } else if(is_java_array_type(expr.type())) { if(has_nondet_length(json)) { - const exprt length = - nondet_length(info.allocate_objects, info.block, info.loc); - allocate_array(expr, length, info); - assign_nondet_length_array_from_json( - expr, json, length, type_from_array, info); + auto length_code_pair = nondet_length(info.allocate_objects, info.loc); + length_code_pair.second.add( + allocate_array(expr, length_code_pair.first, info.loc)); + length_code_pair.second.append(assign_nondet_length_array_from_json( + expr, json, length_code_pair.first, type_from_array, info)); + return length_code_pair.second; } else { @@ -853,29 +879,33 @@ void assign_from_json_rec( const auto &element_type = java_array_element_type(to_struct_tag_type(expr.type().subtype())); const std::size_t length = get_untyped_array(json, element_type).size(); - allocate_array(expr, from_integer(length, java_int_type()), info); - assign_array_data_component_from_json( - expr, json, type_from_array, info); + result.add(allocate_array( + expr, from_integer(length, java_int_type()), info.loc)); + result.append(assign_array_data_component_from_json( + expr, json, type_from_array, info)); + return result; } } else if( const auto runtime_type = ::runtime_type(json, type_from_array, info.symbol_table)) { - assign_pointer_with_given_type_from_json(expr, json, *runtime_type, info); + return assign_pointer_with_given_type_from_json( + expr, json, *runtime_type, info); } else - assign_pointer_from_json(expr, json, info); + return assign_pointer_from_json(expr, json, info); } else - assign_primitive_from_json(expr, get_untyped_primitive(json), info.block); + result.append( + assign_primitive_from_json(expr, get_untyped_primitive(json))); + return result; } -void assign_from_json( +code_with_references_listt assign_from_json( const exprt &expr, const jsont &json, const irep_idt &function_id, - code_blockt &assignments, symbol_table_baset &symbol_table, optionalt &needed_lazy_methods, size_t max_user_array_length, @@ -884,7 +914,6 @@ void assign_from_json( source_locationt location{}; location.set_function(function_id); allocate_objectst allocate(ID_java, location, function_id, symbol_table); - code_blockt body_rec; const symbolt *function_symbol = symbol_table.lookup(function_id); INVARIANT(function_symbol, "Function must appear in symbol table"); const auto class_name = declaring_class(*function_symbol); @@ -893,15 +922,22 @@ void assign_from_json( "Function " + id2string(function_id) + " must be declared by a class."); const auto &class_type = to_java_class_type(symbol_table.lookup_ref(*class_name).type); - object_creation_infot info{body_rec, - allocate, - symbol_table, - needed_lazy_methods, - references, - location, - max_user_array_length, - class_type}; - assign_from_json_rec(expr, json, {}, info); + object_creation_infot info{allocate, + symbol_table, + needed_lazy_methods, + references, + location, + max_user_array_length, + class_type}; + code_with_references_listt code_with_references = + assign_from_json_rec(expr, json, {}, info); + code_blockt assignments; allocate.declare_created_symbols(assignments); - assignments.append(body_rec); + std::for_each( + assignments.statements().rbegin(), + assignments.statements().rend(), + [&](const codet &c) { + code_with_references.add_to_front(code_without_referencest{c}); + }); + return code_with_references; } diff --git a/jbmc/src/java_bytecode/assignments_from_json.h b/jbmc/src/java_bytecode/assignments_from_json.h index 65976a135e3..85209e175e5 100644 --- a/jbmc/src/java_bytecode/assignments_from_json.h +++ b/jbmc/src/java_bytecode/assignments_from_json.h @@ -11,23 +11,13 @@ Author: Diffblue Ltd. #ifndef CPROVER_JAVA_BYTECODE_ASSIGNMENTS_FROM_JSON_H #define CPROVER_JAVA_BYTECODE_ASSIGNMENTS_FROM_JSON_H +#include "code_with_references.h" #include class jsont; class symbol_table_baset; class ci_lazy_methods_neededt; -/// Information to store when several references point to the same Java object. -struct object_creation_referencet -{ - /// Expression for the symbol that stores the value that may be reference - /// equal to other values. - exprt expr; - - /// If `symbol` is an array, this expression stores its length. - optionalt array_length; -}; - /// Given an expression \p expr representing a Java object or primitive and a /// JSON representation \p json of the value of a Java object or primitive of a /// compatible type, adds statements to \p block to assign \p expr to the @@ -106,11 +96,10 @@ struct object_creation_referencet /// a function, not a line number. /// /// For parameter documentation, see \ref object_creation_infot. -void assign_from_json( +code_with_references_listt assign_from_json( const exprt &expr, const jsont &json, const irep_idt &function_id, - code_blockt &block, symbol_table_baset &symbol_table, optionalt &needed_lazy_methods, size_t max_user_array_length, diff --git a/jbmc/src/java_bytecode/code_with_references.cpp b/jbmc/src/java_bytecode/code_with_references.cpp new file mode 100644 index 00000000000..0055e6d935c --- /dev/null +++ b/jbmc/src/java_bytecode/code_with_references.cpp @@ -0,0 +1,76 @@ +/*******************************************************************\ + +Module: Java bytecode + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include "code_with_references.h" +#include "java_types.h" +#include + +codet allocate_array( + const exprt &expr, + const exprt &array_length_expr, + const source_locationt &loc) +{ + const pointer_typet &pointer_type = to_pointer_type(expr.type()); + const auto &element_type = + java_array_element_type(to_struct_tag_type(pointer_type.subtype())); + side_effect_exprt java_new_array{ID_java_new_array, pointer_type}; + java_new_array.copy_to_operands(array_length_expr); + java_new_array.type().subtype().set(ID_element_type, element_type); + return code_assignt{expr, java_new_array, loc}; +} + +code_blockt +reference_allocationt::to_code(reference_substitutiont &references) const +{ + const object_creation_referencet &reference = references(reference_id); + INVARIANT(reference.array_length, "Length of reference array must be known"); + if(can_cast_expr(*reference.array_length)) + { + return code_blockt( + {allocate_array(reference.expr, *reference.array_length, loc)}); + } + // Fallback in case the array definition has not been seen yet, this can + // happen if `to_code` is called before the whole json file has been processed + // or the "@id" json field corresponding to `reference_id` doesn't appear in + // the file. + code_blockt code; + code.add(code_assignt{*reference.array_length, + side_effect_expr_nondett{java_int_type()}}); + code.add(code_assumet{binary_predicate_exprt{ + *reference.array_length, ID_ge, from_integer(0, java_int_type())}}); + code.add(allocate_array(reference.expr, *reference.array_length, loc)); + return code; +} + +void code_with_references_listt::add(code_without_referencest code) +{ + auto ptr = std::make_shared(std::move(code)); + list.emplace_back(std::move(ptr)); +} + +void code_with_references_listt::add(codet code) +{ + add(code_without_referencest{std::move(code)}); +} + +void code_with_references_listt::add(reference_allocationt ref) +{ + auto ptr = std::make_shared(std::move(ref)); + list.emplace_back(std::move(ptr)); +} + +void code_with_references_listt::append(code_with_references_listt &&other) +{ + list.splice(list.end(), other.list); +} + +void code_with_references_listt::add_to_front(code_without_referencest code) +{ + auto ptr = std::make_shared(std::move(code)); + list.emplace_front(std::move(ptr)); +} diff --git a/jbmc/src/java_bytecode/code_with_references.h b/jbmc/src/java_bytecode/code_with_references.h new file mode 100644 index 00000000000..ba775f88ada --- /dev/null +++ b/jbmc/src/java_bytecode/code_with_references.h @@ -0,0 +1,108 @@ +/*******************************************************************\ + +Module: Java bytecode + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_JAVA_BYTECODE_CODE_WITH_REFERENCES_H +#define CPROVER_JAVA_BYTECODE_CODE_WITH_REFERENCES_H + +#include +#include + +/// Allocate a fresh array of length \p array_length_expr and assigns \p expr +/// to it. +codet allocate_array( + const exprt &expr, + const exprt &array_length_expr, + const source_locationt &loc); + +/// Information to store when several references point to the same Java object. +struct object_creation_referencet +{ + /// Expression for the symbol that stores the value that may be reference + /// equal to other values. + exprt expr; + + /// If `symbol` is an array, this expression stores its length. + /// This should only be set once the actual elements of the array have been + /// seen, not the first time an `@ref` have been seen, only when `@id` is + /// seen. + optionalt array_length; +}; + +/// Base class for code which can contain references which can get replaced +/// before generating actual codet. +/// Currently only references in array allocations are supported, because this +/// is currently the only use required by \ref assign_from_json. +class code_with_referencest +{ +public: + using reference_substitutiont = + std::function; + + virtual code_blockt to_code(reference_substitutiont &) const = 0; + + virtual ~code_with_referencest() = default; +}; + +/// Code that should not contain reference +class code_without_referencest : public code_with_referencest +{ +public: + codet code; + + explicit code_without_referencest(codet code) : code(std::move(code)) + { + } + + code_blockt to_code(reference_substitutiont &) const override + { + return code_blockt({code}); + } +}; + +/// Allocation code which contains a reference. +/// The generated code will be of the form: +/// +/// array_length = nondet(int) +/// assume(array_length >= 0) +/// array_expr = new array_type[array_length]; +/// +/// Where array_length and array_expr are given by the reference substitution +/// function. +class reference_allocationt : public code_with_referencest +{ +public: + std::string reference_id; + source_locationt loc; + + reference_allocationt(std::string reference_id, source_locationt loc) + : reference_id(std::move(reference_id)), loc(std::move(loc)) + { + } + + code_blockt to_code(reference_substitutiont &references) const override; +}; + +/// Wrapper around a list of shared pointer to code_with_referencest objects, +/// which provides a nicer interface. +class code_with_references_listt +{ +public: + std::list> list; + + void add(code_without_referencest code); + + void add(codet code); + + void add(reference_allocationt ref); + + void append(code_with_references_listt &&other); + + void add_to_front(code_without_referencest code); +}; + +#endif // CPROVER_JAVA_BYTECODE_CODE_WITH_REFERENCES_H diff --git a/jbmc/src/java_bytecode/java_bytecode_language.h b/jbmc/src/java_bytecode/java_bytecode_language.h index b2c71721518..622785470d8 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.h +++ b/jbmc/src/java_bytecode/java_bytecode_language.h @@ -13,6 +13,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "assignments_from_json.h" #include "ci_lazy_methods.h" #include "ci_lazy_methods_needed.h" +#include "code_with_references.h" #include "java_class_loader.h" #include "java_object_factory_parameters.h" #include "java_static_initializers.h" diff --git a/jbmc/src/java_bytecode/java_static_initializers.cpp b/jbmc/src/java_bytecode/java_static_initializers.cpp index 3ad52762a65..c407555f305 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.cpp +++ b/jbmc/src/java_bytecode/java_static_initializers.cpp @@ -822,19 +822,27 @@ code_blockt get_user_specified_clinit_body( } } } - code_blockt body; + code_with_references_listt code_with_references; for(const auto &value_pair : static_field_values) { - assign_from_json( + code_with_references.append(assign_from_json( value_pair.first, value_pair.second, real_clinit_name, - body, symbol_table, needed_lazy_methods, max_user_array_length, - references); + references)); } + code_with_referencest::reference_substitutiont reference_substitution = + [&](const std::string &reference_id) -> object_creation_referencet & { + auto it = references.find(reference_id); + INVARIANT(it != references.end(), "reference id must be present in map"); + return it->second; + }; + code_blockt body; + for(const auto &code_with_ref : code_with_references.list) + body.append(code_with_ref->to_code(reference_substitution)); return body; } return code_blockt{{code_function_callt{clinit_func->symbol_expr()}}}; diff --git a/jbmc/unit/java_bytecode/java_static_initializers/assignments_from_json.cpp b/jbmc/unit/java_bytecode/java_static_initializers/assignments_from_json.cpp index da4fa66278d..18b18692cd9 100644 --- a/jbmc/unit/java_bytecode/java_static_initializers/assignments_from_json.cpp +++ b/jbmc/unit/java_bytecode/java_static_initializers/assignments_from_json.cpp @@ -89,28 +89,33 @@ SCENARIO( const reference_typet test_class_type = reference_type(struct_tag_typet{"java::TestClass"}); - code_blockt block; optionalt option{}; - assign_from_json( + const code_with_references_listt code = assign_from_json( symbol_exprt{"symbol_to_assign", test_class_type}, json, clinit_name, - block, symbol_table, option, max_user_array_length, references); - THEN("The instruction is the declaration of a symbol of TestClass* type") + + code_with_referencest::reference_substitutiont reference_substitution = + [&](const std::string &reference_id) -> object_creation_referencet & { + UNREACHABLE; + }; + code_blockt block; + for(auto code_with_references : code.list) + block.append(code_with_references->to_code(reference_substitution)); + THEN( + "The instruction is the declaration of a symbol of TestClass* type," + "followed by its allocation") { const symbol_exprt declared_symbol = make_query(block)[0].as()[0].as().get(); REQUIRE(declared_symbol.type() == test_class_type); - THEN("The instruction allocates the symbol") - { - REQUIRE( - make_query(block)[1].as()[0].as().get() == - declared_symbol); - } + REQUIRE( + make_query(block)[1].as()[0].as().get() == + declared_symbol); } THEN("The instruction assigns the symbol to `symbol_to_assign`") { @@ -175,18 +180,24 @@ SCENARIO( const reference_typet test_class_type = reference_type(struct_tag_typet{"java::TestClass"}); - code_blockt block; optionalt option{}; - assign_from_json( + const code_with_references_listt code = assign_from_json( symbol_exprt{"symbol_to_assign", test_class_type}, json, clinit_name, - block, symbol_table, option, max_user_array_length, references); + code_with_referencest::reference_substitutiont reference_substitution = + [&](const std::string &reference_id) -> object_creation_referencet & { + UNREACHABLE; + }; + code_blockt block; + for(auto code_with_references : code.list) + block.append(code_with_references->to_code(reference_substitution)); + THEN("The instruction is the declaration of a symbol of TestClass* type") { const symbol_exprt allocation_symbol = @@ -294,18 +305,24 @@ SCENARIO( const reference_typet test_class_type = reference_type(struct_tag_typet{"java::TestClass"}); - code_blockt block; optionalt option{}; - assign_from_json( + const code_with_references_listt code = assign_from_json( symbol_exprt{"symbol_to_assign", test_class_type}, json, clinit_name, - block, symbol_table, option, max_user_array_length, references); + code_with_referencest::reference_substitutiont reference_substitution = + [&](const std::string &reference_id) -> object_creation_referencet & { + UNREACHABLE; + }; + code_blockt block; + for(auto code_with_references : code.list) + block.append(code_with_references->to_code(reference_substitution)); + THEN("The instruction is the declaration of a symbol of TestClass* type") { const symbol_exprt allocation_symbol = @@ -409,4 +426,210 @@ SCENARIO( .get()) == 42); } } + GIVEN("An object with two fields referencing the same array") + { + const json_objectt json = [] { + json_objectt entry{}; + entry["array_field_1"] = [] { + json_objectt json; + json["@ref"] = json_stringt{"id_of_array_2"}; + return json; + }(); + entry["array_field_2"] = [] { + json_objectt int_json; + int_json["@items"] = json_arrayt{json_numbert{"42"}}; + int_json["@type"] = json_stringt{"[I"}; + int_json["@id"] = json_stringt{"id_of_array_2"}; + return int_json; + }(); + entry["@type"] = json_stringt{"TestClass"}; + return entry; + }(); + class_to_declared_symbols.emplace("java::TestClass", [] { + symbolt field_symbol; + field_symbol.base_name = "array_field_1"; + field_symbol.name = "field_name_1_for_codet"; + field_symbol.type = java_int_type(); + field_symbol.is_static_lifetime = true; + return field_symbol; + }()); + class_to_declared_symbols.emplace("java::TestClass", [] { + symbolt field_symbol; + field_symbol.base_name = "array_field_2"; + field_symbol.name = "field_name_2_for_codet"; + field_symbol.type = java_int_type(); + field_symbol.is_static_lifetime = true; + return field_symbol; + }()); + const irep_idt clinit_name = add_clinit(symbol_table, "TestClass"); + add_class_with_components( + symbol_table, + "TestClass", + {std::pair("@class_identifier", string_typet{}), + std::pair("array_field_1", java_array_type('i')), + std::pair("array_field_2", java_array_type('i'))}); + // For array[int] + add_java_array_types(symbol_table); + + const reference_typet test_class_type = + reference_type(struct_tag_typet{"java::TestClass"}); + optionalt option{}; + const code_with_references_listt code = assign_from_json( + symbol_exprt{"symbol_to_assign", test_class_type}, + json, + clinit_name, + symbol_table, + option, + max_user_array_length, + references); + + code_with_referencest::reference_substitutiont reference_substitution = + [&](const std::string &reference_id) -> object_creation_referencet & { + REQUIRE(reference_id == "id_of_array_2"); + auto it = references.find(reference_id); + REQUIRE(it != references.end()); + return it->second; + }; + code_blockt block; + for(auto code_with_references : code.list) + block.append(code_with_references->to_code(reference_substitution)); + + THEN( + "The instruction declares a symbol of TestClass* type: " + "TestClass* malloc_site;") + { + const symbol_exprt allocation_symbol = + make_query(block)[0].as()[0].as().get(); + REQUIRE(allocation_symbol.type() == test_class_type); + } + THEN( + "declares the array data: " + "int[] user_specified_array_ref;") + { + REQUIRE( + make_query(block)[1] + .as()[0] + .as() + .get() + .type() == java_array_type('i')); + } + THEN( + "declares the length: " + "int user_specified_array_length;") + { + REQUIRE( + make_query(block)[2] + .as()[0] + .as() + .get() + .type() == java_int_type()); + } + THEN( + "declares the data:" + "int* user_specified_array_data_init;") + { + REQUIRE( + make_query(block)[3] + .as()[0] + .as() + .get() + .type() == java_reference_type(java_int_type())); + } + THEN( + "allocates the object:" + "malloc_site = new TestClass();") + { + REQUIRE( + make_query(block)[4] + .as()[0] + .as() + .get() + .type() == test_class_type); + REQUIRE( + make_query(block)[4] + .as()[1] + .as() + .get() + .get_statement() == ID_allocate); + } + + THEN( + "assigns the allocated object:" + "symbol_to_assign = malloc_site;") + { + REQUIRE( + make_query(block)[5] + .as()[0] + .as() + .get() + .get_identifier() == "symbol_to_assign"); + } + THEN( + "zero-initialize the object:" + "*malloc_site = {\"java::\", NULL, NULL};") + { + REQUIRE( + make_query(block)[6] + .as()[0] + .as()[0] + .get() + .type() == test_class_type); + } + THEN( + "allocate the array :" + "user_specified_array_ref = new int[1];") + { + REQUIRE( + make_query(block)[7].as()[0].get().type() == + java_array_type('i')); + const auto side_effect_expr = make_query(block)[7] + .as()[1] + .as() + .get(); + REQUIRE(side_effect_expr.get_statement() == ID_java_new_array); + REQUIRE( + numeric_cast_v( + make_query(side_effect_expr)[0].as().get()) == 1); + } + THEN( + "assign array_field_1 :" + "malloc_site->array_field_1 = user_specified_array_ref;") + { + REQUIRE( + make_query(block)[8] + .as()[0] + .as() + .get() + .get_component_name() == "array_field_1"); + } + THEN( + "assign pointer to the data for initializing it:" + "user_specified_array_data_init = user_specified_array_ref->data;") + { + REQUIRE( + make_query(block)[9].as()[0].get().type() == + java_reference_type(java_int_type())); + } + THEN( + "assign the array cell value :" + "user_specified_array_data_init[0] = 42;") + { + REQUIRE( + numeric_cast_v(make_query(block)[10] + .as()[1] + .as() + .get()) == 42); + } + THEN( + "assign array_field_2 :" + "malloc_site->array_field_2 = user_specified_array_ref;") + { + REQUIRE( + make_query(block)[11] + .as()[0] + .as() + .get() + .get_component_name() == "array_field_2"); + } + } }