diff --git a/regression/cbmc-java/enum1/test.desc b/regression/cbmc-java/enum1/test.desc index eff2c2e6817..51d41e272c3 100644 --- a/regression/cbmc-java/enum1/test.desc +++ b/regression/cbmc-java/enum1/test.desc @@ -1,4 +1,4 @@ -CORE +KNOWNBUG enum1.class --java-unwind-enum-static --unwind 3 ^VERIFICATION SUCCESSFUL$ @@ -7,3 +7,5 @@ enum1.class ^Unwinding loop java::enum1.:\(\)V.0 iteration 5 \(6 max\) file enum1.java line 6 function java::enum1.:\(\)V bytecode-index 78 thread 0$ -- ^warning: ignoring +-- +cf. https://diffblue.atlassian.net/browse/TG-611 diff --git a/regression/cbmc-java/iterator2/test.desc b/regression/cbmc-java/iterator2/test.desc index 68525dd9b76..4f4f1490295 100644 --- a/regression/cbmc-java/iterator2/test.desc +++ b/regression/cbmc-java/iterator2/test.desc @@ -1,8 +1,10 @@ -CORE +KNOWNBUG iterator2.class ---cover location --unwind 3 --function iterator2.f +--cover location --unwind 3 --function iterator2.f ^EXIT=0$ ^SIGNAL=0$ ^.*SATISFIED$ -- ^warning: ignoring +-- +https://diffblue.atlassian.net/browse/TG-610 diff --git a/regression/cbmc-java/lvt-groovy/test.desc b/regression/cbmc-java/lvt-groovy/test.desc index 050fcd09c7e..7bbc58e4854 100644 --- a/regression/cbmc-java/lvt-groovy/test.desc +++ b/regression/cbmc-java/lvt-groovy/test.desc @@ -1,7 +1,8 @@ -CORE +KNOWNBUG DetectSplitPackagesTask.class --show-symbol-table ^EXIT=0$ ^SIGNAL=0$ -- -- +cf. https://diffblue.atlassian.net/browse/TG-610 diff --git a/src/java_bytecode/ci_lazy_methods.cpp b/src/java_bytecode/ci_lazy_methods.cpp index 41eab823e42..d69fd113eb3 100644 --- a/src/java_bytecode/ci_lazy_methods.cpp +++ b/src/java_bytecode/ci_lazy_methods.cpp @@ -79,7 +79,7 @@ bool ci_lazy_methodst::operator()( { const irep_idt methodid="java::"+id2string(classname)+"."+ id2string(method.name)+":"+ - id2string(method.signature); + id2string(method.descriptor); method_worklist2.push_back(methodid); } } @@ -505,4 +505,3 @@ irep_idt ci_lazy_methodst::get_virtual_method_target( else return irep_idt(); } - diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 71d742c247f..1549e78a1b8 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -21,6 +21,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_bytecode_language.h" #include "java_utils.h" +#include #include #include #include @@ -101,6 +102,25 @@ void java_bytecode_convert_classt::convert(const classt &c) } java_class_typet class_type; + if(c.signature.has_value()) + { + java_generics_class_typet generic_class_type; +#ifdef DEBUG + std::cout << "INFO: found generic class signature " + << c.signature.value() + << " in parsed class " + << c.name << "\n"; +#endif + for(auto t : java_generic_type_from_string( + id2string(c.name), + c.signature.value())) + { + generic_class_type.generic_types() + .push_back(to_java_generic_parameter(t)); + } + + class_type=generic_class_type; + } class_type.set_tag(c.name); class_type.set(ID_base_name, c.name); @@ -174,7 +194,7 @@ void java_bytecode_convert_classt::convert(const classt &c) const irep_idt method_identifier= id2string(qualified_classname)+ "."+id2string(method.name)+ - ":"+method.signature; + ":"+method.descriptor; // Always run the lazy pre-stage, as it symbol-table // registers the function. debug() << "Adding symbol: method '" << method_identifier << "'" << eom; @@ -195,7 +215,48 @@ void java_bytecode_convert_classt::convert( symbolt &class_symbol, const fieldt &f) { - typet field_type=java_type_from_string(f.signature); + typet field_type; + if(f.signature.has_value()) + { + field_type=java_type_from_string( + f.signature.value(), + id2string(class_symbol.name)); + + /// this is for a free type variable, e.g., a field of the form `T f;` + if(is_java_generic_parameter(field_type)) + { +#ifdef DEBUG + std::cout << "fieldtype: generic " + << to_java_generic_parameter(field_type).type_variable() + .get_identifier() + << " name " << f.name << "\n"; +#endif + } + + /// this is for a field that holds a generic type, wither with instantiated + /// or with free type variables, e.g., `List l;` or `List l;` + else if(is_java_generic_type(field_type)) + { + java_generic_typet &with_gen_type= + to_java_generic_type(field_type); +#ifdef DEBUG + std::cout << "fieldtype: generic container type " + << std::to_string(with_gen_type.generic_type_variables().size()) + << " type " << with_gen_type.id() + << " name " << f.name + << " subtype id " << with_gen_type.subtype().id() << "\n"; +#endif + field_type=with_gen_type; + } + + /// This case is not possible, a field is either a non-instantiated type + /// variable or a generics container type. + INVARIANT( + !is_java_generic_inst_parameter(field_type), + "Cannot be an instantiated type variable here."); + } + else + field_type=java_type_from_string(f.descriptor); // is this a static field? if(f.is_static) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 840b214049a..ea4949e9f93 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -252,7 +252,7 @@ const exprt java_bytecode_convert_methodt::variable( /// method conversion just yet. The caller should call /// java_bytecode_convert_method later to give the symbol/method a body. /// \par parameters: `class_symbol`: class this method belongs to -/// `method_identifier`: fully qualified method name, including type signature +/// `method_identifier`: fully qualified method name, including type descriptor /// (e.g. "x.y.z.f:(I)") /// `m`: parsed method object to convert /// `symbol_table`: global symbol table (will be modified) @@ -263,7 +263,19 @@ void java_bytecode_convert_method_lazy( symbol_tablet &symbol_table) { symbolt method_symbol; - typet member_type=java_type_from_string(m.signature); + typet member_type; + if(m.signature.has_value()) + { +#ifdef DEBUG + std::cout << "method " << m.name + << " has signature " << m.signature.value() << "\n"; +#endif + member_type=java_type_from_string( + m.signature.value(), + id2string(class_symbol.name)); + } + else + member_type=java_type_from_string(m.descriptor); method_symbol.name=method_identifier; method_symbol.base_name=m.base_name; @@ -317,7 +329,7 @@ void java_bytecode_convert_methodt::convert( // to retrieve the symbol (constructed by java_bytecode_convert_method_lazy) // associated to the method const irep_idt method_identifier= - id2string(class_symbol.name)+"."+id2string(m.name)+":"+m.signature; + id2string(class_symbol.name)+"."+id2string(m.name)+":"+m.descriptor; method_id=method_identifier; const auto &old_sym=symbol_table.lookup(method_identifier); @@ -350,7 +362,16 @@ void java_bytecode_convert_methodt::convert( // Construct a fully qualified name for the parameter v, // e.g. my.package.ClassName.myMethodName:(II)I::anIntParam, and then a // symbol_exprt with the parameter and its type - typet t=java_type_from_string(v.signature); + typet t; + if(v.signature.has_value()) + { + t=java_type_from_string( + v.signature.value(), + id2string(class_symbol.name)); + } + else + t=java_type_from_string(v.descriptor); + std::ostringstream id_oss; id_oss << method_id << "::" << v.name; irep_idt identifier(id_oss.str()); diff --git a/src/java_bytecode/java_bytecode_parse_tree.cpp b/src/java_bytecode/java_bytecode_parse_tree.cpp index f19de120195..b8a60d8ddca 100644 --- a/src/java_bytecode/java_bytecode_parse_tree.cpp +++ b/src/java_bytecode/java_bytecode_parse_tree.cpp @@ -28,6 +28,7 @@ void java_bytecode_parse_treet::classt::swap( std::swap(other.is_public, is_public); std::swap(other.is_protected, is_protected); std::swap(other.is_private, is_private); + std::swap(other.signature, signature); other.implements.swap(implements); other.fields.swap(fields); other.methods.swap(methods); @@ -151,7 +152,7 @@ void java_bytecode_parse_treet::methodt::output(std::ostream &out) const out << "synchronized "; out << name; - out << " : " << signature; + out << " : " << descriptor; out << '\n'; @@ -188,7 +189,7 @@ void java_bytecode_parse_treet::methodt::output(std::ostream &out) const for(const auto &v : local_variable_table) { out << " " << v.index << ": " << v.name << ' ' - << v.signature << '\n'; + << v.descriptor << '\n'; } out << '\n'; @@ -212,7 +213,7 @@ void java_bytecode_parse_treet::fieldt::output(std::ostream &out) const out << "static "; out << name; - out << " : " << signature << ';'; + out << " : " << descriptor << ';'; out << '\n'; } diff --git a/src/java_bytecode/java_bytecode_parse_tree.h b/src/java_bytecode/java_bytecode_parse_tree.h index d982a62cd39..8470d0cb864 100644 --- a/src/java_bytecode/java_bytecode_parse_tree.h +++ b/src/java_bytecode/java_bytecode_parse_tree.h @@ -12,6 +12,7 @@ Author: Daniel Kroening, kroening@kroening.com #include +#include #include #include @@ -54,7 +55,8 @@ class java_bytecode_parse_treet class membert { public: - std::string signature; + std::string descriptor; + optionalt signature; irep_idt name; bool is_public, is_protected, is_private, is_static, is_final; annotationst annotations; @@ -62,8 +64,8 @@ class java_bytecode_parse_treet virtual void output(std::ostream &out) const = 0; membert(): - is_public(false), is_protected(false), is_private(false), - is_static(false), is_final(false) + is_public(false), is_protected(false), + is_private(false), is_static(false), is_final(false) { } }; @@ -100,7 +102,8 @@ class java_bytecode_parse_treet { public: irep_idt name; - std::string signature; + std::string descriptor; + optionalt signature; std::size_t index; std::size_t start_pc; std::size_t length; @@ -174,7 +177,7 @@ class java_bytecode_parse_treet typedef std::list implementst; implementst implements; - + optionalt signature; typedef std::list fieldst; typedef std::list methodst; fieldst fields; diff --git a/src/java_bytecode/java_bytecode_parser.cpp b/src/java_bytecode/java_bytecode_parser.cpp index 1b424291a5c..9bea073ff14 100644 --- a/src/java_bytecode/java_bytecode_parser.cpp +++ b/src/java_bytecode/java_bytecode_parser.cpp @@ -125,6 +125,7 @@ class java_bytecode_parsert:public parsert void rbytecode(methodt::instructionst &); void get_class_refs(); void get_class_refs_rec(const typet &); + void parse_local_variable_type_table(methodt &method); void skip_bytes(std::size_t bytes) { @@ -333,19 +334,45 @@ void java_bytecode_parsert::get_class_refs() } } - for(const auto &m : parse_tree.parsed_class.fields) + for(const auto &field : parse_tree.parsed_class.fields) { - typet t=java_type_from_string(m.signature); - get_class_refs_rec(t); + typet field_type; + if(field.signature.has_value()) + { + field_type=java_type_from_string( + field.signature.value(), + "java::"+id2string(parse_tree.parsed_class.name)); + } + else + field_type=java_type_from_string(field.descriptor); + + get_class_refs_rec(field_type); } - for(const auto &m : parse_tree.parsed_class.methods) + for(const auto &method : parse_tree.parsed_class.methods) { - typet t=java_type_from_string(m.signature); - get_class_refs_rec(t); - for(const auto &var : m.local_variable_table) + typet method_type; + if(method.signature.has_value()) { - typet var_type=java_type_from_string(var.signature); + method_type=java_type_from_string( + method.signature.value(), + "java::"+id2string(parse_tree.parsed_class.name)); + } + else + method_type=java_type_from_string(method.descriptor); + + get_class_refs_rec(method_type); + for(const auto &var : method.local_variable_table) + { + typet var_type; + if(var.signature.has_value()) + { + var_type=java_type_from_string( + var.signature.value(), + "java::"+id2string(parse_tree.parsed_class.name)); + } + else + var_type=java_type_from_string(var.descriptor); get_class_refs_rec(var_type); } } @@ -628,7 +655,8 @@ void java_bytecode_parsert::rfields(classt &parsed_class) field.is_static=(access_flags&ACC_STATIC)!=0; field.is_final=(access_flags&ACC_FINAL)!=0; field.is_enum=(access_flags&ACC_ENUM)!=0; - field.signature=id2string(pool_entry(descriptor_index).s); + + field.descriptor=id2string(pool_entry(descriptor_index).s); field.is_public=(access_flags&ACC_PUBLIC)!=0; field.is_protected=(access_flags&ACC_PROTECTED)!=0; field.is_private=(access_flags&ACC_PRIVATE)!=0; @@ -936,7 +964,7 @@ void java_bytecode_parsert::rmethod_attribute(methodt &method) it->source_location .set_function( "java::"+id2string(parse_tree.parsed_class.name)+"."+ - id2string(method.name)+":"+method.signature); + id2string(method.name)+":"+method.descriptor); } // line number of method @@ -944,6 +972,11 @@ void java_bytecode_parsert::rmethod_attribute(methodt &method) method.source_location.set_line( method.instructions.begin()->source_location.get_line()); } + else if(attribute_name=="Signature") + { + u2 signature_index=read_u2(); + method.signature=id2string(pool_entry(signature_index).s); + } else if(attribute_name=="RuntimeInvisibleAnnotations" || attribute_name=="RuntimeVisibleAnnotations") { @@ -960,7 +993,12 @@ void java_bytecode_parsert::rfield_attribute(fieldt &field) irep_idt attribute_name=pool_entry(attribute_name_index).s; - if(attribute_name=="RuntimeInvisibleAnnotations" || + if(attribute_name=="Signature") + { + u2 signature_index=read_u2(); + field.signature=id2string(pool_entry(signature_index).s); + } + else if(attribute_name=="RuntimeInvisibleAnnotations" || attribute_name=="RuntimeVisibleAnnotations") { rRuntimeAnnotation_attribute(field.annotations); @@ -1022,12 +1060,16 @@ void java_bytecode_parsert::rcode_attribute(methodt &method) method.local_variable_table[i].index=index; method.local_variable_table[i].name=pool_entry(name_index).s; - method.local_variable_table[i].signature= + method.local_variable_table[i].descriptor= id2string(pool_entry(descriptor_index).s); method.local_variable_table[i].start_pc=start_pc; method.local_variable_table[i].length=length; } } + else if(attribute_name=="LocalVariableTypeTable") + { + parse_local_variable_type_table(method); + } else if(attribute_name=="StackMapTable") { u2 stack_map_entries=read_u2(); @@ -1307,6 +1349,11 @@ void java_bytecode_parsert::rclass_attribute(classt &parsed_class) } } } + else if(attribute_name=="Signature") + { + u2 signature_index=read_u2(); + parsed_class.signature=id2string(pool_entry(signature_index).s); + } else if(attribute_name=="RuntimeInvisibleAnnotations" || attribute_name=="RuntimeVisibleAnnotations") { @@ -1356,7 +1403,7 @@ void java_bytecode_parsert::rmethod(classt &parsed_class) method.is_native=(access_flags&ACC_NATIVE)!=0; method.name=pool_entry(name_index).s; method.base_name=pool_entry(name_index).s; - method.signature=id2string(pool_entry(descriptor_index).s); + method.descriptor=id2string(pool_entry(descriptor_index).s); size_t flags=(method.is_public?1:0)+ (method.is_protected?1:0)+ @@ -1401,3 +1448,42 @@ bool java_bytecode_parse( return java_bytecode_parse(in, parse_tree, message_handler); } + +/// Parses the local variable type table of a method. The LVTT holds generic +/// type information for variables in the local variable table (LVT). At most as +/// many variables as present in the LVT can be in the LVTT. +void java_bytecode_parsert::parse_local_variable_type_table(methodt &method) +{ + u2 local_variable_type_table_length=read_u2(); + + INVARIANT( + local_variable_type_table_length<=method.local_variable_table.size(), + "Local variable type table cannot have more elements " + "than the local variable table."); + for(std::size_t i=0; ivar.name!=it->var.name || - pred_var->var.signature!=it->var.signature) + pred_var->var.descriptor!=it->var.descriptor) { // These sorts of infeasible edges can occur because // jsr handling is presently vague (any subroutine is @@ -714,7 +714,7 @@ void java_bytecode_convert_methodt::setup_local_variables( #ifdef DEBUG debug() << "jcm: setup-local-vars: m.is_static " - << m.is_static << " m.signature " << m.signature << eom; + << m.is_static << " m.descriptor " << m.descriptor << eom; debug() << "jcm: setup-local-vars: lv arg slots " << slots_for_parameters << eom; debug() << "jcm: setup-local-vars: lvt size " @@ -765,10 +765,15 @@ void java_bytecode_convert_methodt::setup_local_variables( #ifdef DEBUG debug() << "jcm: setup-local-vars: merged variable: idx " << v.var.index - << " name " << v.var.name << " v.var.signature '" - << v.var.signature << "' holes " << v.holes.size() << eom; + << " name " << v.var.name << " v.var.descriptor '" + << v.var.descriptor << "' holes " << v.holes.size() << eom; #endif - typet t=java_type_from_string(v.var.signature); + typet t; + if(v.var.signature.has_value()) + t=java_type_from_string(v.var.signature.value()); + else + t=java_type_from_string(v.var.descriptor); + std::ostringstream id_oss; id_oss << method_id << "::" << v.var.start_pc << "::" << v.var.name; irep_idt identifier(id_oss.str()); diff --git a/src/java_bytecode/java_types.cpp b/src/java_bytecode/java_types.cpp index b3f908a1ca8..e7c45dfc800 100644 --- a/src/java_bytecode/java_types.cpp +++ b/src/java_bytecode/java_types.cpp @@ -6,9 +6,6 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ -#include "java_types.h" - -#include #include #include @@ -16,6 +13,14 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include + +#include "java_types.h" +#include "java_utils.h" + +#ifdef DEBUG +#include +#endif typet java_int_type() { @@ -114,7 +119,7 @@ bool is_java_array_tag(const irep_idt& tag) bool is_reference_type(const char t) { - return 'a' == t; + return 'a'==t; } typet java_type_from_char(char t) @@ -158,11 +163,32 @@ exprt java_bytecode_promotion(const exprt &expr) return typecast_exprt(expr, new_type); } -typet java_type_from_string(const std::string &src) +/// Transforms a string representation of a Java type into an internal type +/// representation thereof. +/// +/// Example use are object types like "Ljava/lang/Integer;", type variables like +/// "TE;" which require a non-empty \p class_name or generic types like +/// "Ljava/util/List;" or "Ljava/util/List;" +/// +/// \param src: the string representation as used in the class file +/// \param class_name: name of class to append to generic type variables +/// \returns internal type representation for GOTO programs +typet java_type_from_string( + const std::string &src, + const std::string &class_name) { if(src.empty()) return nil_typet(); + // a java type is encoded in different ways + // - a method type is encoded as '(...)R' where the parenthesis include the + // parameter types and R is the type of the return value + // - atomic types are encoded as single characters + // - array types are encoded as '[' followed by the type of the array + // content + // - object types are named with prefix 'L' and suffix ';', e.g., + // Ljava/lang/Object; + // - type variables are similar to object types but have the prefix 'T' switch(src[0]) { case '(': // function type @@ -174,7 +200,9 @@ typet java_type_from_string(const std::string &src) code_typet result; result.return_type()= - java_type_from_string(std::string(src, e_pos+1, std::string::npos)); + java_type_from_string( + std::string(src, e_pos+1, std::string::npos), + class_name); for(std::size_t i=1; i' + i=find_closing_delimiter(src, generic_open, '<', '>')+1; + else + i=src.find(';', i); // ends on ; break; } + + // parameter is an array else if(src[i]=='[') i++; + + // parameter is a type variable + else if(src[i]=='T') + i=src.find(';', i); // ends on ; + + // type is an atomic type (just one character) else break; } std::string sub_str=src.substr(start, i-start+1); - param.type()=java_type_from_string(sub_str); + param.type()=java_type_from_string(sub_str, class_name); if(param.type().id()==ID_symbol) param.type()=java_reference_type(param.type()); @@ -215,7 +259,7 @@ typet java_type_from_string(const std::string &src) return nil_typet(); char subtype_letter=src[1]; const typet subtype= - java_type_from_string(src.substr(1, std::string::npos)); + java_type_from_string(src.substr(1, std::string::npos), class_name); if(subtype_letter=='L' || // [L denotes a reference array of some sort. subtype_letter=='[') // Array-of-arrays subtype_letter='A'; @@ -233,7 +277,16 @@ typet java_type_from_string(const std::string &src) case 'Z': return java_boolean_type(); case 'V': return java_void_type(); case 'J': return java_long_type(); - + case 'T': + { + // parse name of type variable + INVARIANT(src[src.size()-1]==';', "Generic type name must end on ';'."); + PRECONDITION(!class_name.empty()); + irep_idt type_var_name(class_name+"::"+src.substr(1, src.size()-2)); + return java_generic_parametert( + type_var_name, + java_type_from_string("Ljava/lang/Object;").subtype()); + } case 'L': { // ends on ; @@ -241,6 +294,91 @@ typet java_type_from_string(const std::string &src) return nil_typet(); std::string class_name=src.substr(1, src.size()-2); + std::size_t f_pos=src.find('<', 1); + // get generic type information + if(f_pos!=std::string::npos) + { + std::size_t e_pos=find_closing_delimiter(src, f_pos, '<', '>'); + if(e_pos==std::string::npos) + return nil_typet(); + + // construct container type + std::string generic_container_class=src.substr(1, f_pos-1); + + for(unsigned i=0; i` a vector with two elements would be returned +/// +/// \returns vector of java types representing the generic type variables +std::vector java_generic_type_from_string( + const std::string &class_name, + const std::string &src) +{ + /// the class signature is of the form base_class + size_t signature_end=src.find('>'); + INVARIANT( + src[0]=='<' && signature_end!=std::string::npos, + "Class signature has unexpected format"); + std::string signature(src.substr(1, signature_end-1)); + + PRECONDITION(signature[signature.size()-1]==';'); + + std::vector types; + size_t var_sep=signature.find(';'); + while(var_sep!=std::string::npos) + { + size_t bound_sep=signature.find(':'); + INVARIANT(bound_sep!=std::string::npos, "No bound for type variable."); + std::string type_var_name( + "java::"+class_name+"::"+signature.substr(0, bound_sep)); + std::string bound_type(signature.substr(bound_sep+1, var_sep-bound_sep)); + + java_generic_parametert type_var_type( + type_var_name, + java_type_from_string(bound_type, class_name).subtype()); + + types.push_back(type_var_type); + signature=signature.substr(var_sep+1, std::string::npos); + var_sep=signature.find(';'); + } + return types; +} + // java/lang/Object -> java.lang.Object static std::string slash_to_dot(const std::string &src) { diff --git a/src/java_bytecode/java_types.h b/src/java_bytecode/java_types.h index 27ec5becbb4..15d4f0689b9 100644 --- a/src/java_bytecode/java_types.h +++ b/src/java_bytecode/java_types.h @@ -10,8 +10,11 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_JAVA_BYTECODE_JAVA_TYPES_H #define CPROVER_JAVA_BYTECODE_JAVA_TYPES_H +#include #include #include +#include +#include class java_class_typet:public class_typet { @@ -66,8 +69,13 @@ bool is_reference_type(char t); // a reference typet java_type_from_char(char t); -typet java_type_from_string(const std::string &); +typet java_type_from_string( + const std::string &, + const std::string &class_name=""); char java_char_from_type(const typet &type); +std::vector java_generic_type_from_string( + const std::string &, + const std::string &); typet java_bytecode_promotion(const typet &); exprt java_bytecode_promotion(const exprt &); @@ -76,4 +84,270 @@ bool is_java_array_tag(const irep_idt& tag); bool is_valid_java_array(const struct_typet &type); + +/// class to hold a Java generic type +/// upper bound can specify type requirements +class java_generic_parametert:public reference_typet +{ +public: + typedef symbol_typet type_variablet; + typedef std::vector type_variablest; + + java_generic_parametert(const irep_idt &_type_var_name, const typet &_bound) : + // the reference_typet now needs a pointer width, here it uses the one + // defined in the reference_type function from c_types.cpp + reference_typet(_bound, config.ansi_c.pointer_width) + { + PRECONDITION(_bound.id()==ID_symbol); + set(ID_C_java_generic_parameter, true); + // bound must be symbol type + type_variables().push_back(symbol_typet(_type_var_name)); + } + + /// \return the type variable as symbol type + const type_variablet &type_variable() const + { + PRECONDITION(!type_variables().empty()); + return type_variables().front(); + } + + const type_variablest &type_variables() const + { + return (const type_variablest &)(find(ID_type_variables).get_sub()); + } + + type_variablest &type_variables() + { + return (type_variablest &)(add(ID_type_variables).get_sub()); + } +}; + +/// \param type: type the type to check +/// \return true if type is a generic Java parameter type, e.g., T in List +inline bool is_java_generic_parameter(const typet &type) +{ + return type.get_bool(ID_C_java_generic_parameter); +} + +/// \param type: source type +/// \return cast of type into a java_generic_parametert +inline const java_generic_parametert &to_java_generic_parameter( + const typet &type) +{ + PRECONDITION(is_java_generic_parameter(type)); + return static_cast(type); +} + +/// \param type: source type +/// \return cast of type into a java_generic_parameter +inline java_generic_parametert &to_java_generic_parameter(typet &type) +{ + PRECONDITION(is_java_generic_parameter(type)); + return static_cast(type); +} + +/// class to hold an instantiated type variable bound is exact, for example the +/// `java.lang.Integer` type in a `List` +class java_generic_inst_parametert:public java_generic_parametert +{ +public: + // uses empty name for base type variable java_generic_parametert as real name + // is not known here + explicit java_generic_inst_parametert(const typet &type) : + java_generic_parametert(irep_idt(), type) + { + set(ID_C_java_generic_inst_parameter, true); + } +}; + +/// \param type: the type to check +/// \return true if type is an instantiated generic Java type, e.g., the Integer +/// in List +inline bool is_java_generic_inst_parameter(const typet &type) +{ + return type.get_bool(ID_C_java_generic_inst_parameter); +} + +/// \param type: source type +/// \return cast of type into an instantiated java_generic_parameter +inline const java_generic_inst_parametert &to_java_generic_inst_parameter( + const typet &type) +{ + PRECONDITION( + type.id()==ID_pointer && + is_java_generic_inst_parameter(type)); + return static_cast(type); +} + +/// \param type: source type +/// \return cast of type into an instantiated java_generic_inst_parametert +inline java_generic_inst_parametert &to_java_generic_inst_parameter(typet &type) +{ + PRECONDITION( + type.id()==ID_pointer && + is_java_generic_inst_parameter(type)); + return static_cast(type); +} + +/// class to hold type with generic type variable, for example `java.util.List` +/// in either a reference of type List or List. The vector holds +/// the types of the type Variables, that is the vector has the length of the +/// number of type variables of the generic class. For example in `HashMap` it would contain two elements, each of type `java_generic_parametert`, +/// in `HashMap` it would contains two elements, the first of type +/// `java_generic_inst_parametert` and the second of type +/// `java_generic_parametert`. +class java_generic_typet:public reference_typet +{ +public: + typedef std::vector generic_type_variablest; + + explicit java_generic_typet(const typet &_type) : + reference_typet(_type, config.ansi_c.pointer_width) + { + set(ID_C_java_generic_type, true); + } + + + /// \return vector of type variables + const generic_type_variablest &generic_type_variables() const + { + return (const generic_type_variablest &)(find(ID_type_variables).get_sub()); + } + + /// \return vector of type variables + generic_type_variablest &generic_type_variables() + { + return (generic_type_variablest &)(add(ID_type_variables).get_sub()); + } +}; + + +/// \param type: the type to check +/// \return true if type is java type containing with generics, e.g., List +inline bool is_java_generic_type(const typet &type) +{ + return type.get_bool(ID_C_java_generic_type); +} + +/// \param type: source type +/// \return cast of type into java type with generics +inline const java_generic_typet &to_java_generic_type( + const typet &type) +{ + PRECONDITION( + type.id()==ID_pointer && + is_java_generic_type(type)); + return static_cast(type); +} + +/// \param type: source type +/// \return cast of type into java type with generics +inline java_generic_typet &to_java_generic_type(typet &type) +{ + PRECONDITION( + type.id()==ID_pointer && + is_java_generic_type(type)); + return static_cast(type); +} + +/// Class to hold a class with generics, extends the java class type with a +/// vector of java_generic_type variables. +/// +/// For example, a class definition `class MyGenericClass` +class java_generics_class_typet:public java_class_typet +{ + public: + typedef std::vector generic_typest; + + java_generics_class_typet() + { + set(ID_C_java_generics_class_type, true); + } + + const generic_typest &generic_types() const + { + return (const generic_typest &)(find(ID_generic_types).get_sub()); + } + + generic_typest &generic_types() + { + return (generic_typest &)(add(ID_generic_types).get_sub()); + } +}; + +/// \param type: the type to check +/// \return true if type is a java class type with generics +inline bool is_java_generics_class_type(const typet &type) +{ + return type.get_bool(ID_C_java_generics_class_type); +} + +/// \param type: the type to check +/// \return cast of type to java_generics_class_typet +inline const java_generics_class_typet &to_java_generics_class_type( + const java_class_typet &type) +{ + PRECONDITION(is_java_generics_class_type(type)); + return static_cast(type); +} + +/// \param type: source type +/// \return cast of type into a java class type with generics +inline java_generics_class_typet &to_java_generics_class_type( + java_class_typet &type) +{ + PRECONDITION(is_java_generics_class_type(type)); + return static_cast(type); +} + +/// Access information of instantiated type params of java instantiated type. +/// \param index: the index of the type variable +/// \param type: the type from which to extract the type variable +/// \return the type variable of t at the given index +inline const typet &java_generic_get_inst_type( + size_t index, + const java_generic_typet &type) +{ + const std::vector &gen_types= + type.generic_type_variables(); + PRECONDITION(index &gen_types=type.generic_types(); + + PRECONDITION(index &gen_types=type.generic_types(); + + PRECONDITION(index> +/// +/// \param src: the source string to scan +/// \param open_pos: the initial position of the opening delimiter from where to +/// start the search +/// \param open_char: the opening delimiter +/// \param close_char: the closing delimiter +/// \returns the index of the matching corresponding closing delimiter in \p src +size_t find_closing_delimiter( + const std::string &src, + size_t open_pos, + char open_char, + char close_char) +{ + // having the same opening and closing delimiter does not allow for hierarchic + // structuring + PRECONDITION(open_char!=close_char); + PRECONDITION(src[open_pos]==open_char); + size_t c_pos=open_pos+1; + const size_t end_pos=src.size()-1; + size_t depth=0; + + while(c_pos<=end_pos) + { + if(src[c_pos]=='<') + depth++; + else if(src[c_pos]=='>') + { + if(depth==0) + return c_pos; + depth--; + } + c_pos++; + // limit depth to sensible values + INVARIANT( + depth<=(src.size()-open_pos), + "No closing \'"+std::to_string(close_char)+ + "\' found in signature to parse."); + } + // did not find corresponding closing '>' + return std::string::npos; +} diff --git a/src/java_bytecode/java_utils.h b/src/java_bytecode/java_utils.h index ed26fe1caf6..9428494db93 100644 --- a/src/java_bytecode/java_utils.h +++ b/src/java_bytecode/java_utils.h @@ -66,4 +66,10 @@ irep_idt resolve_friendly_method_name( /// \param type: expected result type (typically expr.type().subtype()) dereference_exprt checked_dereference(const exprt &expr, const typet &type); +size_t find_closing_delimiter( + const std::string &src, + size_t position, + char open_char, + char close_char); + #endif // CPROVER_JAVA_BYTECODE_JAVA_UTILS_H diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index cb8e1c89dd3..f98ad9fda88 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -822,6 +822,12 @@ IREP_ID_ONE(array_replace) IREP_ID_ONE(switch_case_number) IREP_ID_ONE(java_array_access) IREP_ID_ONE(java_member_access) +IREP_ID_TWO(C_java_generic_parameter, #java_generic_parameter) +IREP_ID_TWO(C_java_generic_inst_parameter, #java_generic_inst_parameter) +IREP_ID_TWO(C_java_generic_type, #java_generic_type) +IREP_ID_TWO(C_java_generics_class_type, #java_generics_class_type) +IREP_ID_TWO(generic_types, #generic_types) +IREP_ID_TWO(type_variables, #type_variables) #undef IREP_ID_ONE #undef IREP_ID_TWO diff --git a/unit/Makefile b/unit/Makefile index 52c60febfda..48534bd106d 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -19,6 +19,7 @@ SRC += unit_tests.cpp \ analyses/does_remove_const/does_type_preserve_const_correctness.cpp \ analyses/does_remove_const/is_type_at_least_as_const_as.cpp \ java_bytecode/java_bytecode_convert_class/convert_abstract_class.cpp \ + java_bytecode/java_bytecode_parse_generics/parse_generic_class.cpp \ miniBDD_new.cpp \ solvers/refinement/string_constraint_generator_valueof/calculate_max_string_length.cpp \ solvers/refinement/string_constraint_generator_valueof/get_numeric_value_from_character.cpp \ diff --git a/unit/java_bytecode/java_bytecode_parse_generics/generics$bound_element.class b/unit/java_bytecode/java_bytecode_parse_generics/generics$bound_element.class new file mode 100644 index 00000000000..8076c290e12 Binary files /dev/null and b/unit/java_bytecode/java_bytecode_parse_generics/generics$bound_element.class differ diff --git a/unit/java_bytecode/java_bytecode_parse_generics/generics$element.class b/unit/java_bytecode/java_bytecode_parse_generics/generics$element.class new file mode 100644 index 00000000000..34cb36e36b4 Binary files /dev/null and b/unit/java_bytecode/java_bytecode_parse_generics/generics$element.class differ diff --git a/unit/java_bytecode/java_bytecode_parse_generics/generics.class b/unit/java_bytecode/java_bytecode_parse_generics/generics.class new file mode 100644 index 00000000000..4fde1b4d6c4 Binary files /dev/null and b/unit/java_bytecode/java_bytecode_parse_generics/generics.class differ diff --git a/unit/java_bytecode/java_bytecode_parse_generics/generics.java b/unit/java_bytecode/java_bytecode_parse_generics/generics.java new file mode 100644 index 00000000000..0a0af41c63a --- /dev/null +++ b/unit/java_bytecode/java_bytecode_parse_generics/generics.java @@ -0,0 +1,31 @@ +public class generics { + + class element { + E elem; + } + + class bound_element { + NUM elem; + NUM f() { + return elem; + } + void g(NUM e) { + elem=e; + } + } + + bound_element belem; + + Integer f(int n) { + + element e=new element<>(); + e.elem=n; + bound_element be=new bound_element<>(); + belem=new bound_element<>(); + be.elem=new Integer(n+1); + if(n>0) + return e.elem; + else + return be.elem; + } +} diff --git a/unit/java_bytecode/java_bytecode_parse_generics/parse_generic_class.cpp b/unit/java_bytecode/java_bytecode_parse_generics/parse_generic_class.cpp new file mode 100644 index 00000000000..fc0f46bbb73 --- /dev/null +++ b/unit/java_bytecode/java_bytecode_parse_generics/parse_generic_class.cpp @@ -0,0 +1,240 @@ +/*******************************************************************\ + + Module: Unit tests for parsing generic classes + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +#include + +#include +#include + +#include +#include +#include +#include + +SCENARIO( + "java_bytecode_parse_generics", + "[core][java_bytecode][java_bytecode_parse_generics]") +{ + std::unique_ptrjava_lang(new_java_bytecode_language()); + + // Configure the path loading + cmdlinet command_line; + command_line.set( + "java-cp-include-files", + "./java_bytecode/java_bytecode_parse_generics"); + config.java.classpath.push_back( + "./java_bytecode/java_bytecode_parse_generics"); + + std::istringstream java_code_stream("ignored"); + null_message_handlert message_handler; + + // Configure the language, load the class files + java_lang->get_language_options(command_line); + java_lang->set_message_handler(message_handler); + java_lang->parse(java_code_stream, "generics.class"); + symbol_tablet new_symbol_table; + java_lang->typecheck(new_symbol_table, ""); + java_lang->final(new_symbol_table); + + GIVEN("Some class files with Generics") + { + WHEN("Parsing a class with type variable") + { + REQUIRE(new_symbol_table.has_symbol("java::generics$element")); + THEN("The symbol type should be generic") + { + const symbolt &class_symbol= + new_symbol_table.lookup("java::generics$element"); + const typet &symbol_type=class_symbol.type; + + REQUIRE(symbol_type.id()==ID_struct); + class_typet class_type=to_class_type(symbol_type); + REQUIRE(class_type.is_class()); + java_class_typet java_class_type=to_java_class_type(class_type); + REQUIRE(is_java_generics_class_type(java_class_type)); + java_generics_class_typet java_generics_class_type= + to_java_generics_class_type(java_class_type); + + const struct_union_typet::componentt &elem= + java_generics_class_type.get_component("elem"); + const typet &elem_type=java_class_type.component_type("elem"); + + REQUIRE(is_java_generic_parameter(elem_type)); + + REQUIRE(java_generics_class_type.generic_types().size()==1); + THEN("Type variable is named 'E'") + { + typet &type_var=java_generics_class_type.generic_types().front(); + REQUIRE(is_java_generic_parameter(type_var)); + java_generic_parametert generic_type_var= + to_java_generic_parameter(type_var); + REQUIRE( + generic_type_var.type_variable().get_identifier()== + "java::generics$element::E"); + typet &sub_type=generic_type_var.subtype(); + REQUIRE(sub_type.id()==ID_symbol); + symbol_typet &bound_type=to_symbol_type(sub_type); + REQUIRE(bound_type.get_identifier()=="java::java.lang.Object"); + } + } + } + } + + GIVEN("Some class files with generic type variable") + { + WHEN("Parsing a class with bounded type variable") + { + REQUIRE(new_symbol_table.has_symbol("java::generics$bound_element")); + THEN("The symbol type should be generic") + { + const symbolt &class_symbol= + new_symbol_table.lookup("java::generics$bound_element"); + const typet &symbol_type=class_symbol.type; + + REQUIRE(symbol_type.id()==ID_struct); + class_typet class_type=to_class_type(symbol_type); + REQUIRE(class_type.is_class()); + java_class_typet java_class_type=to_java_class_type(class_type); + REQUIRE(is_java_generics_class_type(java_class_type)); + java_generics_class_typet java_generics_class_type= + to_java_generics_class_type(java_class_type); + REQUIRE(java_generics_class_type.generic_types().size()==1); + typet &type_var=java_generics_class_type.generic_types().front(); + REQUIRE(is_java_generic_parameter(type_var)); + java_generic_parametert generic_type_var= + to_java_generic_parameter(type_var); + + REQUIRE( + generic_type_var.type_variable().get_identifier()== + "java::generics$bound_element::NUM"); + REQUIRE( + java_generics_class_type_var(0, java_generics_class_type)== + "java::generics$bound_element::NUM"); + THEN("Bound must be Number") + { + typet &sub_type=generic_type_var.subtype(); + REQUIRE(sub_type.id()==ID_symbol); + symbol_typet &bound_type=to_symbol_type(sub_type); + REQUIRE(bound_type.get_identifier()=="java::java.lang.Number"); + REQUIRE( + to_symbol_type( + java_generics_class_type_bound(0, java_generics_class_type)) + .get_identifier()=="java::java.lang.Number"); + } + + const struct_union_typet::componentt &elem= + java_generics_class_type.get_component("elem"); + const typet &elem_type=java_class_type.component_type("elem"); + + REQUIRE(is_java_generic_parameter(elem_type)); + } + } + } + + GIVEN("Some class files with generic type variable") + { + WHEN("Parsing a class with bounded type variable") + { + REQUIRE(new_symbol_table.has_symbol("java::generics")); + + THEN("The generic fields should be annotated with concrete types") + { + const symbolt &class_symbol=new_symbol_table.lookup("java::generics"); + const typet &symbol_type=class_symbol.type; + + REQUIRE(symbol_type.id()==ID_struct); + class_typet class_type=to_class_type(symbol_type); + REQUIRE(class_type.is_class()); + java_class_typet java_class_type=to_java_class_type(class_type); + REQUIRE(!is_java_generics_class_type(java_class_type)); + + const struct_union_typet::componentt &belem= + java_class_type.get_component("belem"); + const typet &belem_type=java_class_type.component_type("belem"); + + REQUIRE(belem_type!=nil_typet()); + REQUIRE(is_java_generic_type(belem_type)); + THEN("Field has instantiated type variable") + { + const java_generic_typet &container= + to_java_generic_type(belem_type); + + const std::vector &generic_types= + container.generic_type_variables(); + REQUIRE(generic_types.size()==1); + + const typet& inst_type=java_generic_get_inst_type(0, container); + + REQUIRE(inst_type.id()==ID_pointer); + const typet &inst_type_symbol=inst_type.subtype(); + REQUIRE(inst_type_symbol.id()==ID_symbol); + REQUIRE( + to_symbol_type(inst_type_symbol).get_identifier()== + "java::java.lang.Integer"); + } + } + } + } + + GIVEN("Some class files with Generics") + { + WHEN("Methods with generic signatures") + { + REQUIRE( + new_symbol_table + .has_symbol("java::generics$bound_element.f:()Ljava/lang/Number;")); + + THEN("The method should have generic return type") + { + const symbolt &method_symbol= + new_symbol_table + .lookup("java::generics$bound_element.f:()Ljava/lang/Number;"); + const typet &symbol_type=method_symbol.type; + + REQUIRE(symbol_type.id()==ID_code); + + const code_typet &code=to_code_type(symbol_type); + } + + REQUIRE( + new_symbol_table + .has_symbol("java::generics$bound_element.g:(Ljava/lang/Number;)V")); + + THEN("The method should have a generic parameter.") + { + const symbolt &method_symbol= + new_symbol_table + .lookup("java::generics$bound_element.g:(Ljava/lang/Number;)V"); + const typet &symbol_type=method_symbol.type; + + REQUIRE(symbol_type.id()==ID_code); + + const code_typet &code=to_code_type(symbol_type); + + bool found=false; + for(const auto &p : code.parameters()) + { + if(p.get_identifier()== + "java::generics$bound_element.g:(Ljava/lang/Number;)V::e") + { + found=true; + const typet &t=p.type(); + REQUIRE(is_java_generic_parameter(p.type())); + const java_generic_parametert &gen_type= + to_java_generic_parameter(p.type()); + const symbol_typet &type_var=gen_type.type_variable(); + REQUIRE(type_var.get_identifier()== + "java::generics$bound_element::NUM"); + break; + } + } + REQUIRE(found); + } + } + } +}