From d70afb77237ea98b4a3935e6d2dc5b21e1e85cf9 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 24 Oct 2016 12:27:56 +0100 Subject: [PATCH 01/27] Skip already-loaded Java classes --- src/java_bytecode/java_bytecode_convert_class.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 8d42611bfe0..b62c5746765 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -75,6 +75,13 @@ Function: java_bytecode_convert_classt::convert void java_bytecode_convert_classt::convert(const classt &c) { + std::string qualified_classname="java::"+id2string(c.name); + if(symbol_table.has_symbol(qualified_classname)) + { + debug() << "Skip class " << c.name << " (already loaded)" << eom; + return; + } + class_typet class_type; class_type.set_tag(c.name); @@ -107,7 +114,7 @@ void java_bytecode_convert_classt::convert(const classt &c) symbolt new_symbol; new_symbol.base_name=c.name; new_symbol.pretty_name=c.name; - new_symbol.name="java::"+id2string(c.name); + new_symbol.name=qualified_classname; class_type.set(ID_name, new_symbol.name); new_symbol.type=class_type; new_symbol.mode=ID_java; From 11ff2b283b57567d097a75844b5dc23040883303 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 1 Nov 2016 13:48:01 +0000 Subject: [PATCH 02/27] Convert Java methods only when they have a caller This means that library methods, for example, will often not be converted. For the time being virtual methods calls are handled simply, assuming that any class we know of that provides an override might be called. --- .../java_bytecode_convert_class.cpp | 35 ++++++--- .../java_bytecode_convert_class.h | 6 +- .../java_bytecode_convert_method.cpp | 57 +++++++++++++-- .../java_bytecode_convert_method.h | 12 ++- .../java_bytecode_convert_method_class.h | 12 ++- src/java_bytecode/java_bytecode_language.cpp | 73 ++++++++++++++++++- src/java_bytecode/java_entry_point.cpp | 8 +- src/java_bytecode/java_entry_point.h | 3 +- 8 files changed, 179 insertions(+), 27 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index b62c5746765..2036bcd9bce 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -27,11 +27,13 @@ class java_bytecode_convert_classt:public messaget symbol_tablet &_symbol_table, message_handlert &_message_handler, bool _disable_runtime_checks, - size_t _max_array_length): + size_t _max_array_length, + lazy_methodst& _lm): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), - max_array_length(_max_array_length) + max_array_length(_max_array_length), + lazy_methods(_lm) { } @@ -52,6 +54,7 @@ class java_bytecode_convert_classt:public messaget symbol_tablet &symbol_table; const bool disable_runtime_checks; const size_t max_array_length; + lazy_methodst &lazy_methods; // conversion void convert(const classt &c); @@ -81,7 +84,7 @@ void java_bytecode_convert_classt::convert(const classt &c) debug() << "Skip class " << c.name << " (already loaded)" << eom; return; } - + class_typet class_type; class_type.set_tag(c.name); @@ -135,13 +138,19 @@ void java_bytecode_convert_classt::convert(const classt &c) // now do methods for(const auto &method : c.methods) - java_bytecode_convert_method( - *class_symbol, - method, - symbol_table, - get_message_handler(), - disable_runtime_checks, - max_array_length); + { + const irep_idt method_identifier= + id2string(qualified_classname)+ + "."+id2string(method.name)+ + ":"+method.signature; + java_bytecode_convert_method_lazy( + *class_symbol,method_identifier,method,symbol_table); + lazy_methods[method_identifier]= + std::make_pair(class_symbol,&method); + } + //java_bytecode_convert_method( + // *class_symbol, method, symbol_table, get_message_handler(), + // disable_runtime_checks, max_array_length); // is this a root class? if(c.extends.empty()) @@ -329,13 +338,15 @@ bool java_bytecode_convert_class( symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length) + size_t max_array_length, + lazy_methodst &lazy_methods) { java_bytecode_convert_classt java_bytecode_convert_class( symbol_table, message_handler, disable_runtime_checks, - max_array_length); + max_array_length, + lazy_methods); try { diff --git a/src/java_bytecode/java_bytecode_convert_class.h b/src/java_bytecode/java_bytecode_convert_class.h index ffaaf6e34da..8c1dccc02f5 100644 --- a/src/java_bytecode/java_bytecode_convert_class.h +++ b/src/java_bytecode/java_bytecode_convert_class.h @@ -14,11 +14,15 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_bytecode_parse_tree.h" +typedef std::map > + lazy_methodst; + bool java_bytecode_convert_class( const java_bytecode_parse_treet &parse_tree, symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length); + size_t max_array_length, + lazy_methodst &); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_CLASS_H diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index f9cb121091b..cd1cfc7729a 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -21,6 +21,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include "java_bytecode_convert_method.h" @@ -177,6 +178,35 @@ const exprt java_bytecode_convert_methodt::variable( } } +void java_bytecode_convert_method_lazy( + const symbolt& class_symbol, + const irep_idt method_identifier, + const java_bytecode_parse_treet::methodt &m, + symbol_tablet& symbol_table) +{ + symbolt method_symbol; + typet member_type=java_type_from_string(m.signature); + + method_symbol.name=method_identifier; + method_symbol.base_name=m.base_name; + method_symbol.mode=ID_java; + method_symbol.location=m.source_location; + method_symbol.location.set_function(method_identifier); + + if(method_symbol.base_name=="") + { + method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ + id2string(class_symbol.base_name)+"()"; + member_type.set(ID_constructor,true); + } + else + method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ + id2string(m.base_name)+"()"; + + method_symbol.type=member_type; + symbol_table.add(method_symbol); +} + /*******************************************************************\ Function: java_bytecode_convert_methodt::convert @@ -343,10 +373,10 @@ void java_bytecode_convert_methodt::convert( if((!m.is_abstract) && (!m.is_native)) method_symbol.value=convert_instructions(m, code_type); - // do we have the method symbol already? + // Replace the existing stub symbol with the real deal: const auto s_it=symbol_table.symbols.find(method.get_name()); - if(s_it!=symbol_table.symbols.end()) - symbol_table.symbols.erase(s_it); // erase, we stubbed it + assert(s_it!=symbol_table.symbols.end()); + symbol_table.symbols.erase(s_it); symbol_table.add(method_symbol); } @@ -1109,12 +1139,25 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_table.add(symbol); } + needed_methods.push_back(arg0.get(ID_identifier)); + if(is_virtual) { // dynamic binding assert(use_this); assert(!call.arguments().empty()); call.function()=arg0; + const auto& call_class=arg0.get(ID_C_class); + assert(call_class!=irep_idt()); + const auto& call_basename=arg0.get(ID_component_name); + assert(call_basename!=irep_idt()); + auto child_classes=class_hierarchy.get_children_trans(call_class); + for(const auto& child_class : child_classes) + { + auto methodid=id2string(child_class)+"."+id2string(call_basename); + if(symbol_table.has_symbol(methodid)) + needed_methods.push_back(methodid); + } } else { @@ -2140,13 +2183,17 @@ void java_bytecode_convert_method( symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length) + size_t max_array_length, + std::vector& needed_methods, + const class_hierarchyt& ch) { java_bytecode_convert_methodt java_bytecode_convert_method( symbol_table, message_handler, disable_runtime_checks, - max_array_length); + max_array_length, + needed_methods, + ch); java_bytecode_convert_method(class_symbol, method); } diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 8945af95bd1..327bfd09932 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -14,12 +14,22 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_bytecode_parse_tree.h" +class class_hierarchyt; + void java_bytecode_convert_method( const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &, symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length); + size_t max_array_length, + std::vector& needed_methods, + const class_hierarchyt&); + +void java_bytecode_convert_method_lazy( + const symbolt &class_symbol, + const irep_idt method_identifier, + const java_bytecode_parse_treet::methodt &, + symbol_tablet &symbol_table); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_METHOD_H diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 43f51269aab..4a42eb9f1d3 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -15,12 +15,14 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include "java_bytecode_parse_tree.h" +#include "java_bytecode_convert_class.h" #include #include class symbol_tablet; class symbolt; +class class_hierarchyt; class java_bytecode_convert_methodt:public messaget { @@ -29,11 +31,15 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &_symbol_table, message_handlert &_message_handler, bool _disable_runtime_checks, - size_t _max_array_length): + size_t _max_array_length, + std::vector& _needed_methods, + const class_hierarchyt& _ch): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), - max_array_length(_max_array_length) + max_array_length(_max_array_length), + needed_methods(_needed_methods), + class_hierarchy(_ch) { } @@ -52,6 +58,8 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &symbol_table; const bool disable_runtime_checks; const size_t max_array_length; + std::vector& needed_methods; + const class_hierarchyt& class_hierarchy; irep_idt method_id; irep_idt current_method; diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 1d9d9f57b41..fb0e19ef487 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -14,8 +14,11 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include + #include "java_bytecode_language.h" #include "java_bytecode_convert_class.h" +#include "java_bytecode_convert_method.h" #include "java_bytecode_internal_additions.h" #include "java_bytecode_typecheck.h" #include "java_entry_point.h" @@ -180,10 +183,15 @@ Function: java_bytecode_languaget::typecheck \*******************************************************************/ + + bool java_bytecode_languaget::typecheck( symbol_tablet &symbol_table, const std::string &module) { + std::map > + lazy_methods; + // first convert all for(java_class_loadert::class_mapt::const_iterator c_it=java_class_loader.class_map.begin(); @@ -200,10 +208,73 @@ bool java_bytecode_languaget::typecheck( symbol_table, get_message_handler(), disable_runtime_checks, - max_user_array_length)) + max_user_array_length, + lazy_methods)) return true; } + // Now incrementally elaborate methods that are reachable from this entry point: + + // Convert-method will need this to find virtual function targets. + class_hierarchyt ch; + ch(symbol_table); + + std::vector worklist1; + std::vector worklist2; + + auto main_function=get_main_symbol(symbol_table,main_class,get_message_handler(),true); + if(main_function.stop_convert) + { + // Failed, mark all functions in the given main class reachable. + const auto& methods=java_class_loader.class_map.at(main_class).parsed_class.methods; + for(const auto& method : methods) + { + const irep_idt methodid="java::"+id2string(main_class)+"."+ + id2string(method.name)+":"+ + id2string(method.signature); + worklist2.push_back(methodid); + } + } + else + worklist2.push_back(main_function.main_function.name); + + std::set already_populated; + while(worklist2.size()!=0) + { + std::swap(worklist1,worklist2); + for(const auto& mname : worklist1) + { + if(!already_populated.insert(mname).second) + continue; + auto findit=lazy_methods.find(mname); + if(findit==lazy_methods.end()) + { + debug() << "Skip " << mname << eom; + continue; + } + debug() << "Lazy methods: elaborate " << mname << eom; + const auto& parsed_method=findit->second; + java_bytecode_convert_method(*parsed_method.first,*parsed_method.second, + symbol_table,get_message_handler(), + disable_runtime_checks,max_user_array_length,worklist2,ch); + } + worklist1.clear(); + } + + // Remove symbols for methods that were declared but never used: + symbol_tablet keep_symbols; + + for(const auto& sym : symbol_table.symbols) + { + if(lazy_methods.count(sym.first) && !already_populated.count(sym.first)) + continue; + keep_symbols.add(sym.second); + } + + debug() << "Lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods" << eom; + + symbol_table.swap(keep_symbols); + // now typecheck all if(java_bytecode_typecheck( symbol_table, get_message_handler())) diff --git a/src/java_bytecode/java_entry_point.cpp b/src/java_bytecode/java_entry_point.cpp index d4d40beff3b..3a687ad57f6 100644 --- a/src/java_bytecode/java_entry_point.cpp +++ b/src/java_bytecode/java_entry_point.cpp @@ -326,11 +326,11 @@ void java_record_outputs( } } - main_function_resultt get_main_symbol( symbol_tablet &symbol_table, const irep_idt &main_class, - message_handlert &message_handler) + message_handlert &message_handler, + bool allow_no_body) { symbolt symbol; main_function_resultt res; @@ -414,7 +414,7 @@ main_function_resultt get_main_symbol( } // check if it has a body - if(symbol.value.is_nil()) + if(symbol.value.is_nil() && !allow_no_body) { message.error() << "main method `" << main_class << "' has no body" << messaget::eom; @@ -479,7 +479,7 @@ main_function_resultt get_main_symbol( symbol=symbol_table.symbols.find(*matches.begin())->second; // check if it has a body - if(symbol.value.is_nil()) + if(symbol.value.is_nil() && !allow_no_body) { message.error() << "main method `" << main_class << "' has no body" << messaget::eom; diff --git a/src/java_bytecode/java_entry_point.h b/src/java_bytecode/java_entry_point.h index 7fa562de89a..82071aa26b1 100644 --- a/src/java_bytecode/java_entry_point.h +++ b/src/java_bytecode/java_entry_point.h @@ -28,6 +28,7 @@ typedef struct main_function_resultt get_main_symbol( symbol_tablet &symbol_table, const irep_idt &main_class, - message_handlert &); + message_handlert &, + bool allow_no_body = false); #endif // CPROVER_JAVA_BYTECODE_JAVA_ENTRY_POINT_H From fcd03f94ab8002127a48be4492b0308dec065472 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 1 Nov 2016 14:19:53 +0000 Subject: [PATCH 03/27] Remove dead global variables When lazy method conversion is in use, they might end up unused. --- src/java_bytecode/java_bytecode_language.cpp | 21 +++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index fb0e19ef487..fc5ee421343 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -171,6 +171,19 @@ bool java_bytecode_languaget::parse( return false; } +static void gather_needed_globals(const exprt& e, const symbol_tablet& symbol_table, symbol_tablet& needed) +{ + if(e.id()==ID_symbol) + { + const auto& sym=symbol_table.lookup(to_symbol_expr(e).get_identifier()); + if(sym.is_static_lifetime) + needed.add(sym); + } + else + forall_operands(opit,e) + gather_needed_globals(*opit,symbol_table,needed); +} + /*******************************************************************\ Function: java_bytecode_languaget::typecheck @@ -183,8 +196,6 @@ Function: java_bytecode_languaget::typecheck \*******************************************************************/ - - bool java_bytecode_languaget::typecheck( symbol_tablet &symbol_table, const std::string &module) @@ -266,12 +277,16 @@ bool java_bytecode_languaget::typecheck( for(const auto& sym : symbol_table.symbols) { + if(sym.second.is_static_lifetime) + continue; if(lazy_methods.count(sym.first) && !already_populated.count(sym.first)) continue; + if(sym.second.type.id()==ID_code) + gather_needed_globals(sym.second.value,symbol_table,keep_symbols); keep_symbols.add(sym.second); } - debug() << "Lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods" << eom; + debug() << "Lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods and globals" << eom; symbol_table.swap(keep_symbols); From 7f752bb55b258a4d4cfea6d2034071dc8e7bd5b2 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 2 Nov 2016 17:02:41 +0000 Subject: [PATCH 04/27] Restrict virtual method targets to known-created types Previously even with lazy method conversion enabled, if we had a caller to A.f and C.f overrides A.f, we would generate a possible call to C.f even if a C is never instantiated in any reachable method. This commit changes that, such that we need *both* a callsite and evidence of a C being constructed to generate the callgraph edge. --- .../java_bytecode_convert_method.cpp | 24 ++- .../java_bytecode_convert_method.h | 1 + .../java_bytecode_convert_method_class.h | 3 + src/java_bytecode/java_bytecode_language.cpp | 156 +++++++++++++++--- 4 files changed, 148 insertions(+), 36 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index cd1cfc7729a..0d5198ad4b8 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -1063,7 +1063,10 @@ codet java_bytecode_convert_methodt::convert_instructions( { if(as_string(arg0.get(ID_identifier)) .find("")!=std::string::npos) + { + needed_classes.insert(classname); code_type.set(ID_constructor, true); + } else code_type.set(ID_java_super_method_call, true); } @@ -1139,30 +1142,19 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_table.add(symbol); } - needed_methods.push_back(arg0.get(ID_identifier)); - if(is_virtual) { // dynamic binding assert(use_this); assert(!call.arguments().empty()); call.function()=arg0; - const auto& call_class=arg0.get(ID_C_class); - assert(call_class!=irep_idt()); - const auto& call_basename=arg0.get(ID_component_name); - assert(call_basename!=irep_idt()); - auto child_classes=class_hierarchy.get_children_trans(call_class); - for(const auto& child_class : child_classes) - { - auto methodid=id2string(child_class)+"."+id2string(call_basename); - if(symbol_table.has_symbol(methodid)) - needed_methods.push_back(methodid); - } + // Populate needed methods later, once we know what object types can exist. } else { // static binding call.function()=symbol_exprt(arg0.get(ID_identifier), arg0.type()); + needed_methods.push_back(arg0.get(ID_identifier)); } call.function().add_source_location()=loc; @@ -1704,6 +1696,8 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_exprt symbol_expr(arg0.type()); const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); + if(arg0.type().id()==ID_symbol) + needed_classes.insert(to_symbol_type(arg0.type()).get_identifier()); results[0]=java_bytecode_promotion(symbol_expr); // set $assertionDisabled to false @@ -1721,6 +1715,8 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_exprt symbol_expr(arg0.type()); const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); + if(arg0.type().id()==ID_symbol) + needed_classes.insert(to_symbol_type(arg0.type()).get_identifier()); c=code_assignt(symbol_expr, op[0]); } else if(statement==patternt("?2?")) // i2c etc. @@ -2185,6 +2181,7 @@ void java_bytecode_convert_method( bool disable_runtime_checks, size_t max_array_length, std::vector& needed_methods, + std::set& needed_classes, const class_hierarchyt& ch) { java_bytecode_convert_methodt java_bytecode_convert_method( @@ -2193,6 +2190,7 @@ void java_bytecode_convert_method( disable_runtime_checks, max_array_length, needed_methods, + needed_classes, ch); java_bytecode_convert_method(class_symbol, method); diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 327bfd09932..b6302161a43 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -24,6 +24,7 @@ void java_bytecode_convert_method( bool disable_runtime_checks, size_t max_array_length, std::vector& needed_methods, + std::set& needed_classes, const class_hierarchyt&); void java_bytecode_convert_method_lazy( diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 4a42eb9f1d3..b49d22934b1 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -33,12 +33,14 @@ class java_bytecode_convert_methodt:public messaget bool _disable_runtime_checks, size_t _max_array_length, std::vector& _needed_methods, + std::set& _needed_classes, const class_hierarchyt& _ch): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), max_array_length(_max_array_length), needed_methods(_needed_methods), + needed_classes(_needed_classes), class_hierarchy(_ch) { } @@ -59,6 +61,7 @@ class java_bytecode_convert_methodt:public messaget const bool disable_runtime_checks; const size_t max_array_length; std::vector& needed_methods; + std::set& needed_classes; const class_hierarchyt& class_hierarchy; irep_idt method_id; diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index fc5ee421343..057859cef04 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -171,6 +171,48 @@ bool java_bytecode_languaget::parse( return false; } +static void get_virtual_method_targets( + const code_function_callt& c, + const std::set& needed_classes, + std::vector& needed_methods, + const symbol_tablet& symbol_table, + const class_hierarchyt& class_hierarchy) +{ + + const auto& called_function=c.function(); + assert(called_function.id()==ID_virtual_function); + + const auto& call_class=called_function.get(ID_C_class); + assert(call_class!=irep_idt()); + const auto& call_basename=called_function.get(ID_component_name); + assert(call_basename!=irep_idt()); + + auto child_classes=class_hierarchy.get_children_trans(call_class); + child_classes.push_back(call_class); + for(const auto& child_class : child_classes) + { + // Program-wide, is this class ever instantiated? + if(!needed_classes.count(child_class)) + continue; + auto methodid=id2string(child_class)+"."+id2string(call_basename); + if(symbol_table.has_symbol(methodid)) + needed_methods.push_back(methodid); + } +} + +static void gather_virtual_callsites(const exprt& e, std::vector& result) +{ + if(e.id()!=ID_code) + return; + const codet& c=to_code(e); + if(c.get_statement()==ID_function_call && + to_code_function_call(c).function().id()==ID_virtual_function) + result.push_back(&to_code_function_call(c)); + else + forall_operands(it,e) + gather_virtual_callsites(*it,result); +} + static void gather_needed_globals(const exprt& e, const symbol_tablet& symbol_table, symbol_tablet& needed) { if(e.id()==ID_symbol) @@ -184,6 +226,48 @@ static void gather_needed_globals(const exprt& e, const symbol_tablet& symbol_ta gather_needed_globals(*opit,symbol_table,needed); } +static void gather_field_types( + const typet& class_type, + const namespacet& ns, + std::set& needed_classes) +{ + const auto& underlying_type=to_struct_type(ns.follow(class_type)); + for(const auto& field : underlying_type.components()) + { + if(field.type().id()==ID_struct || field.type().id()==ID_symbol) + gather_field_types(field.type(),ns,needed_classes); + else if(field.type().id()==ID_pointer) + { + // Skip array primitive pointers, for example: + if(field.type().subtype().id()!=ID_symbol) + continue; + const auto& field_classid=to_symbol_type(field.type().subtype()).get_identifier(); + if(needed_classes.insert(field_classid).second) + gather_field_types(field.type().subtype(),ns,needed_classes); + } + } +} + +static void initialise_needed_classes( + const std::vector& entry_points, + const namespacet& ns, + std::set& needed_classes) +{ + for(const auto& mname : entry_points) + { + const auto& symbol=ns.lookup(mname); + const auto& mtype=to_code_type(symbol.type); + for(const auto& param : mtype.parameters()) + { + if(param.type().id()==ID_pointer) + { + needed_classes.insert(to_symbol_type(param.type().subtype()).get_identifier()); + gather_field_types(param.type().subtype(),ns,needed_classes); + } + } + } +} + /*******************************************************************\ Function: java_bytecode_languaget::typecheck @@ -230,8 +314,8 @@ bool java_bytecode_languaget::typecheck( class_hierarchyt ch; ch(symbol_table); - std::vector worklist1; - std::vector worklist2; + std::vector method_worklist1; + std::vector method_worklist2; auto main_function=get_main_symbol(symbol_table,main_class,get_message_handler(),true); if(main_function.stop_convert) @@ -243,34 +327,60 @@ bool java_bytecode_languaget::typecheck( const irep_idt methodid="java::"+id2string(main_class)+"."+ id2string(method.name)+":"+ id2string(method.signature); - worklist2.push_back(methodid); + method_worklist2.push_back(methodid); } } else - worklist2.push_back(main_function.main_function.name); + method_worklist2.push_back(main_function.main_function.name); - std::set already_populated; - while(worklist2.size()!=0) - { - std::swap(worklist1,worklist2); - for(const auto& mname : worklist1) + std::set needed_classes; + initialise_needed_classes(method_worklist2,namespacet(symbol_table),needed_classes); + + std::set methods_already_populated; + std::vector virtual_callsites; + + bool any_new_methods; + do { + + any_new_methods=false; + while(method_worklist2.size()!=0) { - if(!already_populated.insert(mname).second) - continue; - auto findit=lazy_methods.find(mname); - if(findit==lazy_methods.end()) + std::swap(method_worklist1,method_worklist2); + for(const auto& mname : method_worklist1) { - debug() << "Skip " << mname << eom; - continue; + if(!methods_already_populated.insert(mname).second) + continue; + auto findit=lazy_methods.find(mname); + if(findit==lazy_methods.end()) + { + debug() << "Skip " << mname << eom; + continue; + } + debug() << "Lazy methods: elaborate " << mname << eom; + const auto& parsed_method=findit->second; + java_bytecode_convert_method(*parsed_method.first,*parsed_method.second, + symbol_table,get_message_handler(), + disable_runtime_checks,max_user_array_length, + method_worklist2,needed_classes,ch); + gather_virtual_callsites(symbol_table.lookup(mname).value,virtual_callsites); + any_new_methods=true; } - debug() << "Lazy methods: elaborate " << mname << eom; - const auto& parsed_method=findit->second; - java_bytecode_convert_method(*parsed_method.first,*parsed_method.second, - symbol_table,get_message_handler(), - disable_runtime_checks,max_user_array_length,worklist2,ch); + method_worklist1.clear(); } - worklist1.clear(); - } + + // Given the object types we now know may be created, populate more + // possible virtual function call targets: + + debug() << "Lazy methods: add virtual method targets (" << virtual_callsites.size() << + " callsites)" << eom; + + for(const auto& callsite : virtual_callsites) + { + get_virtual_method_targets(*callsite,needed_classes,method_worklist2, + symbol_table,ch); + } + + } while(any_new_methods); // Remove symbols for methods that were declared but never used: symbol_tablet keep_symbols; @@ -279,7 +389,7 @@ bool java_bytecode_languaget::typecheck( { if(sym.second.is_static_lifetime) continue; - if(lazy_methods.count(sym.first) && !already_populated.count(sym.first)) + if(lazy_methods.count(sym.first) && !methods_already_populated.count(sym.first)) continue; if(sym.second.type.id()==ID_code) gather_needed_globals(sym.second.value,symbol_table,keep_symbols); From e1cb0c4d6f54eafa818565d64c03a76c6bc06eb7 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 21 Nov 2016 15:01:38 +0000 Subject: [PATCH 05/27] Mark all classes in root jar reachable if it doesn't declare a main function --- src/java_bytecode/java_bytecode_language.cpp | 24 ++++++++++++++------ src/java_bytecode/java_bytecode_language.h | 1 + src/java_bytecode/java_class_loader.h | 1 - 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 057859cef04..dc6c350ba7f 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -150,6 +150,8 @@ bool java_bytecode_languaget::parse( { status() << "JAR file without entry point: loading it all" << eom; java_class_loader.load_entire_jar(path); + for(const auto& kv : java_class_loader.jar_map.at(path).entries) + main_jar_classes.push_back(kv.first); } else java_class_loader.add_jar_file(path); @@ -320,14 +322,22 @@ bool java_bytecode_languaget::typecheck( auto main_function=get_main_symbol(symbol_table,main_class,get_message_handler(),true); if(main_function.stop_convert) { - // Failed, mark all functions in the given main class reachable. - const auto& methods=java_class_loader.class_map.at(main_class).parsed_class.methods; - for(const auto& method : methods) + // Failed, mark all functions in the given main class(es) reachable. + std::vector reachable_classes; + if(!main_class.empty()) + reachable_classes.push_back(main_class); + else + reachable_classes=main_jar_classes; + for(const auto& classname : reachable_classes) { - const irep_idt methodid="java::"+id2string(main_class)+"."+ - id2string(method.name)+":"+ - id2string(method.signature); - method_worklist2.push_back(methodid); + const auto& methods=java_class_loader.class_map.at(classname).parsed_class.methods; + for(const auto& method : methods) + { + const irep_idt methodid="java::"+id2string(classname)+"."+ + id2string(method.name)+":"+ + id2string(method.signature); + method_worklist2.push_back(methodid); + } } } else diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 5937f6f4fd4..36b28682dca 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -72,6 +72,7 @@ class java_bytecode_languaget:public languaget protected: irep_idt main_class; + std::vector main_jar_classes; java_class_loadert java_class_loader; bool assume_inputs_non_null; // assume inputs variables to be non-null diff --git a/src/java_bytecode/java_class_loader.h b/src/java_bytecode/java_class_loader.h index d3fcdd62810..b96097c504d 100644 --- a/src/java_bytecode/java_class_loader.h +++ b/src/java_bytecode/java_class_loader.h @@ -37,7 +37,6 @@ class java_class_loadert:public messaget jar_poolt jar_pool; -protected: class jar_map_entryt { public: From c10849c39dc8419c63c0fad501d7ba49e51e0ff5 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 23 Nov 2016 11:02:34 +0000 Subject: [PATCH 06/27] Add this-parameter before determining needed classes If the given entry point has a this-parameter of type A, that constitutes evidence that virtual callsites might target a class of type A. --- .../java_bytecode_convert_method.cpp | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 0d5198ad4b8..3cc78a0a053 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -203,6 +203,18 @@ void java_bytecode_convert_method_lazy( method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ id2string(m.base_name)+"()"; + // do we need to add 'this' as a parameter? + if(!m.is_static) + { + code_typet &code_type=to_code_type(member_type); + code_typet::parameterst ¶meters=code_type.parameters(); + code_typet::parametert this_p; + const reference_typet object_ref_type(symbol_typet(class_symbol.name)); + this_p.type()=object_ref_type; + this_p.set_this(); + parameters.insert(parameters.begin(), this_p); + } + method_symbol.type=member_type; symbol_table.add(method_symbol); } @@ -223,29 +235,17 @@ void java_bytecode_convert_methodt::convert( const symbolt &class_symbol, const methodt &m) { - typet member_type=java_type_from_string(m.signature); - - assert(member_type.id()==ID_code); - const irep_idt method_identifier= id2string(class_symbol.name)+"."+id2string(m.name)+":"+m.signature; method_id=method_identifier; + const auto& old_sym=symbol_table.lookup(method_identifier); + + typet member_type=old_sym.type; code_typet &code_type=to_code_type(member_type); method_return_type=code_type.return_type(); code_typet::parameterst ¶meters=code_type.parameters(); - // do we need to add 'this' as a parameter? - if(!m.is_static) - { - code_typet::parametert this_p; - const reference_typet object_ref_type( - symbol_typet(class_symbol.name)); - this_p.type()=object_ref_type; - this_p.set_this(); - parameters.insert(parameters.begin(), this_p); - } - variables.clear(); // find parameter names in the local variable table: From 8d96409a92a21375d7f465b38226fabbfd6bed7c Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 30 Nov 2016 15:38:08 +0000 Subject: [PATCH 07/27] Lazy method conversion: load callee parent classes Previously lazy virtual callsite resolution would assume that in order to call B.f, it is necessary to instantiate a B and call A.f, where A is some parent of B. This commit amends that to allow creation of a C, where C is a subtype of B and does not override f itself, is enough for B.f to be callable. --- src/java_bytecode/java_bytecode_language.cpp | 84 +++++++++++++++++--- 1 file changed, 72 insertions(+), 12 deletions(-) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index dc6c350ba7f..8e5ce448b57 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -173,14 +173,29 @@ bool java_bytecode_languaget::parse( return false; } +static irep_idt get_virtual_method_target( + const std::set& needed_classes, + const irep_idt& call_basename, + const irep_idt& classname, + const symbol_tablet& symbol_table) +{ + // Program-wide, is this class ever instantiated? + if(!needed_classes.count(classname)) + return irep_idt(); + auto methodid=id2string(classname)+"."+id2string(call_basename); + if(symbol_table.has_symbol(methodid)) + return methodid; + else + return irep_idt(); +} + static void get_virtual_method_targets( const code_function_callt& c, const std::set& needed_classes, std::vector& needed_methods, - const symbol_tablet& symbol_table, + symbol_tablet& symbol_table, const class_hierarchyt& class_hierarchy) { - const auto& called_function=c.function(); assert(called_function.id()==ID_virtual_function); @@ -189,17 +204,56 @@ static void get_virtual_method_targets( const auto& call_basename=called_function.get(ID_component_name); assert(call_basename!=irep_idt()); + auto old_size=needed_methods.size(); + auto child_classes=class_hierarchy.get_children_trans(call_class); - child_classes.push_back(call_class); for(const auto& child_class : child_classes) { - // Program-wide, is this class ever instantiated? - if(!needed_classes.count(child_class)) - continue; - auto methodid=id2string(child_class)+"."+id2string(call_basename); - if(symbol_table.has_symbol(methodid)) - needed_methods.push_back(methodid); + auto child_method=get_virtual_method_target(needed_classes,call_basename, + child_class,symbol_table); + if(child_method!=irep_idt()) + needed_methods.push_back(child_method); + } + + irep_idt parent_class_id=call_class; + while(1) + { + auto parent_method=get_virtual_method_target(needed_classes,call_basename, + parent_class_id,symbol_table); + if(parent_method!=irep_idt()) + { + needed_methods.push_back(parent_method); + break; + } + else + { + auto findit=class_hierarchy.class_map.find(parent_class_id); + if(findit==class_hierarchy.class_map.end()) + break; + else + { + const auto& entry=findit->second; + if(entry.parents.size()==0) + break; + else + parent_class_id=entry.parents[0]; + } + } } + + if(needed_methods.size()==old_size) + { + // Didn't find any candidate callee. Generate a stub. + std::string stubname=id2string(call_class)+"."+id2string(call_basename); + symbolt symbol; + symbol.name=stubname; + symbol.base_name=call_basename; + symbol.type=c.function().type(); + symbol.value.make_nil(); + symbol.mode=ID_java; + symbol_table.add(symbol); + } + } static void gather_virtual_callsites(const exprt& e, std::vector& result) @@ -253,6 +307,7 @@ static void gather_field_types( static void initialise_needed_classes( const std::vector& entry_points, const namespacet& ns, + const class_hierarchyt& ch, std::set& needed_classes) { for(const auto& mname : entry_points) @@ -263,8 +318,12 @@ static void initialise_needed_classes( { if(param.type().id()==ID_pointer) { - needed_classes.insert(to_symbol_type(param.type().subtype()).get_identifier()); - gather_field_types(param.type().subtype(),ns,needed_classes); + const auto& param_classid=to_symbol_type(param.type().subtype()).get_identifier(); + std::vector class_and_parents=ch.get_parents_trans(param_classid); + class_and_parents.push_back(param_classid); + for(const auto& classid : class_and_parents) + needed_classes.insert(classid); + gather_field_types(param.type().subtype(),ns,needed_classes); } } } @@ -344,7 +403,7 @@ bool java_bytecode_languaget::typecheck( method_worklist2.push_back(main_function.main_function.name); std::set needed_classes; - initialise_needed_classes(method_worklist2,namespacet(symbol_table),needed_classes); + initialise_needed_classes(method_worklist2,namespacet(symbol_table),ch,needed_classes); std::set methods_already_populated; std::vector virtual_callsites; @@ -386,6 +445,7 @@ bool java_bytecode_languaget::typecheck( for(const auto& callsite : virtual_callsites) { + // This will also create a stub if a virtual callsite has no targets. get_virtual_method_targets(*callsite,needed_classes,method_worklist2, symbol_table,ch); } From 2024c34c1c697e1ec203c5bf60ead40b3eb41e83 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 17 Jan 2017 11:55:35 +0000 Subject: [PATCH 08/27] Always assume String, Class and Object are callable --- src/java_bytecode/java_bytecode_language.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 8e5ce448b57..1655347f627 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -327,6 +327,13 @@ static void initialise_needed_classes( } } } + + // Also add classes whose instances are magically + // created by the JVM and so won't be spotted by + // looking for constructors and calls as usual: + needed_classes.insert("java::java.lang.String"); + needed_classes.insert("java::java.lang.Class"); + needed_classes.insert("java::java.lang.Object"); } /*******************************************************************\ From 6700a5646b75aa1913921e27aba74906300dcf23 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 17 Jan 2017 12:01:49 +0000 Subject: [PATCH 09/27] Don't try to call an abstract function --- src/goto-programs/remove_virtual_functions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/goto-programs/remove_virtual_functions.cpp b/src/goto-programs/remove_virtual_functions.cpp index da9cce5039f..b24621fe668 100644 --- a/src/goto-programs/remove_virtual_functions.cpp +++ b/src/goto-programs/remove_virtual_functions.cpp @@ -339,7 +339,8 @@ void remove_virtual_functionst::get_functions( component_name, functions); - functions.push_back(root_function); + if(root_function.symbol_expr!=symbol_exprt()) + functions.push_back(root_function); } /*******************************************************************\ From 79ff6675ec5d79a4e19332e454da785994d5f977 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 25 Jan 2017 15:53:40 +0000 Subject: [PATCH 10/27] Make lazy method loading optional --- .../java_bytecode_convert_class.cpp | 42 +++++++--- .../java_bytecode_convert_class.h | 7 +- .../java_bytecode_convert_method.cpp | 23 +++--- .../java_bytecode_convert_method.h | 25 +++++- .../java_bytecode_convert_method_class.h | 14 ++-- src/java_bytecode/java_bytecode_language.cpp | 76 ++++++++++++++----- src/java_bytecode/java_bytecode_language.h | 17 +++++ src/util/language.h | 9 +++ src/util/language_file.cpp | 16 ++++ src/util/language_file.h | 23 ++++++ 10 files changed, 196 insertions(+), 56 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 2036bcd9bce..eb642baa846 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -14,6 +14,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_root_class.h" #include "java_types.h" #include "java_bytecode_convert_method.h" +#include "java_bytecode_language.h" #include #include @@ -28,12 +29,14 @@ class java_bytecode_convert_classt:public messaget message_handlert &_message_handler, bool _disable_runtime_checks, size_t _max_array_length, - lazy_methodst& _lm): + lazy_methodst& _lm, + lazy_methods_modet _lazy_methods_mode): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), max_array_length(_max_array_length), - lazy_methods(_lm) + lazy_methods(_lm), + lazy_methods_mode(_lazy_methods_mode) { } @@ -55,6 +58,7 @@ class java_bytecode_convert_classt:public messaget const bool disable_runtime_checks; const size_t max_array_length; lazy_methodst &lazy_methods; + lazy_methods_modet lazy_methods_mode; // conversion void convert(const classt &c); @@ -143,14 +147,30 @@ void java_bytecode_convert_classt::convert(const classt &c) id2string(qualified_classname)+ "."+id2string(method.name)+ ":"+method.signature; + // Always run the lazy pre-stage, as it symbol-table + // registers the function. java_bytecode_convert_method_lazy( - *class_symbol,method_identifier,method,symbol_table); - lazy_methods[method_identifier]= - std::make_pair(class_symbol,&method); + *class_symbol, + method_identifier, + method, + symbol_table); + if(lazy_methods_mode==LAZY_METHODS_MODE_EAGER) + { + // Upgrade to a fully-realized symbol now: + java_bytecode_convert_method( + *class_symbol, + method, + symbol_table, + get_message_handler(), + disable_runtime_checks, + max_array_length); + } + else + { + // Wait for our caller to decide what needs elaborating. + lazy_methods[method_identifier]=std::make_pair(class_symbol, &method); + } } - //java_bytecode_convert_method( - // *class_symbol, method, symbol_table, get_message_handler(), - // disable_runtime_checks, max_array_length); // is this a root class? if(c.extends.empty()) @@ -339,14 +359,16 @@ bool java_bytecode_convert_class( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - lazy_methodst &lazy_methods) + lazy_methodst &lazy_methods, + lazy_methods_modet lazy_methods_mode) { java_bytecode_convert_classt java_bytecode_convert_class( symbol_table, message_handler, disable_runtime_checks, max_array_length, - lazy_methods); + lazy_methods, + lazy_methods_mode); try { diff --git a/src/java_bytecode/java_bytecode_convert_class.h b/src/java_bytecode/java_bytecode_convert_class.h index 8c1dccc02f5..5cc1526a125 100644 --- a/src/java_bytecode/java_bytecode_convert_class.h +++ b/src/java_bytecode/java_bytecode_convert_class.h @@ -13,9 +13,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "java_bytecode_parse_tree.h" - -typedef std::map > - lazy_methodst; +#include "java_bytecode_language.h" bool java_bytecode_convert_class( const java_bytecode_parse_treet &parse_tree, @@ -23,6 +21,7 @@ bool java_bytecode_convert_class( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - lazy_methodst &); + lazy_methodst &, + lazy_methods_modet); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_CLASS_H diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 3cc78a0a053..0151833007e 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -21,7 +21,6 @@ Author: Daniel Kroening, kroening@kroening.com #include #include -#include #include #include "java_bytecode_convert_method.h" @@ -1064,7 +1063,8 @@ codet java_bytecode_convert_methodt::convert_instructions( if(as_string(arg0.get(ID_identifier)) .find("")!=std::string::npos) { - needed_classes.insert(classname); + if(needed_classes) + needed_classes->insert(classname); code_type.set(ID_constructor, true); } else @@ -1154,7 +1154,8 @@ codet java_bytecode_convert_methodt::convert_instructions( { // static binding call.function()=symbol_exprt(arg0.get(ID_identifier), arg0.type()); - needed_methods.push_back(arg0.get(ID_identifier)); + if(needed_methods) + needed_methods->push_back(arg0.get(ID_identifier)); } call.function().add_source_location()=loc; @@ -1696,8 +1697,8 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_exprt symbol_expr(arg0.type()); const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); - if(arg0.type().id()==ID_symbol) - needed_classes.insert(to_symbol_type(arg0.type()).get_identifier()); + if(needed_classes && arg0.type().id()==ID_symbol) + needed_classes->insert(to_symbol_type(arg0.type()).get_identifier()); results[0]=java_bytecode_promotion(symbol_expr); // set $assertionDisabled to false @@ -1715,8 +1716,8 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_exprt symbol_expr(arg0.type()); const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); - if(arg0.type().id()==ID_symbol) - needed_classes.insert(to_symbol_type(arg0.type()).get_identifier()); + if(needed_classes && arg0.type().id()==ID_symbol) + needed_classes->insert(to_symbol_type(arg0.type()).get_identifier()); c=code_assignt(symbol_expr, op[0]); } else if(statement==patternt("?2?")) // i2c etc. @@ -2180,9 +2181,8 @@ void java_bytecode_convert_method( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - std::vector& needed_methods, - std::set& needed_classes, - const class_hierarchyt& ch) + std::vector *needed_methods, + std::set *needed_classes) { java_bytecode_convert_methodt java_bytecode_convert_method( symbol_table, @@ -2190,8 +2190,7 @@ void java_bytecode_convert_method( disable_runtime_checks, max_array_length, needed_methods, - needed_classes, - ch); + needed_classes); java_bytecode_convert_method(class_symbol, method); } diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index b6302161a43..145c7a5cc91 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -23,9 +23,28 @@ void java_bytecode_convert_method( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - std::vector& needed_methods, - std::set& needed_classes, - const class_hierarchyt&); + std::vector *needed_methods, + std::set *needed_classes); + +// Must provide both the optional parameters or neither. +inline void java_bytecode_convert_method( + const symbolt &class_symbol, + const java_bytecode_parse_treet::methodt &method, + symbol_tablet &symbol_table, + message_handlert &message_handler, + bool disable_runtime_checks, + size_t max_array_length) +{ + java_bytecode_convert_method( + class_symbol, + method, + symbol_table, + message_handler, + disable_runtime_checks, + max_array_length, + nullptr, + nullptr); +} void java_bytecode_convert_method_lazy( const symbolt &class_symbol, diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index b49d22934b1..8b11c831266 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -22,7 +22,6 @@ Author: Daniel Kroening, kroening@kroening.com class symbol_tablet; class symbolt; -class class_hierarchyt; class java_bytecode_convert_methodt:public messaget { @@ -32,16 +31,14 @@ class java_bytecode_convert_methodt:public messaget message_handlert &_message_handler, bool _disable_runtime_checks, size_t _max_array_length, - std::vector& _needed_methods, - std::set& _needed_classes, - const class_hierarchyt& _ch): + std::vector *_needed_methods, + std::set *_needed_classes): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), max_array_length(_max_array_length), needed_methods(_needed_methods), - needed_classes(_needed_classes), - class_hierarchy(_ch) + needed_classes(_needed_classes) { } @@ -60,9 +57,8 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &symbol_table; const bool disable_runtime_checks; const size_t max_array_length; - std::vector& needed_methods; - std::set& needed_classes; - const class_hierarchyt& class_hierarchy; + std::vector *needed_methods; + std::set *needed_classes; irep_idt method_id; irep_idt current_method; diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 1655347f627..42c6d0bb9ef 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -48,6 +48,12 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd) std::stoi(cmd.get_value("java-max-input-array-length")); if(cmd.isset("java-max-vla-length")) max_user_array_length=std::stoi(cmd.get_value("java-max-vla-length")); + if(cmd.isset("lazy-methods-context-sensitive")) + lazy_methods_mode=LAZY_METHODS_MODE_CONTEXT_SENSITIVE; + else if(cmd.isset("lazy-methods")) + lazy_methods_mode=LAZY_METHODS_MODE_CONTEXT_INSENSITIVE; + else + lazy_methods_mode=LAZY_METHODS_MODE_EAGER; } /*******************************************************************\ @@ -352,9 +358,6 @@ bool java_bytecode_languaget::typecheck( symbol_tablet &symbol_table, const std::string &module) { - std::map > - lazy_methods; - // first convert all for(java_class_loadert::class_mapt::const_iterator c_it=java_class_loader.class_map.begin(); @@ -372,13 +375,30 @@ bool java_bytecode_languaget::typecheck( get_message_handler(), disable_runtime_checks, max_user_array_length, - lazy_methods)) + lazy_methods, + lazy_methods_mode)) return true; } - // Now incrementally elaborate methods that are reachable from this entry point: + // Now incrementally elaborate methods that are reachable from this entry point. + if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE) + { + if(do_ci_lazy_method_conversion(symbol_table, lazy_methods)) + return true; + } + + // now typecheck all + if(java_bytecode_typecheck( + symbol_table, get_message_handler())) + return true; + + return false; +} - // Convert-method will need this to find virtual function targets. +bool java_bytecode_languaget::do_ci_lazy_method_conversion( + symbol_tablet &symbol_table, + lazy_methodst &lazy_methods) +{ class_hierarchyt ch; ch(symbol_table); @@ -432,12 +452,17 @@ bool java_bytecode_languaget::typecheck( debug() << "Skip " << mname << eom; continue; } - debug() << "Lazy methods: elaborate " << mname << eom; + debug() << "CI lazy methods: elaborate " << mname << eom; const auto& parsed_method=findit->second; - java_bytecode_convert_method(*parsed_method.first,*parsed_method.second, - symbol_table,get_message_handler(), - disable_runtime_checks,max_user_array_length, - method_worklist2,needed_classes,ch); + java_bytecode_convert_method( + *parsed_method.first, + *parsed_method.second, + symbol_table, + get_message_handler(), + disable_runtime_checks, + max_user_array_length, + &method_worklist2, + &needed_classes); gather_virtual_callsites(symbol_table.lookup(mname).value,virtual_callsites); any_new_methods=true; } @@ -447,7 +472,7 @@ bool java_bytecode_languaget::typecheck( // Given the object types we now know may be created, populate more // possible virtual function call targets: - debug() << "Lazy methods: add virtual method targets (" << virtual_callsites.size() << + debug() << "CI lazy methods: add virtual method targets (" << virtual_callsites.size() << " callsites)" << eom; for(const auto& callsite : virtual_callsites) @@ -473,18 +498,33 @@ bool java_bytecode_languaget::typecheck( keep_symbols.add(sym.second); } - debug() << "Lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods and globals" << eom; + debug() << "CI lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods and globals" << eom; symbol_table.swap(keep_symbols); - // now typecheck all - if(java_bytecode_typecheck( - symbol_table, get_message_handler())) - return true; - return false; } +void java_bytecode_languaget::lazy_methods_provided(std::set &methods) const +{ + for(const auto &kv : lazy_methods) + methods.insert(kv.first); +} + +void java_bytecode_languaget::convert_lazy_method( + const irep_idt &id, + symbol_tablet &symtab) +{ + const auto &lazy_method_entry=lazy_methods.at(id); + java_bytecode_convert_method( + *lazy_method_entry.first, + *lazy_method_entry.second, + symtab, + get_message_handler(), + disable_runtime_checks, + max_user_array_length); +} + /*******************************************************************\ Function: java_bytecode_languaget::final diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 36b28682dca..750b92aa367 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -16,6 +16,16 @@ Author: Daniel Kroening, kroening@kroening.com #define MAX_NONDET_ARRAY_LENGTH_DEFAULT 5 +enum lazy_methods_modet +{ + LAZY_METHODS_MODE_EAGER, + LAZY_METHODS_MODE_CONTEXT_INSENSITIVE, + LAZY_METHODS_MODE_CONTEXT_SENSITIVE +}; + +typedef std::map > + lazy_methodst; + class java_bytecode_languaget:public languaget { public: @@ -69,8 +79,13 @@ class java_bytecode_languaget:public languaget std::set extensions() const override; void modules_provided(std::set &modules) override; + virtual void lazy_methods_provided(std::set &) const override; + virtual void convert_lazy_method( + const irep_idt &id, symbol_tablet &) override; protected: + bool do_ci_lazy_method_conversion(symbol_tablet &, lazy_methodst &); + irep_idt main_class; std::vector main_jar_classes; java_class_loadert java_class_loader; @@ -83,6 +98,8 @@ class java_bytecode_languaget:public languaget // - array size for newarray size_t max_nondet_array_length; // maximal length for non-det array creation size_t max_user_array_length; // max size for user code created arrays + lazy_methodst lazy_methods; + lazy_methods_modet lazy_methods_mode; }; languaget *new_java_bytecode_language(); diff --git a/src/util/language.h b/src/util/language.h index edd83471cd5..b19615496ec 100644 --- a/src/util/language.h +++ b/src/util/language.h @@ -49,6 +49,15 @@ class languaget:public messaget virtual void modules_provided(std::set &modules) { } + // add lazy functions provided to set + + virtual void lazy_methods_provided(std::set &methods) const + { } + + // populate a lazy method + virtual void convert_lazy_method(const irep_idt &id, symbol_tablet &) + { } + // final adjustments, e.g., initialization and call to main() virtual bool final( diff --git a/src/util/language_file.cpp b/src/util/language_file.cpp index f7a49991d63..29775ad8849 100644 --- a/src/util/language_file.cpp +++ b/src/util/language_file.cpp @@ -64,6 +64,13 @@ void language_filet::get_modules() language->modules_provided(modules); } +void language_filet::convert_lazy_method( + const irep_idt &id, + symbol_tablet &symtab) +{ + language->convert_lazy_method(id, symtab); +} + /*******************************************************************\ Function: language_filest::show_parse @@ -189,8 +196,17 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) it!=filemap.end(); it++) { if(it->second.modules.empty()) + { if(it->second.language->typecheck(symbol_table, "")) return true; + // register lazy methods. + // TODO: learn about modules and generalise this + // to module-providing languages if required. + std::set lazy_method_ids; + it->second.language->lazy_methods_provided(lazy_method_ids); + for(const auto &id : lazy_method_ids) + lazymethodmap[id]=&it->second; + } } // typecheck modules diff --git a/src/util/language_file.h b/src/util/language_file.h index 533e8d5240e..b538c24e84f 100644 --- a/src/util/language_file.h +++ b/src/util/language_file.h @@ -42,6 +42,10 @@ class language_filet void get_modules(); + void convert_lazy_method( + const irep_idt &id, + symbol_tablet &symtab); + language_filet(const language_filet &rhs); language_filet():language(NULL) @@ -57,9 +61,15 @@ class language_filest:public messaget typedef std::map filemapt; filemapt filemap; + // Contains pointers into filemapt! typedef std::map modulemapt; modulemapt modulemap; + // Contains pointers into filemapt! + // This is safe-ish as long as this is std::map. + typedef std::map lazymethodmapt; + lazymethodmapt lazymethodmap; + void clear_files() { filemap.clear(); @@ -75,10 +85,23 @@ class language_filest:public messaget bool interfaces(symbol_tablet &symbol_table); + bool has_lazy_method(const irep_idt &id) + { + return lazymethodmap.count(id); + } + + void convert_lazy_method( + const irep_idt &id, + symbol_tablet &symtab) + { + return lazymethodmap.at(id)->convert_lazy_method(id, symtab); + } + void clear() { filemap.clear(); modulemap.clear(); + lazymethodmap.clear(); } protected: From 5be71063cf850e3dff16165abc87448a70746450 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 6 Feb 2017 11:18:04 +0000 Subject: [PATCH 11/27] Add CBMC lazy-methods parameter TODO: move all Java frontend parameters into something defined on java_bytecode_languaget and have all driver programs gather the frontends' defined parameters. --- src/cbmc/cbmc_parse_options.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cbmc/cbmc_parse_options.h b/src/cbmc/cbmc_parse_options.h index 87e9beb45d2..adc6e2ee517 100644 --- a/src/cbmc/cbmc_parse_options.h +++ b/src/cbmc/cbmc_parse_options.h @@ -56,6 +56,7 @@ class optionst; "(graphml-witness):" \ "(java-max-vla-length):(java-unwind-enum-static)" \ "(localize-faults)(localize-faults-method):" \ + "(lazy-methods)" \ "(fixedbv)(floatbv)(all-claims)(all-properties)" // legacy, and will eventually disappear // NOLINT(whitespace/line_length) class cbmc_parse_optionst: From d932cfde321011b506a0f2acbb31ffc181b30a16 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 9 Feb 2017 16:44:27 +0000 Subject: [PATCH 12/27] Document lazy methods --- .../java_bytecode_convert_method.cpp | 19 ++++ src/java_bytecode/java_bytecode_language.cpp | 103 ++++++++++++++++++ 2 files changed, 122 insertions(+) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 0151833007e..7842f38fa5e 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -177,6 +177,25 @@ const exprt java_bytecode_convert_methodt::variable( } } +/*******************************************************************\ + +Function: java_bytecode_convert_method_lazy + + Inputs: `class_symbol`: class this method belongs to + `method_identifier`: fully qualified method name, including + type signature (e.g. "x.y.z.f:(I)") + `m`: parsed method object to convert + `symbol_table`: global symbol table (will be modified) + + Outputs: + + Purpose: This creates a method symbol in the symtab, but doesn't + actually perform method conversion just yet. The caller + should call java_bytecode_convert_method later to give the + symbol/method a body. + +\*******************************************************************/ + void java_bytecode_convert_method_lazy( const symbolt& class_symbol, const irep_idt method_identifier, diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 42c6d0bb9ef..310a5ef69dd 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -179,6 +179,27 @@ bool java_bytecode_languaget::parse( return false; } +/*******************************************************************\ + +Function: get_virtual_method_target + + Inputs: `needed_classes`: set of classes that can be instantiated. + Any potential callee not in this set will be ignored. + `call_basename`: unqualified function name with type + signature (e.g. "f:(I)") + `classname`: class name that may define or override a + function named `call_basename`. + `symbol_table`: global symtab + + Outputs: Returns the fully qualified name of `classname`'s definition + of `call_basename` if found and `classname` is present in + `needed_classes`, or irep_idt() otherwise. + + Purpose: Find a virtual callee, if one is defined and the callee type + is known to exist. + +\*******************************************************************/ + static irep_idt get_virtual_method_target( const std::set& needed_classes, const irep_idt& call_basename, @@ -195,6 +216,27 @@ static irep_idt get_virtual_method_target( return irep_idt(); } +/*******************************************************************\ + +Function: get_virtual_method_target + + Inputs: `c`: function call whose potential target functions should + be determined. + `needed_classes`: set of classes that can be instantiated. + Any potential callee not in this set will be ignored. + `symbol_table`: global symtab + `class_hierarchy`: global class hierarchy + + Outputs: Populates `needed_methods` with all possible `c` callees, + taking `needed_classes` into account (virtual function + overrides defined on classes that are not 'needed' are + ignored) + + Purpose: Find possible callees, excluding types that are not known + to be instantiated. + +\*******************************************************************/ + static void get_virtual_method_targets( const code_function_callt& c, const std::set& needed_classes, @@ -262,6 +304,19 @@ static void get_virtual_method_targets( } +/*******************************************************************\ + +Function: gather_virtual_callsites + + Inputs: `e`: expression tree to search + + Outputs: Populates `result` with pointers to each function call + within e that calls a virtual function. + + Purpose: See output + +\*******************************************************************/ + static void gather_virtual_callsites(const exprt& e, std::vector& result) { if(e.id()!=ID_code) @@ -275,6 +330,20 @@ static void gather_virtual_callsites(const exprt& e, std::vector& entry_points, const namespacet& ns, From d19448a9302a8fb3a4126484f46f58aa9683315f Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 9 Feb 2017 16:55:15 +0000 Subject: [PATCH 13/27] Style fixes --- .../java_bytecode_convert_method.cpp | 5 +- src/java_bytecode/java_bytecode_language.cpp | 98 ++++++++++++------- src/java_bytecode/java_bytecode_language.h | 6 +- 3 files changed, 73 insertions(+), 36 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 7842f38fa5e..6e12a8ab537 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -215,7 +215,7 @@ void java_bytecode_convert_method_lazy( { method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ id2string(class_symbol.base_name)+"()"; - member_type.set(ID_constructor,true); + member_type.set(ID_constructor, true); } else method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ @@ -1167,7 +1167,8 @@ codet java_bytecode_convert_methodt::convert_instructions( assert(use_this); assert(!call.arguments().empty()); call.function()=arg0; - // Populate needed methods later, once we know what object types can exist. + // Populate needed methods later, + // once we know what object types can exist. } else { diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 310a5ef69dd..c9f1b1771d2 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -257,8 +257,12 @@ static void get_virtual_method_targets( auto child_classes=class_hierarchy.get_children_trans(call_class); for(const auto& child_class : child_classes) { - auto child_method=get_virtual_method_target(needed_classes,call_basename, - child_class,symbol_table); + auto child_method= + get_virtual_method_target( + needed_classes, + call_basename, + child_class, + symbol_table); if(child_method!=irep_idt()) needed_methods.push_back(child_method); } @@ -266,8 +270,12 @@ static void get_virtual_method_targets( irep_idt parent_class_id=call_class; while(1) { - auto parent_method=get_virtual_method_target(needed_classes,call_basename, - parent_class_id,symbol_table); + auto parent_method= + get_virtual_method_target( + needed_classes, + call_basename, + parent_class_id, + symbol_table); if(parent_method!=irep_idt()) { needed_methods.push_back(parent_method); @@ -301,7 +309,6 @@ static void get_virtual_method_targets( symbol.mode=ID_java; symbol_table.add(symbol); } - } /*******************************************************************\ @@ -317,7 +324,9 @@ Function: gather_virtual_callsites \*******************************************************************/ -static void gather_virtual_callsites(const exprt& e, std::vector& result) +static void gather_virtual_callsites( + const exprt& e, + std::vector& result) { if(e.id()!=ID_code) return; @@ -326,8 +335,8 @@ static void gather_virtual_callsites(const exprt& e, std::vector class_and_parents=ch.get_parents_trans(param_classid); + const auto& param_classid= + to_symbol_type(param.type().subtype()).get_identifier(); + std::vector class_and_parents= + ch.get_parents_trans(param_classid); class_and_parents.push_back(param_classid); for(const auto& classid : class_and_parents) needed_classes.insert(classid); - gather_field_types(param.type().subtype(),ns,needed_classes); + gather_field_types(param.type().subtype(), ns, needed_classes); } } } @@ -483,7 +498,8 @@ bool java_bytecode_languaget::typecheck( return true; } - // Now incrementally elaborate methods that are reachable from this entry point. + // Now incrementally elaborate methods + // that are reachable from this entry point. if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE) { if(do_ci_lazy_method_conversion(symbol_table, lazy_methods)) @@ -508,7 +524,8 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( std::vector method_worklist1; std::vector method_worklist2; - auto main_function=get_main_symbol(symbol_table,main_class,get_message_handler(),true); + auto main_function= + get_main_symbol(symbol_table, main_class, get_message_handler(), true); if(main_function.stop_convert) { // Failed, mark all functions in the given main class(es) reachable. @@ -519,7 +536,8 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( reachable_classes=main_jar_classes; for(const auto& classname : reachable_classes) { - const auto& methods=java_class_loader.class_map.at(classname).parsed_class.methods; + const auto& methods= + java_class_loader.class_map.at(classname).parsed_class.methods; for(const auto& method : methods) { const irep_idt methodid="java::"+id2string(classname)+"."+ @@ -533,18 +551,22 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( method_worklist2.push_back(main_function.main_function.name); std::set needed_classes; - initialise_needed_classes(method_worklist2,namespacet(symbol_table),ch,needed_classes); + initialise_needed_classes( + method_worklist2, + namespacet(symbol_table), + ch, + needed_classes); std::set methods_already_populated; std::vector virtual_callsites; bool any_new_methods; - do { - + do + { any_new_methods=false; while(method_worklist2.size()!=0) { - std::swap(method_worklist1,method_worklist2); + std::swap(method_worklist1, method_worklist2); for(const auto& mname : method_worklist1) { if(!methods_already_populated.insert(mname).second) @@ -566,7 +588,9 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( max_user_array_length, &method_worklist2, &needed_classes); - gather_virtual_callsites(symbol_table.lookup(mname).value,virtual_callsites); + gather_virtual_callsites( + symbol_table.lookup(mname).value, + virtual_callsites); any_new_methods=true; } method_worklist1.clear(); @@ -575,14 +599,16 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( // Given the object types we now know may be created, populate more // possible virtual function call targets: - debug() << "CI lazy methods: add virtual method targets (" << virtual_callsites.size() << - " callsites)" << eom; + debug() << "CI lazy methods: add virtual method targets (" + << virtual_callsites.size() + << " callsites)" + << eom; for(const auto& callsite : virtual_callsites) { // This will also create a stub if a virtual callsite has no targets. - get_virtual_method_targets(*callsite,needed_classes,method_worklist2, - symbol_table,ch); + get_virtual_method_targets(*callsite, needed_classes, method_worklist2, + symbol_table, ch); } } while(any_new_methods); @@ -594,14 +620,20 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( { if(sym.second.is_static_lifetime) continue; - if(lazy_methods.count(sym.first) && !methods_already_populated.count(sym.first)) + if(lazy_methods.count(sym.first) && + !methods_already_populated.count(sym.first)) + { continue; + } if(sym.second.type.id()==ID_code) - gather_needed_globals(sym.second.value,symbol_table,keep_symbols); + gather_needed_globals(sym.second.value, symbol_table, keep_symbols); keep_symbols.add(sym.second); } - debug() << "CI lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods and globals" << eom; + debug() << "CI lazy methods: removed " + << symbol_table.symbols.size() - keep_symbols.symbols.size() + << " unreachable methods and globals" + << eom; symbol_table.swap(keep_symbols); diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 750b92aa367..afdd275ddf6 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -23,7 +23,11 @@ enum lazy_methods_modet LAZY_METHODS_MODE_CONTEXT_SENSITIVE }; -typedef std::map > +typedef std::pair< + const symbolt*, + const java_bytecode_parse_treet::methodt*> + lazy_method_valuet; +typedef std::map lazy_methodst; class java_bytecode_languaget:public languaget From 7c669b277e4e097e862917198859d383467d696a Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 9 Feb 2017 17:08:15 +0000 Subject: [PATCH 14/27] Add lazy loading tests --- regression/cbmc-java/lazyloading1/A.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading1/B.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading1/test.class | Bin 0 -> 292 bytes regression/cbmc-java/lazyloading1/test.desc | 8 +++++ regression/cbmc-java/lazyloading1/test.java | 21 +++++++++++++ regression/cbmc-java/lazyloading2/A.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading2/B.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading2/test.class | Bin 0 -> 310 bytes regression/cbmc-java/lazyloading2/test.desc | 8 +++++ regression/cbmc-java/lazyloading2/test.java | 23 ++++++++++++++ regression/cbmc-java/lazyloading3/A.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading3/B.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading3/C.class | Bin 0 -> 197 bytes regression/cbmc-java/lazyloading3/D.class | Bin 0 -> 197 bytes regression/cbmc-java/lazyloading3/test.class | Bin 0 -> 296 bytes regression/cbmc-java/lazyloading3/test.desc | 8 +++++ regression/cbmc-java/lazyloading3/test.java | 30 +++++++++++++++++++ 17 files changed, 98 insertions(+) create mode 100644 regression/cbmc-java/lazyloading1/A.class create mode 100644 regression/cbmc-java/lazyloading1/B.class create mode 100644 regression/cbmc-java/lazyloading1/test.class create mode 100644 regression/cbmc-java/lazyloading1/test.desc create mode 100644 regression/cbmc-java/lazyloading1/test.java create mode 100644 regression/cbmc-java/lazyloading2/A.class create mode 100644 regression/cbmc-java/lazyloading2/B.class create mode 100644 regression/cbmc-java/lazyloading2/test.class create mode 100644 regression/cbmc-java/lazyloading2/test.desc create mode 100644 regression/cbmc-java/lazyloading2/test.java create mode 100644 regression/cbmc-java/lazyloading3/A.class create mode 100644 regression/cbmc-java/lazyloading3/B.class create mode 100644 regression/cbmc-java/lazyloading3/C.class create mode 100644 regression/cbmc-java/lazyloading3/D.class create mode 100644 regression/cbmc-java/lazyloading3/test.class create mode 100644 regression/cbmc-java/lazyloading3/test.desc create mode 100644 regression/cbmc-java/lazyloading3/test.java diff --git a/regression/cbmc-java/lazyloading1/A.class b/regression/cbmc-java/lazyloading1/A.class new file mode 100644 index 0000000000000000000000000000000000000000..d6a62ca1ce465a6ae3fc35f9fa702cfa834ca298 GIT binary patch literal 222 zcmZXNy9&ZU5Jm6gVa!WyEd)!oFpZ@MB3OwQihUAS*$@+m8~@8nu3 zOQ{W4iHjO%@ih)$!DeDk=;%Aj!QtNc Vnm$}Ke`uF|tv@Ve*yV=-?*m^uqBqm`X_Sbr8+Ekm=tM6r!7<>R9O5A6#ILZBY z?k_p7_u~m*hK>glEeF1XHlZ<3i&QNL=4gB*SgZ0R3EfRv$ZdU&WOWcx&JHC6Z(r6` zEZ3A-v~jNyqM}hnO`)!EBM>9Vjr$$4#Ipfe)OD%LdK@30-UQ)r^KJ#@ zu}ke_w@&PKW|9?^aBW;F(65`_c5X+`YsJidW-m*#H!+IP3b!%tpc(dtpyI98%A_By zX|l9)O44U7&_s^E0|Hha5fM-*5CiaCXkv-e1G}*2pAV2P5Pq|~C2#H1Xc2xA%}16OcPq|~C2#H1Xc2xB@U16OcPq|~C2#H1Xc2xA%}16OcK literal 0 HcmV?d00001 diff --git a/regression/cbmc-java/lazyloading3/B.class b/regression/cbmc-java/lazyloading3/B.class new file mode 100644 index 0000000000000000000000000000000000000000..9a4ab54d369d7175755e23dfabef5f15440bf383 GIT binary patch literal 222 zcmX^0Z`VEs1_l!bJ}w4k25xo+9(D#^Mg}&U%)HDJJ4Oa(4b3n{1{UZ1lvG9rexJ;| zRKL>Pq|~C2#H1Xc2xB@U16Occ5zb#DL54!6!+VDk(Sy*z4g7E1P33$hZ57`kdu7p zkmU1yy#XxXyU^j;=-KEKND%aBvm{unv`E#OaWXv-jIcaQf*+-Y+|}1uR!0%%?6`-r zt`fOT8*r4osd*;u!h;2y?}dax!$B^JOR$eKnJA2*@l$}f44D6k7ue}Ajkq_NIeUU0 PZS1gGtvO_MnKX2N4w)Zy literal 0 HcmV?d00001 diff --git a/regression/cbmc-java/lazyloading3/D.class b/regression/cbmc-java/lazyloading3/D.class new file mode 100644 index 0000000000000000000000000000000000000000..7e16bd6527d6835776fcc14eb5132a4d94fcf1b2 GIT binary patch literal 197 zcmX^0Z`VEs1_l!bel7-P1|D_>UUmjPMh3FqHk-`6%n~~wS3@(5k%7fI zKP8osf!`-HFV(L!Hz~C!Brz!mD8dz-Us{x$>Xr%OaF(PNm*{0BmL>8quraU$&2wR7 z5CHM@a}x8?_5G8wQj<#<6d0I*mNGCf0x=5%E0hJ)02JW>@??QDNRCx&I|JiJurw!- QWCIII07)(&kBNaB01$#7eEKI~GBKD~_yhb= z#y2)@GV|WfelxS*Kc8O!?r>>P#z_@5)D2G2&}eG36ry{d`{F^N)a@-4!esmC6l$r@ z-DJPX+-`2OH7nu9`kZ(-O@;!R*>=BMy0MqXYT;gr+m(H_F(PPK|Bm2*(m!oDlp{D)`eOP3$G;8oD9r!> literal 0 HcmV?d00001 diff --git a/regression/cbmc-java/lazyloading3/test.desc b/regression/cbmc-java/lazyloading3/test.desc new file mode 100644 index 00000000000..c0011248128 --- /dev/null +++ b/regression/cbmc-java/lazyloading3/test.desc @@ -0,0 +1,8 @@ +CORE +test.class +--lazy-methods --verbosity 10 --function test.main +^EXIT=0$ +^SIGNAL=0$ +elaborate java::A\.f:\(\)V +-- +elaborate java::B\.g:\(\)V diff --git a/regression/cbmc-java/lazyloading3/test.java b/regression/cbmc-java/lazyloading3/test.java new file mode 100644 index 00000000000..6d3129d1261 --- /dev/null +++ b/regression/cbmc-java/lazyloading3/test.java @@ -0,0 +1,30 @@ +// This test checks that because `main` has a parameter of type C, which has a field of type A, +// A::f is considered reachable, but B::g is not. + +public class test +{ + public static void main(C c) + { + c.a.f(); + } +} + +class A +{ + public void f() {} +} + +class B +{ + public void g() {} +} + +class C +{ + A a; +} + +class D +{ + B b; +} From b909a547b0f3f30239f0c2debcae202b5566f3d7 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 13 Feb 2017 11:08:22 +0000 Subject: [PATCH 15/27] Improve code style No functional changes intended --- src/goto-cc/compile.cpp | 6 +- src/goto-programs/get_goto_model.cpp | 4 +- .../java_bytecode_convert_class.cpp | 4 +- .../java_bytecode_convert_method.cpp | 16 +-- .../java_bytecode_convert_method.h | 2 +- src/java_bytecode/java_bytecode_language.cpp | 98 +++++++++---------- src/java_bytecode/java_entry_point.h | 2 +- src/langapi/language_ui.cpp | 4 +- src/util/language_file.cpp | 46 ++++----- src/util/language_file.h | 33 ++++--- 10 files changed, 110 insertions(+), 105 deletions(-) diff --git a/src/goto-cc/compile.cpp b/src/goto-cc/compile.cpp index 7ab9c5eae3d..3027bc5bf0b 100644 --- a/src/goto-cc/compile.cpp +++ b/src/goto-cc/compile.cpp @@ -540,8 +540,8 @@ bool compilet::parse(const std::string &file_name) language_filet language_file; - std::pair - res=language_files.filemap.insert( + std::pair + res=language_files.file_map.insert( std::pair(file_name, language_file)); language_filet &lf=res.first->second; @@ -732,7 +732,7 @@ bool compilet::parse_source(const std::string &file_name) return true; // so we remove it from the list afterwards - language_files.filemap.erase(file_name); + language_files.file_map.erase(file_name); return false; } diff --git a/src/goto-programs/get_goto_model.cpp b/src/goto-programs/get_goto_model.cpp index 5afef435596..02aae876030 100644 --- a/src/goto-programs/get_goto_model.cpp +++ b/src/goto-programs/get_goto_model.cpp @@ -75,8 +75,8 @@ bool get_goto_modelt::operator()(const std::vector &files) return true; } - std::pair - result=language_files.filemap.insert( + std::pair + result=language_files.file_map.insert( std::pair(filename, language_filet())); language_filet &lf=result.first->second; diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index eb642baa846..440b0ce32cf 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -29,13 +29,13 @@ class java_bytecode_convert_classt:public messaget message_handlert &_message_handler, bool _disable_runtime_checks, size_t _max_array_length, - lazy_methodst& _lm, + lazy_methodst& _lazy_methods, lazy_methods_modet _lazy_methods_mode): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), max_array_length(_max_array_length), - lazy_methods(_lm), + lazy_methods(_lazy_methods), lazy_methods_mode(_lazy_methods_mode) { } diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 6e12a8ab537..498f880c965 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -197,10 +197,10 @@ Function: java_bytecode_convert_method_lazy \*******************************************************************/ void java_bytecode_convert_method_lazy( - const symbolt& class_symbol, - const irep_idt method_identifier, + const symbolt &class_symbol, + const irep_idt &method_identifier, const java_bytecode_parse_treet::methodt &m, - symbol_tablet& symbol_table) + symbol_tablet &symbol_table) { symbolt method_symbol; typet member_type=java_type_from_string(m.signature); @@ -213,12 +213,14 @@ void java_bytecode_convert_method_lazy( if(method_symbol.base_name=="") { - method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ - id2string(class_symbol.base_name)+"()"; + method_symbol.pretty_name= + id2string(class_symbol.pretty_name)+"."+ + id2string(class_symbol.base_name)+"()"; member_type.set(ID_constructor, true); } else - method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ + method_symbol.pretty_name= + id2string(class_symbol.pretty_name)+"."+ id2string(m.base_name)+"()"; // do we need to add 'this' as a parameter? @@ -257,7 +259,7 @@ void java_bytecode_convert_methodt::convert( id2string(class_symbol.name)+"."+id2string(m.name)+":"+m.signature; method_id=method_identifier; - const auto& old_sym=symbol_table.lookup(method_identifier); + const auto &old_sym=symbol_table.lookup(method_identifier); typet member_type=old_sym.type; code_typet &code_type=to_code_type(member_type); diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 145c7a5cc91..68b0dd4e0a8 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -48,7 +48,7 @@ inline void java_bytecode_convert_method( void java_bytecode_convert_method_lazy( const symbolt &class_symbol, - const irep_idt method_identifier, + const irep_idt &method_identifier, const java_bytecode_parse_treet::methodt &, symbol_tablet &symbol_table); diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index c9f1b1771d2..4512ec97709 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -156,7 +156,7 @@ bool java_bytecode_languaget::parse( { status() << "JAR file without entry point: loading it all" << eom; java_class_loader.load_entire_jar(path); - for(const auto& kv : java_class_loader.jar_map.at(path).entries) + for(const auto &kv : java_class_loader.jar_map.at(path).entries) main_jar_classes.push_back(kv.first); } else @@ -201,10 +201,10 @@ Function: get_virtual_method_target \*******************************************************************/ static irep_idt get_virtual_method_target( - const std::set& needed_classes, - const irep_idt& call_basename, - const irep_idt& classname, - const symbol_tablet& symbol_table) + const std::set &needed_classes, + const irep_idt &call_basename, + const irep_idt &classname, + const symbol_tablet &symbol_table) { // Program-wide, is this class ever instantiated? if(!needed_classes.count(classname)) @@ -238,24 +238,24 @@ Function: get_virtual_method_target \*******************************************************************/ static void get_virtual_method_targets( - const code_function_callt& c, - const std::set& needed_classes, - std::vector& needed_methods, - symbol_tablet& symbol_table, - const class_hierarchyt& class_hierarchy) + const code_function_callt &c, + const std::set &needed_classes, + std::vector &needed_methods, + symbol_tablet &symbol_table, + const class_hierarchyt &class_hierarchy) { - const auto& called_function=c.function(); + const auto &called_function=c.function(); assert(called_function.id()==ID_virtual_function); - const auto& call_class=called_function.get(ID_C_class); + const auto &call_class=called_function.get(ID_C_class); assert(call_class!=irep_idt()); - const auto& call_basename=called_function.get(ID_component_name); + const auto &call_basename=called_function.get(ID_component_name); assert(call_basename!=irep_idt()); auto old_size=needed_methods.size(); auto child_classes=class_hierarchy.get_children_trans(call_class); - for(const auto& child_class : child_classes) + for(const auto &child_class : child_classes) { auto child_method= get_virtual_method_target( @@ -288,8 +288,8 @@ static void get_virtual_method_targets( break; else { - const auto& entry=findit->second; - if(entry.parents.size()==0) + const auto &entry=findit->second; + if(entry.parents.empty()) break; else parent_class_id=entry.parents[0]; @@ -325,12 +325,12 @@ Function: gather_virtual_callsites \*******************************************************************/ static void gather_virtual_callsites( - const exprt& e, - std::vector& result) + const exprt &e, + std::vector &result) { if(e.id()!=ID_code) return; - const codet& c=to_code(e); + const codet &c=to_code(e); if(c.get_statement()==ID_function_call && to_code_function_call(c).function().id()==ID_virtual_function) result.push_back(&to_code_function_call(c)); @@ -354,13 +354,13 @@ Function: gather_needed_globals \*******************************************************************/ static void gather_needed_globals( - const exprt& e, - const symbol_tablet& symbol_table, - symbol_tablet& needed) + const exprt &e, + const symbol_tablet &symbol_table, + symbol_tablet &needed) { if(e.id()==ID_symbol) { - const auto& sym=symbol_table.lookup(to_symbol_expr(e).get_identifier()); + const auto &sym=symbol_table.lookup(to_symbol_expr(e).get_identifier()); if(sym.is_static_lifetime) needed.add(sym); } @@ -387,12 +387,12 @@ Function: gather_field_types \*******************************************************************/ static void gather_field_types( - const typet& class_type, - const namespacet& ns, - std::set& needed_classes) + const typet &class_type, + const namespacet &ns, + std::set &needed_classes) { - const auto& underlying_type=to_struct_type(ns.follow(class_type)); - for(const auto& field : underlying_type.components()) + const auto &underlying_type=to_struct_type(ns.follow(class_type)); + for(const auto &field : underlying_type.components()) { if(field.type().id()==ID_struct || field.type().id()==ID_symbol) gather_field_types(field.type(), ns, needed_classes); @@ -401,7 +401,7 @@ static void gather_field_types( // Skip array primitive pointers, for example: if(field.type().subtype().id()!=ID_symbol) continue; - const auto& field_classid= + const auto &field_classid= to_symbol_type(field.type().subtype()).get_identifier(); if(needed_classes.insert(field_classid).second) gather_field_types(field.type().subtype(), ns, needed_classes); @@ -411,7 +411,7 @@ static void gather_field_types( /*******************************************************************\ -Function: initialise_needed_classes +Function: initialize_needed_classes Inputs: `entry_points`: list of fully-qualified function names that we should assume are reachable @@ -426,26 +426,26 @@ Function: initialise_needed_classes \*******************************************************************/ -static void initialise_needed_classes( - const std::vector& entry_points, - const namespacet& ns, - const class_hierarchyt& ch, - std::set& needed_classes) +static void initialize_needed_classes( + const std::vector &entry_points, + const namespacet &ns, + const class_hierarchyt &ch, + std::set &needed_classes) { - for(const auto& mname : entry_points) + for(const auto &mname : entry_points) { - const auto& symbol=ns.lookup(mname); - const auto& mtype=to_code_type(symbol.type); - for(const auto& param : mtype.parameters()) + const auto &symbol=ns.lookup(mname); + const auto &mtype=to_code_type(symbol.type); + for(const auto ¶m : mtype.parameters()) { if(param.type().id()==ID_pointer) { - const auto& param_classid= + const auto ¶m_classid= to_symbol_type(param.type().subtype()).get_identifier(); std::vector class_and_parents= ch.get_parents_trans(param_classid); class_and_parents.push_back(param_classid); - for(const auto& classid : class_and_parents) + for(const auto &classid : class_and_parents) needed_classes.insert(classid); gather_field_types(param.type().subtype(), ns, needed_classes); } @@ -534,11 +534,11 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( reachable_classes.push_back(main_class); else reachable_classes=main_jar_classes; - for(const auto& classname : reachable_classes) + for(const auto &classname : reachable_classes) { - const auto& methods= + const auto &methods= java_class_loader.class_map.at(classname).parsed_class.methods; - for(const auto& method : methods) + for(const auto &method : methods) { const irep_idt methodid="java::"+id2string(classname)+"."+ id2string(method.name)+":"+ @@ -551,7 +551,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( method_worklist2.push_back(main_function.main_function.name); std::set needed_classes; - initialise_needed_classes( + initialize_needed_classes( method_worklist2, namespacet(symbol_table), ch, @@ -567,7 +567,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( while(method_worklist2.size()!=0) { std::swap(method_worklist1, method_worklist2); - for(const auto& mname : method_worklist1) + for(const auto &mname : method_worklist1) { if(!methods_already_populated.insert(mname).second) continue; @@ -578,7 +578,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( continue; } debug() << "CI lazy methods: elaborate " << mname << eom; - const auto& parsed_method=findit->second; + const auto &parsed_method=findit->second; java_bytecode_convert_method( *parsed_method.first, *parsed_method.second, @@ -604,7 +604,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( << " callsites)" << eom; - for(const auto& callsite : virtual_callsites) + for(const auto &callsite : virtual_callsites) { // This will also create a stub if a virtual callsite has no targets. get_virtual_method_targets(*callsite, needed_classes, method_worklist2, @@ -616,7 +616,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( // Remove symbols for methods that were declared but never used: symbol_tablet keep_symbols; - for(const auto& sym : symbol_table.symbols) + for(const auto &sym : symbol_table.symbols) { if(sym.second.is_static_lifetime) continue; diff --git a/src/java_bytecode/java_entry_point.h b/src/java_bytecode/java_entry_point.h index 82071aa26b1..e6575734d80 100644 --- a/src/java_bytecode/java_entry_point.h +++ b/src/java_bytecode/java_entry_point.h @@ -29,6 +29,6 @@ main_function_resultt get_main_symbol( symbol_tablet &symbol_table, const irep_idt &main_class, message_handlert &, - bool allow_no_body = false); + bool allow_no_body=false); #endif // CPROVER_JAVA_BYTECODE_JAVA_ENTRY_POINT_H diff --git a/src/langapi/language_ui.cpp b/src/langapi/language_ui.cpp index 3775c3c6125..45198cbffd2 100644 --- a/src/langapi/language_ui.cpp +++ b/src/langapi/language_ui.cpp @@ -104,8 +104,8 @@ bool language_uit::parse(const std::string &filename) return true; } - std::pair - result=language_files.filemap.insert( + std::pair + result=language_files.file_map.insert( std::pair(filename, language_filet())); language_filet &lf=result.first->second; diff --git a/src/util/language_file.cpp b/src/util/language_file.cpp index 29775ad8849..efd7b8d53b6 100644 --- a/src/util/language_file.cpp +++ b/src/util/language_file.cpp @@ -66,9 +66,9 @@ void language_filet::get_modules() void language_filet::convert_lazy_method( const irep_idt &id, - symbol_tablet &symtab) + symbol_tablet &symbol_table) { - language->convert_lazy_method(id, symtab); + language->convert_lazy_method(id, symbol_table); } /*******************************************************************\ @@ -85,8 +85,8 @@ Function: language_filest::show_parse void language_filest::show_parse(std::ostream &out) { - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) it->second.language->show_parse(out); } @@ -104,8 +104,8 @@ Function: language_filest::parse bool language_filest::parse() { - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { // open file @@ -151,8 +151,8 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) { // typecheck interfaces - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(it->second.language->interfaces(symbol_table)) return true; @@ -162,8 +162,8 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) unsigned collision_counter=0; - for(filemapt::iterator fm_it=filemap.begin(); - fm_it!=filemap.end(); fm_it++) + for(file_mapt::iterator fm_it=file_map.begin(); + fm_it!=file_map.end(); fm_it++) { const language_filet::modulest &modules= fm_it->second.modules; @@ -176,7 +176,7 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) // these may collide, and then get renamed std::string module_name=*mo_it; - while(modulemap.find(module_name)!=modulemap.end()) + while(module_map.find(module_name)!=module_map.end()) { module_name=*mo_it+"#"+std::to_string(collision_counter); collision_counter++; @@ -185,15 +185,15 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) language_modulet module; module.file=&fm_it->second; module.name=module_name; - modulemap.insert( + module_map.insert( std::pair(module.name, module)); } } // typecheck files - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(it->second.modules.empty()) { @@ -205,14 +205,14 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) std::set lazy_method_ids; it->second.language->lazy_methods_provided(lazy_method_ids); for(const auto &id : lazy_method_ids) - lazymethodmap[id]=&it->second; + lazy_method_map[id]=&it->second; } } // typecheck modules - for(modulemapt::iterator it=modulemap.begin(); - it!=modulemap.end(); it++) + for(module_mapt::iterator it=module_map.begin(); + it!=module_map.end(); it++) { if(typecheck_module(symbol_table, it->second)) return true; @@ -238,8 +238,8 @@ bool language_filest::final( { std::set languages; - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(languages.insert(it->second.language->id()).second) if(it->second.language->final(symbol_table)) @@ -264,8 +264,8 @@ Function: language_filest::interfaces bool language_filest::interfaces( symbol_tablet &symbol_table) { - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(it->second.language->interfaces(symbol_table)) return true; @@ -292,9 +292,9 @@ bool language_filest::typecheck_module( { // check module map - modulemapt::iterator it=modulemap.find(module); + module_mapt::iterator it=module_map.find(module); - if(it==modulemap.end()) + if(it==module_map.end()) { error() << "found no file that provides module " << module << eom; return true; diff --git a/src/util/language_file.h b/src/util/language_file.h index b538c24e84f..d3184d48697 100644 --- a/src/util/language_file.h +++ b/src/util/language_file.h @@ -44,7 +44,7 @@ class language_filet void convert_lazy_method( const irep_idt &id, - symbol_tablet &symtab); + symbol_tablet &symbol_table); language_filet(const language_filet &rhs); @@ -58,21 +58,21 @@ class language_filet class language_filest:public messaget { public: - typedef std::map filemapt; - filemapt filemap; + typedef std::map file_mapt; + file_mapt file_map; - // Contains pointers into filemapt! - typedef std::map modulemapt; - modulemapt modulemap; + // Contains pointers into file_mapt! + typedef std::map module_mapt; + module_mapt module_map; // Contains pointers into filemapt! // This is safe-ish as long as this is std::map. - typedef std::map lazymethodmapt; - lazymethodmapt lazymethodmap; + typedef std::map lazy_method_mapt; + lazy_method_mapt lazy_method_map; void clear_files() { - filemap.clear(); + file_map.clear(); } bool parse(); @@ -87,21 +87,24 @@ class language_filest:public messaget bool has_lazy_method(const irep_idt &id) { - return lazymethodmap.count(id); + return lazy_method_map.count(id); } + // The method must have been added to the symbol table and registered + // in lazy_method_map (currently always in language_filest::typecheck) + // for this to be legal. void convert_lazy_method( const irep_idt &id, - symbol_tablet &symtab) + symbol_tablet &symbol_table) { - return lazymethodmap.at(id)->convert_lazy_method(id, symtab); + return lazy_method_map.at(id)->convert_lazy_method(id, symbol_table); } void clear() { - filemap.clear(); - modulemap.clear(); - lazymethodmap.clear(); + file_map.clear(); + module_map.clear(); + lazy_method_map.clear(); } protected: From 145c2d4be896444f5805cefedc64a029ab40d855 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 15 Feb 2017 11:34:20 +0000 Subject: [PATCH 16/27] Use safe pointers for optional arguments This basic safe-pointer class doesn't do any lifetime management; rather it just enforces explicit null-checks and throws if they don't go as expected. --- .../java_bytecode_convert_method.cpp | 12 ++-- .../java_bytecode_convert_method.h | 9 +-- .../java_bytecode_convert_method_class.h | 9 +-- src/java_bytecode/java_bytecode_language.cpp | 4 +- src/util/safe_pointer.h | 64 +++++++++++++++++++ 5 files changed, 84 insertions(+), 14 deletions(-) create mode 100644 src/util/safe_pointer.h diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 498f880c965..099ab695481 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -1720,7 +1720,10 @@ codet java_bytecode_convert_methodt::convert_instructions( const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); if(needed_classes && arg0.type().id()==ID_symbol) - needed_classes->insert(to_symbol_type(arg0.type()).get_identifier()); + { + needed_classes->insert( + to_symbol_type(arg0.type()).get_identifier()); + } results[0]=java_bytecode_promotion(symbol_expr); // set $assertionDisabled to false @@ -1739,7 +1742,8 @@ codet java_bytecode_convert_methodt::convert_instructions( const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); if(needed_classes && arg0.type().id()==ID_symbol) - needed_classes->insert(to_symbol_type(arg0.type()).get_identifier()); + needed_classes->insert( + to_symbol_type(arg0.type()).get_identifier()); c=code_assignt(symbol_expr, op[0]); } else if(statement==patternt("?2?")) // i2c etc. @@ -2203,8 +2207,8 @@ void java_bytecode_convert_method( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - std::vector *needed_methods, - std::set *needed_classes) + safe_pointer > needed_methods, + safe_pointer > needed_classes) { java_bytecode_convert_methodt java_bytecode_convert_method( symbol_table, diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 68b0dd4e0a8..e81881f44e1 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -11,6 +11,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include "java_bytecode_parse_tree.h" @@ -23,8 +24,8 @@ void java_bytecode_convert_method( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - std::vector *needed_methods, - std::set *needed_classes); + safe_pointer > needed_methods, + safe_pointer > needed_classes); // Must provide both the optional parameters or neither. inline void java_bytecode_convert_method( @@ -42,8 +43,8 @@ inline void java_bytecode_convert_method( message_handler, disable_runtime_checks, max_array_length, - nullptr, - nullptr); + safe_pointer >::create_null(), + safe_pointer >::create_null()); } void java_bytecode_convert_method_lazy( diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 8b11c831266..0d5476d12f7 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -13,6 +13,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include "java_bytecode_parse_tree.h" #include "java_bytecode_convert_class.h" @@ -31,8 +32,8 @@ class java_bytecode_convert_methodt:public messaget message_handlert &_message_handler, bool _disable_runtime_checks, size_t _max_array_length, - std::vector *_needed_methods, - std::set *_needed_classes): + safe_pointer > _needed_methods, + safe_pointer > _needed_classes): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), @@ -57,8 +58,8 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &symbol_table; const bool disable_runtime_checks; const size_t max_array_length; - std::vector *needed_methods; - std::set *needed_classes; + safe_pointer > needed_methods; + safe_pointer > needed_classes; irep_idt method_id; irep_idt current_method; diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 4512ec97709..03b5a51a4a2 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -586,8 +586,8 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( get_message_handler(), disable_runtime_checks, max_user_array_length, - &method_worklist2, - &needed_classes); + safe_pointer >::create_non_null(&method_worklist2), + safe_pointer >::create_non_null(&needed_classes)); gather_virtual_callsites( symbol_table.lookup(mname).value, virtual_callsites); diff --git a/src/util/safe_pointer.h b/src/util/safe_pointer.h new file mode 100644 index 00000000000..47e52ac13fe --- /dev/null +++ b/src/util/safe_pointer.h @@ -0,0 +1,64 @@ +/*******************************************************************\ + +Module: Simple checked pointers + +Author: Chris Smowton, chris@smowton.net + +\*******************************************************************/ + +#ifndef CPROVER_UTIL_SAFE_POINTER_H +#define CPROVER_UTIL_SAFE_POINTER_H + +template class safe_pointer +{ + public: + operator bool() const + { + return !!ptr; + } + + T *get() const + { + assert(ptr && "dereferenced a null safe pointer"); + return ptr; + } + + T &operator*() const + { + return *get(); + } + + T *operator->() const + { + return get(); + } + + static safe_pointer create_null() + { + return safe_pointer(); + } + + static safe_pointer create_non_null( + T *target) + { + assert(target && "initialized safe pointer with null"); + return safe_pointer(target); + } + + static safe_pointer create_maybe_null( + T *target) + { + return safe_pointer(target); + } + + protected: + T *ptr; + + explicit safe_pointer(T *target) : ptr(target) + {} + + safe_pointer() : ptr(nullptr) + {} +}; + +#endif From 8e38c0e9970b6fbe6d8fc70f53e3f92cc6ca671e Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 15 Feb 2017 12:58:30 +0000 Subject: [PATCH 17/27] Add lazy conversion documentation Also fix some trivial linting errors. No functional changes. --- .../java_bytecode_convert_method.cpp | 2 + src/java_bytecode/java_bytecode_language.cpp | 86 +++++++++++++++++-- src/java_bytecode/java_bytecode_language.h | 4 +- 3 files changed, 82 insertions(+), 10 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 099ab695481..5e85451398a 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -1742,8 +1742,10 @@ codet java_bytecode_convert_methodt::convert_instructions( const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); if(needed_classes && arg0.type().id()==ID_symbol) + { needed_classes->insert( to_symbol_type(arg0.type()).get_identifier()); + } c=code_assignt(symbol_expr, op[0]); } else if(statement==patternt("?2?")) // i2c etc. diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 03b5a51a4a2..e9d5a87186b 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -502,6 +502,7 @@ bool java_bytecode_languaget::typecheck( // that are reachable from this entry point. if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE) { + // ci: context-insensitive. if(do_ci_lazy_method_conversion(symbol_table, lazy_methods)) return true; } @@ -514,6 +515,32 @@ bool java_bytecode_languaget::typecheck( return false; } +/*******************************************************************\ + +Function: java_bytecode_languaget::do_ci_lazy_method_conversion + + Inputs: `symbol_table`: global symbol table + `lazy_methods`: map from method names to relevant symbol + and parsed-method objects. + + Outputs: Elaborates lazily-converted methods that may be reachable + starting from the main entry point (usually provided with + the --function command-line option) (side-effect on the + symbol_table). Returns false on success. + + Purpose: Uses a simple context-insensitive ('ci') analysis to + determine which methods may be reachable from the main + entry point. In brief, static methods are reachable if we + find a callsite in another reachable site, while virtual + methods are reachable if we find a virtual callsite + targeting a compatible type *and* a constructor callsite + indicating an object of that type may be instantiated (or + evidence that an object of that type exists before the + main function is entered, such as being passed as a + parameter). + +\*******************************************************************/ + bool java_bytecode_languaget::do_ci_lazy_method_conversion( symbol_tablet &symbol_table, lazy_methodst &lazy_methods) @@ -558,7 +585,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( needed_classes); std::set methods_already_populated; - std::vector virtual_callsites; + std::vector virtual_callsites; bool any_new_methods; do @@ -586,8 +613,10 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( get_message_handler(), disable_runtime_checks, max_user_array_length, - safe_pointer >::create_non_null(&method_worklist2), - safe_pointer >::create_non_null(&needed_classes)); + safe_pointer >::create_non_null( + &method_worklist2), + safe_pointer >::create_non_null( + &needed_classes)); gather_virtual_callsites( symbol_table.lookup(mname).value, virtual_callsites); @@ -607,11 +636,15 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( for(const auto &callsite : virtual_callsites) { // This will also create a stub if a virtual callsite has no targets. - get_virtual_method_targets(*callsite, needed_classes, method_worklist2, - symbol_table, ch); + get_virtual_method_targets( + *callsite, + needed_classes, + method_worklist2, + symbol_table, + ch); } - - } while(any_new_methods); + } + while(any_new_methods); // Remove symbols for methods that were declared but never used: symbol_tablet keep_symbols; @@ -640,12 +673,49 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( return false; } -void java_bytecode_languaget::lazy_methods_provided(std::set &methods) const +/*******************************************************************\ + +Function: java_bytecode_languaget::lazy_methods_provided + + Inputs: None + + Outputs: Populates `methods` with the complete list of lazy methods + that are available to convert (those which are valid + parameters for `convert_lazy_method`) + + Purpose: Provide feedback to `language_filest` so that when asked + for a lazy method, it can delegate to this instance of + java_bytecode_languaget. + +\*******************************************************************/ + +void java_bytecode_languaget::lazy_methods_provided( + std::set &methods) const { for(const auto &kv : lazy_methods) methods.insert(kv.first); } +/*******************************************************************\ + +Function: java_bytecode_languaget::convert_lazy_method + + Inputs: `id`: method ID to convert + `symtab`: global symbol table + + Outputs: Amends the symbol table entry for function `id`, which + should be a lazy method provided by this instance of + `java_bytecode_languaget`. It should initially have a nil + value. After this method completes, it will have a value + representing the method body, identical to that produced + using eager method conversion. + + Purpose: Promote a lazy-converted method (one whose type is known + but whose body hasn't been converted) into a fully- + elaborated one. + +\*******************************************************************/ + void java_bytecode_languaget::convert_lazy_method( const irep_idt &id, symbol_tablet &symtab) diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index afdd275ddf6..ea177e0226a 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -24,8 +24,8 @@ enum lazy_methods_modet }; typedef std::pair< - const symbolt*, - const java_bytecode_parse_treet::methodt*> + const symbolt *, + const java_bytecode_parse_treet::methodt *> lazy_method_valuet; typedef std::map lazy_methodst; From 73e0479edb181b9a6a4ff7bb8e4113eff37844c6 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 15 Feb 2017 17:28:55 +0000 Subject: [PATCH 18/27] Improve failed test printer --- regression/failed-tests-printer.pl | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/regression/failed-tests-printer.pl b/regression/failed-tests-printer.pl index 832ba2d2c64..40767185d5c 100755 --- a/regression/failed-tests-printer.pl +++ b/regression/failed-tests-printer.pl @@ -4,23 +4,25 @@ open LOG,") { chomp; if (/^Test '(.+)'/) { $current_test = $1; - $ignore = 0; - } elsif (1 == $ignore) { - next; + $printed_this_test = 0; } elsif (/\[FAILED\]\s*$/) { - $ignore = 1; - print "Failed test: $current_test\n"; - my $outf = `sed -n '2p' $current_test/test.desc`; - $outf =~ s/\..*$/.out/; - system("cat $current_test/$outf"); - print "\n\n"; + if(0 == $printed_this_test) { + $printed_this_test = 1; + print "\n\n"; + print "Failed test: $current_test\n"; + my $outf = `sed -n '2p' $current_test/test.desc`; + $outf =~ s/\..*$/.out/; + system("cat $current_test/$outf"); + print "\n\nFailed test.desc lines:\n"; + } + print "$_\n"; } } From 6ef7327177bd8f6090f632251d7fa90bf29bb4aa Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 5 Oct 2016 11:45:28 +0100 Subject: [PATCH 19/27] Conversion utf8 to utf16 and pretty-printing of Java strings Added two functions for utf8 to utf16 conversion function depending on whether we use little or big endian. Added a function utf16_little_endian_to_ascii to display nicely java strings as an ascii sequence. --- src/util/unicode.cpp | 40 ++++++++++++++++++++++++++++++++++++++++ src/util/unicode.h | 4 ++++ 2 files changed, 44 insertions(+) diff --git a/src/util/unicode.cpp b/src/util/unicode.cpp index 82acd36d4ee..a98a895e6b8 100644 --- a/src/util/unicode.cpp +++ b/src/util/unicode.cpp @@ -7,6 +7,8 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ #include +#include +#include #include "unicode.h" @@ -258,3 +260,41 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide) return argv_narrow; } + +std::wstring utf8_to_utf16_big_endian(const std::string& in) +{ + std::wstring_convert > converter; + return converter.from_bytes(in); +} + +std::wstring utf8_to_utf16_little_endian(const std::string& in) +{ + const std::codecvt_mode mode=std::codecvt_mode::little_endian; + + // default largest value codecvt_utf8_utf16 reads without error is 0x10ffff + // see: http://en.cppreference.com/w/cpp/locale/codecvt_utf8_utf16 + const unsigned long maxcode=0x10ffff; + + typedef std::codecvt_utf8_utf16 codecvt_utf8_utf16t; + std::wstring_convert converter; + return converter.from_bytes(in); +} + +std::string utf16_little_endian_to_ascii(const std::wstring& in) +{ + std::string result; + std::locale loc; + for(const auto c : in) + { + if(c<=255 && isprint(c, loc)) + result+=(unsigned char)c; + else + { + result+="\\u"; + char hex[5]; + snprintf(hex, sizeof(hex), "%04x", (wchar_t)c); + result+=hex; + } + } + return result; +} diff --git a/src/util/unicode.h b/src/util/unicode.h index edad95039f0..1e5040344d0 100644 --- a/src/util/unicode.h +++ b/src/util/unicode.h @@ -22,6 +22,10 @@ std::wstring widen(const std::string &s); std::string utf32_to_utf8(const std::basic_string &s); std::string utf16_to_utf8(const std::basic_string &s); +std::wstring utf8_to_utf16_big_endian(const std::string&); +std::wstring utf8_to_utf16_little_endian(const std::string&); +std::string utf16_little_endian_to_ascii(const std::wstring& in); + const char **narrow_argv(int argc, const wchar_t **argv_wide); #endif // CPROVER_UTIL_UNICODE_H From 0541fcdc697c44085e37d743a5ca2f5b5291bf06 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 13 Feb 2017 13:03:11 +0000 Subject: [PATCH 20/27] Require libstdc++ from g++-5 to build with Clang --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 6af73e31878..0e1e3abba67 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,6 +28,7 @@ matrix: packages: - libwww-perl - clang-3.7 + - libstdc++-5-dev - libubsan0 before_install: - mkdir bin ; ln -s /usr/bin/clang-3.7 bin/gcc From e433cd45296e1f6da87dcd542c683eabf5f4cbea Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 13 Feb 2017 14:22:02 +0000 Subject: [PATCH 21/27] Rework utf16-to-ascii to avoid snprintf There isn't a standard snprintf in MS toolchains until VS 2015, so use C++ iostreams stuff instead. --- src/util/unicode.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/util/unicode.cpp b/src/util/unicode.cpp index a98a895e6b8..34727ab0816 100644 --- a/src/util/unicode.cpp +++ b/src/util/unicode.cpp @@ -9,6 +9,8 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include +#include #include "unicode.h" @@ -282,19 +284,20 @@ std::wstring utf8_to_utf16_little_endian(const std::string& in) std::string utf16_little_endian_to_ascii(const std::wstring& in) { - std::string result; + std::ostringstream result; std::locale loc; for(const auto c : in) { if(c<=255 && isprint(c, loc)) - result+=(unsigned char)c; + result << (unsigned char)c; else { - result+="\\u"; - char hex[5]; - snprintf(hex, sizeof(hex), "%04x", (wchar_t)c); - result+=hex; + result << "\\u" + << std::hex + << std::setw(4) + << std::setfill('0') + << (unsigned int)c; } } - return result; + return result.str(); } From 927d54a914dd73e558544070ac40d3cfe7d67d67 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 22 Feb 2017 10:32:42 +0000 Subject: [PATCH 22/27] Style unicode.cpp/h No functional changes intended --- src/util/unicode.cpp | 37 +++++++++++++++++++++++++++++++++++++ src/util/unicode.h | 6 +++--- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/util/unicode.cpp b/src/util/unicode.cpp index 34727ab0816..1e280783aff 100644 --- a/src/util/unicode.cpp +++ b/src/util/unicode.cpp @@ -263,12 +263,36 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide) return argv_narrow; } +/*******************************************************************\ + +Function: utf8_to_utf16_big_endian + + Inputs: String in UTF-8 format + + Outputs: String in UTF-16BE format + + Purpose: Note this requires g++-5 libstdc++ / libc++ / MSVC2010+ + +\*******************************************************************/ + std::wstring utf8_to_utf16_big_endian(const std::string& in) { std::wstring_convert > converter; return converter.from_bytes(in); } +/*******************************************************************\ + +Function: utf8_to_utf16_little_endian + + Inputs: String in UTF-8 format + + Outputs: String in UTF-16LE format + + Purpose: Note this requires g++-5 libstdc++ / libc++ / MSVC2010+ + +\*******************************************************************/ + std::wstring utf8_to_utf16_little_endian(const std::string& in) { const std::codecvt_mode mode=std::codecvt_mode::little_endian; @@ -282,6 +306,19 @@ std::wstring utf8_to_utf16_little_endian(const std::string& in) return converter.from_bytes(in); } +/*******************************************************************\ + +Function: utf16_little_endian_to_ascii + + Inputs: String in UTF-16LE format + + Outputs: String in US-ASCII format, with \uxxxx escapes for other + characters + + Purpose: + +\*******************************************************************/ + std::string utf16_little_endian_to_ascii(const std::wstring& in) { std::ostringstream result; diff --git a/src/util/unicode.h b/src/util/unicode.h index 1e5040344d0..c4bcab617d4 100644 --- a/src/util/unicode.h +++ b/src/util/unicode.h @@ -22,9 +22,9 @@ std::wstring widen(const std::string &s); std::string utf32_to_utf8(const std::basic_string &s); std::string utf16_to_utf8(const std::basic_string &s); -std::wstring utf8_to_utf16_big_endian(const std::string&); -std::wstring utf8_to_utf16_little_endian(const std::string&); -std::string utf16_little_endian_to_ascii(const std::wstring& in); +std::wstring utf8_to_utf16_big_endian(const std::string &); +std::wstring utf8_to_utf16_little_endian(const std::string &); +std::string utf16_little_endian_to_ascii(const std::wstring &in); const char **narrow_argv(int argc, const wchar_t **argv_wide); From 3cb7bca6ff90a8eab25759edba4301d0f2ebfd54 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 14 Feb 2017 16:17:58 +0000 Subject: [PATCH 23/27] Cleaning dependencies of the string solver `src/util` is a more appropriate place for the `string_exprt` class We changed the constructor with one type argument to be similar to other `exprt` constructors. This constructor was used to generate a struct with fresh symbols but this is no longer the case, so we use a function fresh string instead. Dependencies in the solver that were not needed have been removed. For some functions this requieres us to now pass the string type as argument. --- .../refinement/refined_string_type.cpp | 11 +- src/solvers/refinement/refined_string_type.h | 32 +--- src/solvers/refinement/string_constraint.h | 14 +- .../refinement/string_constraint_generator.h | 57 +++---- ...tring_constraint_generator_code_points.cpp | 36 ++-- ...string_constraint_generator_comparison.cpp | 41 +++-- .../string_constraint_generator_concat.cpp | 23 ++- .../string_constraint_generator_constants.cpp | 18 +- .../string_constraint_generator_indexof.cpp | 52 +++--- .../string_constraint_generator_insert.cpp | 19 ++- .../string_constraint_generator_main.cpp | 161 ++++++++---------- .../string_constraint_generator_testing.cpp | 23 +-- ...ng_constraint_generator_transformation.cpp | 87 ++++++---- .../string_constraint_generator_valueof.cpp | 128 ++++++++------ src/solvers/refinement/string_expr.cpp | 55 ------ src/solvers/refinement/string_refinement.cpp | 59 ++++--- src/solvers/refinement/string_refinement.h | 5 +- .../refinement => util}/string_expr.h | 56 +++--- 18 files changed, 411 insertions(+), 466 deletions(-) delete mode 100644 src/solvers/refinement/string_expr.cpp rename src/{solvers/refinement => util}/string_expr.h (71%) diff --git a/src/solvers/refinement/refined_string_type.cpp b/src/solvers/refinement/refined_string_type.cpp index cfe44ba4c29..012d30eb8de 100644 --- a/src/solvers/refinement/refined_string_type.cpp +++ b/src/solvers/refinement/refined_string_type.cpp @@ -10,10 +10,10 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com \*******************************************************************/ -#include -#include #include +#include "refined_string_type.h" + /*******************************************************************\ Constructor: refined_string_typet::refined_string_typet @@ -22,11 +22,12 @@ Constructor: refined_string_typet::refined_string_typet \*******************************************************************/ -refined_string_typet::refined_string_typet(typet char_type) +refined_string_typet::refined_string_typet( + const typet &index_type, const typet &char_type) { - infinity_exprt infinite_index(refined_string_typet::index_type()); + infinity_exprt infinite_index(index_type); array_typet char_array(char_type, infinite_index); - components().emplace_back("length", refined_string_typet::index_type()); + components().emplace_back("length", index_type); components().emplace_back("content", char_array); } diff --git a/src/solvers/refinement/refined_string_type.h b/src/solvers/refinement/refined_string_type.h index 5ad67cc2c31..3ecc0ac3a9f 100644 --- a/src/solvers/refinement/refined_string_type.h +++ b/src/solvers/refinement/refined_string_type.h @@ -17,14 +17,12 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include #include -#include -#include // Internal type used for string refinement class refined_string_typet: public struct_typet { public: - explicit refined_string_typet(typet char_type); + refined_string_typet(const typet &index_type, const typet &char_type); // Type for the content (list of characters) of a string const array_typet &get_content_type() const @@ -33,7 +31,7 @@ class refined_string_typet: public struct_typet return to_array_type(components()[1].type()); } - const typet &get_char_type() + const typet &get_char_type() const { assert(components().size()==2); return components()[0].type(); @@ -44,16 +42,6 @@ class refined_string_typet: public struct_typet return get_content_type().size().type(); } - static typet index_type() - { - return signed_int_type(); - } - - static typet java_index_type() - { - return java_int_type(); - } - // For C the unrefined string type is __CPROVER_string, for java it is a // pointer to a struct with tag java.lang.String @@ -67,22 +55,6 @@ class refined_string_typet: public struct_typet static bool is_java_char_sequence_type(const typet &type); - static typet get_char_type(const exprt &expr) - { - if(is_c_string_type(expr.type())) - return char_type(); - else - return java_char_type(); - } - - static typet get_index_type(const exprt &expr) - { - if(is_c_string_type(expr.type())) - return index_type(); - else - return java_index_type(); - } - static bool is_unrefined_string_type(const typet &type) { return ( diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h index 1f5a56ae9b7..61a6f3ebdd6 100644 --- a/src/solvers/refinement/string_constraint.h +++ b/src/solvers/refinement/string_constraint.h @@ -111,7 +111,7 @@ class string_not_contains_constraintt: public exprt { public: // string_not contains_constraintt are formula of the form: - // forall x in [lb,ub[. p(x) => exists y in [lb,ub[. s1[x+y] != s2[y] + // forall x in [lb,ub[. p(x) => exists y in [lb,ub[. s0[x+y] != s1[y] string_not_contains_constraintt( exprt univ_lower_bound, @@ -119,8 +119,8 @@ class string_not_contains_constraintt: public exprt exprt premise, exprt exists_bound_inf, exprt exists_bound_sup, - exprt s0, - exprt s1) : + const string_exprt &s0, + const string_exprt &s1): exprt(ID_string_not_contains_constraint) { copy_to_operands(univ_lower_bound, univ_bound_sup, premise); @@ -153,14 +153,14 @@ class string_not_contains_constraintt: public exprt return operands()[4]; } - const exprt &s0() const + const string_exprt &s0() const { - return operands()[5]; + return to_string_expr(operands()[5]); } - const exprt &s1() const + const string_exprt &s1() const { - return operands()[6]; + return to_string_expr(operands()[6]); } }; diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index a78f1473510..2a56f09fddd 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -13,9 +13,8 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #ifndef CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H -#include +#include #include -#include #include class string_constraint_generatort @@ -26,7 +25,7 @@ class string_constraint_generatort // to the axiom list. string_constraint_generatort(): - mode(ID_unknown), refined_string_type(char_type()) + mode(ID_unknown) { } void set_mode(irep_idt _mode) @@ -34,25 +33,10 @@ class string_constraint_generatort // only C and java modes supported assert((_mode==ID_java) || (_mode==ID_C)); mode=_mode; - refined_string_type=refined_string_typet(get_char_type()); } irep_idt &get_mode() { return mode; } - typet get_char_type() const; - typet get_index_type() const - { - if(mode==ID_java) - return refined_string_typet::java_index_type(); - assert(mode==ID_C); - return refined_string_typet::index_type(); - } - - const refined_string_typet &get_refined_string_type() const - { - return refined_string_type; - } - // Axioms are of three kinds: universally quantified string constraint, // not contains string constraints and simple formulas. std::list axioms; @@ -73,9 +57,14 @@ class string_constraint_generatort return index_exprt(witness.at(c), univ_val); } - symbol_exprt fresh_exist_index(const irep_idt &prefix); - symbol_exprt fresh_univ_index(const irep_idt &prefix); + static unsigned next_symbol_id; + + static symbol_exprt fresh_symbol( + const irep_idt &prefix, const typet &type=bool_typet()); + symbol_exprt fresh_exist_index(const irep_idt &prefix, const typet &type); + symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type); symbol_exprt fresh_boolean(const irep_idt &prefix); + string_exprt fresh_string(const refined_string_typet &type); // We maintain a map from symbols to strings. std::map symbol_to_string; @@ -95,7 +84,7 @@ class string_constraint_generatort exprt add_axioms_for_function_application( const function_application_exprt &expr); - constant_exprt constant_char(int i) const; + static constant_exprt constant_char(int i, const typet &char_type); private: // The integer with the longest string is Integer.MIN_VALUE which is -2^31, @@ -107,7 +96,7 @@ class string_constraint_generatort const std::size_t MAX_FLOAT_LENGTH=15; const std::size_t MAX_DOUBLE_LENGTH=30; - irep_idt extract_java_string(const symbol_exprt &s) const; + static irep_idt extract_java_string(const symbol_exprt &s); exprt axiom_for_is_positive_index(const exprt &x); @@ -123,7 +112,6 @@ class string_constraint_generatort exprt add_axioms_for_contains(const function_application_exprt &f); exprt add_axioms_for_equals(const function_application_exprt &f); exprt add_axioms_for_equals_ignore_case(const function_application_exprt &f); - exprt add_axioms_for_data(const function_application_exprt &f); // Add axioms corresponding to the String.hashCode java function // The specification is partial: the actual value is not actually computed @@ -153,7 +141,8 @@ class string_constraint_generatort string_exprt add_axioms_for_concat_float(const function_application_exprt &f); string_exprt add_axioms_for_concat_code_point( const function_application_exprt &f); - string_exprt add_axioms_for_constant(irep_idt sval); + string_exprt add_axioms_for_constant( + irep_idt sval, const refined_string_typet &ref_type); string_exprt add_axioms_for_delete( const string_exprt &str, const exprt &start, const exprt &end); string_exprt add_axioms_for_delete(const function_application_exprt &expr); @@ -173,15 +162,19 @@ class string_constraint_generatort const function_application_exprt &f); string_exprt add_axioms_from_literal(const function_application_exprt &f); string_exprt add_axioms_from_int(const function_application_exprt &f); - string_exprt add_axioms_from_int(const exprt &i, size_t max_size); - string_exprt add_axioms_from_int_hex(const exprt &i); + string_exprt add_axioms_from_int( + const exprt &i, size_t max_size, const refined_string_typet &ref_type); + string_exprt add_axioms_from_int_hex( + const exprt &i, const refined_string_typet &ref_type); string_exprt add_axioms_from_int_hex(const function_application_exprt &f); string_exprt add_axioms_from_long(const function_application_exprt &f); string_exprt add_axioms_from_long(const exprt &i, size_t max_size); string_exprt add_axioms_from_bool(const function_application_exprt &f); - string_exprt add_axioms_from_bool(const exprt &i); + string_exprt add_axioms_from_bool( + const exprt &i, const refined_string_typet &ref_type); string_exprt add_axioms_from_char(const function_application_exprt &f); - string_exprt add_axioms_from_char(const exprt &i); + string_exprt add_axioms_from_char( + const exprt &i, const refined_string_typet &ref_type); string_exprt add_axioms_from_char_array(const function_application_exprt &f); string_exprt add_axioms_from_char_array( const exprt &length, @@ -257,7 +250,8 @@ class string_constraint_generatort // TODO: not working correctly at the moment string_exprt add_axioms_for_value_of(const function_application_exprt &f); - string_exprt add_axioms_for_code_point(const exprt &code_point); + string_exprt add_axioms_for_code_point( + const exprt &code_point, const refined_string_typet &ref_type); string_exprt add_axioms_for_java_char_array(const exprt &char_array); string_exprt add_axioms_for_if(const if_exprt &expr); exprt add_axioms_for_char_literal(const function_application_exprt &f); @@ -287,9 +281,6 @@ class string_constraint_generatort // Tells which language is used. C and Java are supported irep_idt mode; - // Type of strings used in the refinement - refined_string_typet refined_string_type; - // assert that the number of argument is equal to nb and extract them static const function_application_exprt::argumentst &args( const function_application_exprt &expr, size_t nb) @@ -299,7 +290,7 @@ class string_constraint_generatort return args; } - exprt int_of_hex_char(exprt chr) const; + exprt int_of_hex_char(const exprt &chr) const; exprt is_high_surrogate(const exprt &chr) const; exprt is_low_surrogate(const exprt &chr) const; static exprt character_equals_ignore_case( diff --git a/src/solvers/refinement/string_constraint_generator_code_points.cpp b/src/solvers/refinement/string_constraint_generator_code_points.cpp index 807587cbc45..7c035a0d3e4 100644 --- a/src/solvers/refinement/string_constraint_generator_code_points.cpp +++ b/src/solvers/refinement/string_constraint_generator_code_points.cpp @@ -23,9 +23,9 @@ Function: string_constraint_generatort::add_axioms_for_code_point \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_for_code_point( - const exprt &code_point) + const exprt &code_point, const refined_string_typet &ref_type) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); const typet &type=code_point.type(); assert(type.id()==ID_signedbv); @@ -50,7 +50,7 @@ string_exprt string_constraint_generatort::add_axioms_for_code_point( implies_exprt a2(not_exprt(small), res.axiom_for_has_length(2)); axioms.push_back(a2); - typecast_exprt code_point_as_char(code_point, get_char_type()); + typecast_exprt code_point_as_char(code_point, ref_type.get_char_type()); implies_exprt a3(small, equal_exprt(res[0], code_point_as_char)); axioms.push_back(a3); @@ -58,13 +58,13 @@ string_exprt string_constraint_generatort::add_axioms_for_code_point( hexD800, div_exprt(minus_exprt(code_point, hex010000), hex0400)); implies_exprt a4( not_exprt(small), - equal_exprt(res[0], typecast_exprt(first_char, get_char_type()))); + equal_exprt(res[0], typecast_exprt(first_char, ref_type.get_char_type()))); axioms.push_back(a4); plus_exprt second_char(hexDC00, mod_exprt(code_point, hex0400)); implies_exprt a5( not_exprt(small), - equal_exprt(res[1], typecast_exprt(second_char, get_char_type()))); + equal_exprt(res[1], typecast_exprt(second_char, ref_type.get_char_type()))); axioms.push_back(a5); return res; @@ -88,8 +88,8 @@ Function: string_constraint_generatort::is_high_surrogate exprt string_constraint_generatort::is_high_surrogate(const exprt &chr) const { return and_exprt( - binary_relation_exprt(chr, ID_ge, constant_char(0xD800)), - binary_relation_exprt(chr, ID_le, constant_char(0xDBFF))); + binary_relation_exprt(chr, ID_ge, constant_char(0xD800, chr.type())), + binary_relation_exprt(chr, ID_le, constant_char(0xDBFF, chr.type()))); } /*******************************************************************\ @@ -110,8 +110,8 @@ Function: string_constraint_generatort::is_low_surrogate exprt string_constraint_generatort::is_low_surrogate(const exprt &chr) const { return and_exprt( - binary_relation_exprt(chr, ID_ge, constant_char(0xDC00)), - binary_relation_exprt(chr, ID_le, constant_char(0xDFFF))); + binary_relation_exprt(chr, ID_ge, constant_char(0xDC00, chr.type())), + binary_relation_exprt(chr, ID_le, constant_char(0xDFFF, chr.type()))); } /*******************************************************************\ @@ -163,8 +163,8 @@ exprt string_constraint_generatort::add_axioms_for_code_point_at( string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); const exprt &pos=args(f, 2)[1]; - symbol_exprt result=string_exprt::fresh_symbol("char", return_type); - exprt index1=from_integer(1, get_index_type()); + symbol_exprt result=fresh_symbol("char", return_type); + exprt index1=from_integer(1, str.length().type()); const exprt &char1=str[pos]; const exprt &char2=str[plus_exprt(pos, index1)]; exprt char1_as_int=typecast_exprt(char1, return_type); @@ -198,13 +198,13 @@ exprt string_constraint_generatort::add_axioms_for_code_point_before( assert(args.size()==2); typet return_type=f.type(); assert(return_type.id()==ID_signedbv); - symbol_exprt result=string_exprt::fresh_symbol("char", return_type); + symbol_exprt result=fresh_symbol("char", return_type); string_exprt str=add_axioms_for_string_expr(args[0]); const exprt &char1= - str[minus_exprt(args[1], from_integer(2, get_index_type()))]; + str[minus_exprt(args[1], from_integer(2, str.length().type()))]; const exprt &char2= - str[minus_exprt(args[1], from_integer(1, get_index_type()))]; + str[minus_exprt(args[1], from_integer(1, str.length().type()))]; exprt char1_as_int=typecast_exprt(char1, return_type); exprt char2_as_int=typecast_exprt(char2, return_type); @@ -238,10 +238,9 @@ exprt string_constraint_generatort::add_axioms_for_code_point_count( const exprt &begin=args(f, 3)[1]; const exprt &end=args(f, 3)[2]; const typet &return_type=f.type(); - symbol_exprt result= - string_exprt::fresh_symbol("code_point_count", return_type); + symbol_exprt result=fresh_symbol("code_point_count", return_type); minus_exprt length(end, begin); - div_exprt minimum(length, from_integer(2, get_index_type())); + div_exprt minimum(length, from_integer(2, length.type())); axioms.push_back(binary_relation_exprt(result, ID_le, length)); axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); @@ -270,8 +269,7 @@ exprt string_constraint_generatort::add_axioms_for_offset_by_code_point( const exprt &index=args(f, 3)[1]; const exprt &offset=args(f, 3)[2]; const typet &return_type=f.type(); - symbol_exprt result= - string_exprt::fresh_symbol("offset_by_code_point", return_type); + symbol_exprt result=fresh_symbol("offset_by_code_point", return_type); exprt minimum=plus_exprt(index, offset); exprt maximum=plus_exprt(index, plus_exprt(offset, offset)); diff --git a/src/solvers/refinement/string_constraint_generator_comparison.cpp b/src/solvers/refinement/string_constraint_generator_comparison.cpp index 1cec8cf5442..321282ee9c1 100644 --- a/src/solvers/refinement/string_constraint_generator_comparison.cpp +++ b/src/solvers/refinement/string_constraint_generator_comparison.cpp @@ -31,6 +31,7 @@ exprt string_constraint_generatort::add_axioms_for_equals( string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + typet index_type=s1.length().type(); // We want to write: // eq <=> (s1.length=s2.length &&forall i |s1|=|s2| @@ -130,14 +133,15 @@ exprt string_constraint_generatort::add_axioms_for_equals_ignore_case( implies_exprt a1(eq, s1.axiom_for_has_same_length_as(s2)); axioms.push_back(a1); - symbol_exprt qvar=fresh_univ_index("QA_equal_ignore_case"); + symbol_exprt qvar=fresh_univ_index("QA_equal_ignore_case", index_type); exprt constr2=character_equals_ignore_case( s1[qvar], s2[qvar], char_a, char_A, char_Z); string_constraintt a2(qvar, s1.length(), eq, constr2); axioms.push_back(a2); - symbol_exprt witness=fresh_exist_index("witness_unequal_ignore_case"); - exprt zero=from_integer(0, get_index_type()); + symbol_exprt witness=fresh_exist_index( + "witness_unequal_ignore_case", index_type); + exprt zero=from_integer(0, witness.type()); and_exprt bound_witness( binary_relation_exprt(witness, ID_lt, s1.length()), binary_relation_exprt(witness, ID_ge, zero)); @@ -172,12 +176,13 @@ exprt string_constraint_generatort::add_axioms_for_hash_code( { string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); typet return_type=f.type(); + typet index_type=str.length().type(); // initialisation of the missing pool variable std::map::iterator it; for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) if(hash.find(it->second)==hash.end()) - hash[it->second]=string_exprt::fresh_symbol("hash", return_type); + hash[it->second]=fresh_symbol("hash", return_type); // for each string s. either: // c1: hash(str)=hash(s) @@ -187,7 +192,7 @@ exprt string_constraint_generatort::add_axioms_for_hash_code( // WARNING: the specification may be incomplete for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) { - symbol_exprt i=fresh_exist_index("index_hash"); + symbol_exprt i=fresh_exist_index("index_hash", index_type); equal_exprt c1(hash[it->second], hash[str]); not_exprt c2(equal_exprt(it->second.length(), str.length())); and_exprt c3( @@ -220,7 +225,8 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); const typet &return_type=f.type(); - symbol_exprt res=string_exprt::fresh_symbol("compare_to", return_type); + symbol_exprt res=fresh_symbol("compare_to", return_type); + typet index_type=s1.length().type(); // In the lexicographic comparison, x is the first point where the two // strings differ. @@ -241,11 +247,11 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( implies_exprt a1(res_null, s1.axiom_for_has_same_length_as(s2)); axioms.push_back(a1); - symbol_exprt i=fresh_univ_index("QA_compare_to"); + symbol_exprt i=fresh_univ_index("QA_compare_to", index_type); string_constraintt a2(i, s1.length(), res_null, equal_exprt(s1[i], s2[i])); axioms.push_back(a2); - symbol_exprt x=fresh_exist_index("index_compare_to"); + symbol_exprt x=fresh_exist_index("index_compare_to", index_type); equal_exprt ret_char_diff( res, minus_exprt( @@ -300,20 +306,19 @@ symbol_exprt string_constraint_generatort::add_axioms_for_intern( { string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); const typet &return_type=f.type(); + typet index_type=str.length().type(); // initialisation of the missing pool variable std::map::iterator it; for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) if(pool.find(it->second)==pool.end()) - pool[it->second]=string_exprt::fresh_symbol("pool", return_type); + pool[it->second]=fresh_symbol("pool", return_type); // intern(str)=s_0 || s_1 || ... // for each string s. // intern(str)=intern(s) || |str|!=|s| // || (|str|==|s| &&exists i<|s|. s[i]!=str[i]) - // symbol_exprt intern=string_exprt::fresh_symbol("intern",return_type); - exprt disj=false_exprt(); for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) disj=or_exprt( @@ -326,7 +331,7 @@ symbol_exprt string_constraint_generatort::add_axioms_for_intern( for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) if(it->second!=str) { - symbol_exprt i=fresh_exist_index("index_intern"); + symbol_exprt i=fresh_exist_index("index_intern", index_type); axioms.push_back( or_exprt( equal_exprt(pool[it->second], pool[str]), diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index c6a1a7e52e7..a7d9c4921c4 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -25,7 +25,8 @@ Function: string_constraint_generatort::add_axioms_for_concat string_exprt string_constraint_generatort::add_axioms_for_concat( const string_exprt &s1, const string_exprt &s2) { - string_exprt res(get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : |res|=|s1|+|s2| @@ -40,11 +41,11 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( axioms.push_back(s1.axiom_for_is_shorter_than(res)); axioms.push_back(s2.axiom_for_is_shorter_than(res)); - symbol_exprt idx=fresh_univ_index("QA_index_concat"); + symbol_exprt idx=fresh_univ_index("QA_index_concat", res.length().type()); string_constraintt a4(idx, s1.length(), equal_exprt(s1[idx], res[idx])); axioms.push_back(a4); - symbol_exprt idx2=fresh_univ_index("QA_index_concat2"); + symbol_exprt idx2=fresh_univ_index("QA_index_concat2", res.length().type()); equal_exprt res_eq(s2[idx2], res[plus_exprt(idx2, s1.length())]); string_constraintt a5(idx2, s2.length(), res_eq); axioms.push_back(a5); @@ -93,8 +94,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_int string_exprt string_constraint_generatort::add_axioms_for_concat_int( const function_application_exprt &f) { + const refined_string_typet &ref_type=to_refined_string_type(f.type()); string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_INTEGER_LENGTH); + string_exprt s2=add_axioms_from_int( + args(f, 2)[1], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); } @@ -114,8 +117,9 @@ Function: string_constraint_generatort::add_axioms_for_long string_exprt string_constraint_generatort::add_axioms_for_concat_long( const function_application_exprt &f) { + const refined_string_typet &ref_type=to_refined_string_type(f.type()); string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_LONG_LENGTH); + string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_LONG_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); } @@ -135,7 +139,8 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_bool( const function_application_exprt &f) { string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_bool(args(f, 2)[1]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_bool(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); } @@ -155,7 +160,8 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_char( const function_application_exprt &f) { string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_char(args(f, 2)[1]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_char(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); } @@ -217,6 +223,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_code_point( const function_application_exprt &f) { string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_code_point(args(f, 2)[1]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_for_code_point(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); } diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index aa3466a2e57..90769747949 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -25,7 +25,7 @@ Function: string_constraint_generatort::extract_java_string \*******************************************************************/ irep_idt string_constraint_generatort::extract_java_string( - const symbol_exprt &s) const + const symbol_exprt &s) { std::string tmp=id2string(s.get_identifier()); std::string prefix("java::java.lang.String.Literal."); @@ -48,9 +48,9 @@ Function: string_constraint_generatort::add_axioms_for_constant \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_for_constant( - irep_idt sval) + irep_idt sval, const refined_string_typet &ref_type) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); std::string c_str=id2string(sval); std::wstring str; @@ -65,13 +65,13 @@ string_exprt string_constraint_generatort::add_axioms_for_constant( for(std::size_t i=0; i str[n]!=c // a5 : forall m, from_index<=n<|str|. !contains => str[m]!=c - exprt minus1=from_integer(-1, get_index_type()); + exprt minus1=from_integer(-1, index_type); and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), binary_relation_exprt(index, ID_lt, str.length())); @@ -51,12 +52,12 @@ exprt string_constraint_generatort::add_axioms_for_index_of( equal_exprt(str[index], c))); axioms.push_back(a3); - symbol_exprt n=fresh_univ_index("QA_index_of"); + symbol_exprt n=fresh_univ_index("QA_index_of", index_type); string_constraintt a4( n, from_index, index, contains, not_exprt(equal_exprt(str[n], c))); axioms.push_back(a4); - symbol_exprt m=fresh_univ_index("QA_index_of"); + symbol_exprt m=fresh_univ_index("QA_index_of", index_type); string_constraintt a5( m, from_index, @@ -86,7 +87,8 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( const string_exprt &substring, const exprt &from_index) { - symbol_exprt offset=fresh_exist_index("index_of"); + const typet &index_type=str.length().type(); + symbol_exprt offset=fresh_exist_index("index_of", index_type); symbol_exprt contains=fresh_boolean("contains_substring"); // We add axioms: @@ -104,10 +106,10 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( implies_exprt a2( not_exprt(contains), - equal_exprt(offset, from_integer(-1, get_index_type()))); + equal_exprt(offset, from_integer(-1, index_type))); axioms.push_back(a2); - symbol_exprt qvar=fresh_univ_index("QA_index_of_string"); + symbol_exprt qvar=fresh_univ_index("QA_index_of_string", index_type); string_constraintt a3( qvar, substring.length(), @@ -123,7 +125,8 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( const string_exprt &substring, const exprt &from_index) { - symbol_exprt offset=fresh_exist_index("index_of"); + const typet &index_type=str.length().type(); + symbol_exprt offset=fresh_exist_index("index_of", index_type); symbol_exprt contains=fresh_boolean("contains_substring"); // We add axioms: @@ -141,10 +144,10 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( implies_exprt a2( not_exprt(contains), - equal_exprt(offset, from_integer(-1, get_index_type()))); + equal_exprt(offset, from_integer(-1, index_type))); axioms.push_back(a2); - symbol_exprt qvar=fresh_univ_index("QA_index_of_string"); + symbol_exprt qvar=fresh_univ_index("QA_index_of_string", index_type); equal_exprt constr3(str[plus_exprt(qvar, offset)], substring[qvar]); string_constraintt a3(qvar, substring.length(), contains, constr3); axioms.push_back(a3); @@ -170,13 +173,14 @@ exprt string_constraint_generatort::add_axioms_for_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - assert(f.type()==get_index_type()); string_exprt str=add_axioms_for_string_expr(args[0]); const exprt &c=args[1]; + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + assert(f.type()==ref_type.get_index_type()); exprt from_index; if(args.size()==2) - from_index=from_integer(0, get_index_type()); + from_index=from_integer(0, ref_type.get_index_type()); else if(args.size()==3) from_index=args[2]; else @@ -189,13 +193,15 @@ exprt string_constraint_generatort::add_axioms_for_index_of( } else return add_axioms_for_index_of( - str, typecast_exprt(c, get_char_type()), from_index); + str, typecast_exprt(c, ref_type.get_char_type()), from_index); } exprt string_constraint_generatort::add_axioms_for_last_index_of( const string_exprt &str, const exprt &c, const exprt &from_index) { - symbol_exprt index=fresh_exist_index("last_index_of"); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &index_type=ref_type.get_index_type(); + symbol_exprt index=fresh_exist_index("last_index_of", index_type); symbol_exprt contains=fresh_boolean("contains_in_last_index_of"); // We add axioms: @@ -205,8 +211,8 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( // a4 : forall n. i+1 <= n < from_index +1 &&contains => s[n]!=c // a5 : forall m. 0 <= m < from_index +1 &&!contains => s[m]!=c - exprt index1=from_integer(1, get_index_type()); - exprt minus1=from_integer(-1, get_index_type()); + exprt index1=from_integer(1, index_type); + exprt minus1=from_integer(-1, index_type); exprt from_index_plus_one=plus_exprt(from_index, index1); and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), @@ -223,7 +229,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( equal_exprt(str[index], c))); axioms.push_back(a3); - symbol_exprt n=fresh_univ_index("QA_last_index_of"); + symbol_exprt n=fresh_univ_index("QA_last_index_of", index_type); string_constraintt a4( n, plus_exprt(index, index1), @@ -232,7 +238,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( not_exprt(equal_exprt(str[n], c))); axioms.push_back(a4); - symbol_exprt m=fresh_univ_index("QA_last_index_of"); + symbol_exprt m=fresh_univ_index("QA_last_index_of", index_type); string_constraintt a5( m, from_index_plus_one, @@ -261,13 +267,14 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - assert(f.type()==get_index_type()); string_exprt str=add_axioms_for_string_expr(args[0]); exprt c=args[1]; + const refined_string_typet &ref_type=to_refined_string_type(str.type()); exprt from_index; + assert(f.type()==ref_type.get_index_type()); if(args.size()==2) - from_index=minus_exprt(str.length(), from_integer(1, get_index_type())); + from_index=minus_exprt(str.length(), from_integer(1, str.length().type())); else if(args.size()==3) from_index=args[2]; else @@ -280,6 +287,5 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( } else return add_axioms_for_last_index_of( - str, typecast_exprt(c, get_char_type()), from_index); + str, typecast_exprt(c, ref_type.get_char_type()), from_index); } - diff --git a/src/solvers/refinement/string_constraint_generator_insert.cpp b/src/solvers/refinement/string_constraint_generator_insert.cpp index 8073ab4cb1f..6a080a5dc6c 100644 --- a/src/solvers/refinement/string_constraint_generator_insert.cpp +++ b/src/solvers/refinement/string_constraint_generator_insert.cpp @@ -24,9 +24,9 @@ Function: string_constraint_generatort::add_axioms_for_insert string_exprt string_constraint_generatort::add_axioms_for_insert( const string_exprt &s1, const string_exprt &s2, const exprt &offset) { - assert(offset.type()==get_index_type()); + assert(offset.type()==s1.length().type()); string_exprt pref=add_axioms_for_substring( - s1, from_integer(0, get_index_type()), offset); + s1, from_integer(0, offset.type()), offset); string_exprt suf=add_axioms_for_substring(s1, offset, s1.length()); string_exprt concat1=add_axioms_for_concat(pref, s2); return add_axioms_for_concat(concat1, suf); @@ -69,8 +69,10 @@ Function: string_constraint_generatort::add_axioms_for_insert_int string_exprt string_constraint_generatort::add_axioms_for_insert_int( const function_application_exprt &f) { + const refined_string_typet &ref_type=to_refined_string_type(f.type()); string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_INTEGER_LENGTH); + string_exprt s2=add_axioms_from_int( + args(f, 3)[2], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -90,8 +92,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_long string_exprt string_constraint_generatort::add_axioms_for_insert_long( const function_application_exprt &f) { + const refined_string_typet &ref_type=to_refined_string_type(f.type()); string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_LONG_LENGTH); + string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_LONG_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -111,8 +114,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_bool string_exprt string_constraint_generatort::add_axioms_for_insert_bool( const function_application_exprt &f) { + const refined_string_typet &ref_type=to_refined_string_type(f.type()); string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_bool(args(f, 3)[2]); + string_exprt s2=add_axioms_from_bool(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -133,7 +137,8 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_char( const function_application_exprt &f) { string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_char(args(f, 3)[2]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_char(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -208,7 +213,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_char_array( { assert(f.arguments().size()==4); count=f.arguments()[2]; - offset=from_integer(0, get_index_type()); + offset=from_integer(0, count.type()); } string_exprt str=add_axioms_for_string_expr(f.arguments()[0]); diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index e76ae3f1f3d..5a269e5ad3b 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -17,12 +17,15 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include +unsigned string_constraint_generatort::next_symbol_id=1; + /*******************************************************************\ Function: string_constraint_generatort::constant_char - Inputs: integer representing a character, we do not use char type here - because java characters use more than 8 bits. + Inputs: integer representing a character, and a type for characters; + we do not use char type here because in some languages + (for instance java) characters use more than 8 bits. Outputs: constant expression corresponding to the character. @@ -30,29 +33,32 @@ Function: string_constraint_generatort::constant_char \*******************************************************************/ -constant_exprt string_constraint_generatort::constant_char(int i) const +constant_exprt string_constraint_generatort::constant_char( + int i, const typet &char_type) { - return from_integer(i, get_char_type()); + return from_integer(i, char_type); } /*******************************************************************\ -Function: string_constraint_generatort::get_char_type +Function: string_constraint_generator::fresh_symbol + + Inputs: a prefix and a type - Outputs: a type for characters + Outputs: a symbol of type tp whose name starts with + "string_refinement#" followed by prefix - Purpose: returns the type of characters that is adapted to the current mode + Purpose: generate a new symbol expression of the given type with some prefix \*******************************************************************/ -typet string_constraint_generatort::get_char_type() const +symbol_exprt string_constraint_generatort::fresh_symbol( + const irep_idt &prefix, const typet &type) { - if(mode==ID_C) - return char_type(); - else if(mode==ID_java) - return java_char_type(); - else - assert(false); // only C and java modes supported + std::ostringstream buf; + buf << "string_refinement#" << prefix << "#" << (next_symbol_id++); + irep_idt name(buf.str()); + return symbol_exprt(name, type); } /*******************************************************************\ @@ -69,9 +75,9 @@ Function: string_constraint_generatort::fresh_univ_index \*******************************************************************/ symbol_exprt string_constraint_generatort::fresh_univ_index( - const irep_idt &prefix) + const irep_idt &prefix, const typet &type) { - return string_exprt::fresh_symbol(prefix, get_index_type()); + return fresh_symbol(prefix, type); } /*******************************************************************\ @@ -87,9 +93,9 @@ Function: string_constraint_generatort::fresh_exist_index \*******************************************************************/ symbol_exprt string_constraint_generatort::fresh_exist_index( - const irep_idt &prefix) + const irep_idt &prefix, const typet &type) { - symbol_exprt s=string_exprt::fresh_symbol(prefix, get_index_type()); + symbol_exprt s=fresh_symbol(prefix, type); index_symbols.push_back(s); return s; } @@ -109,13 +115,35 @@ Function: string_constraint_generatort::fresh_boolean symbol_exprt string_constraint_generatort::fresh_boolean( const irep_idt &prefix) { - symbol_exprt b=string_exprt::fresh_symbol(prefix, bool_typet()); + symbol_exprt b=fresh_symbol(prefix, bool_typet()); boolean_symbols.push_back(b); return b; } /*******************************************************************\ +Function: string_constraint_generatort::fresh_string + + Inputs: a type for string + + Outputs: a string expression + + Purpose: construct a string expression whose length and content are new + variables + +\*******************************************************************/ + +string_exprt string_constraint_generatort::fresh_string( + const refined_string_typet &type) +{ + symbol_exprt length= + fresh_symbol("string_length", type.get_index_type()); + symbol_exprt content=fresh_symbol("string_content", type.get_content_type()); + return string_exprt(length, content, type); +} + +/*******************************************************************\ + Function: string_constraint_generatort::add_axioms_for_string_expr Inputs: an expression of type string @@ -137,7 +165,6 @@ string_exprt string_constraint_generatort::add_axioms_for_string_expr( { exprt res=add_axioms_for_function_application( to_function_application_expr(unrefined_string)); - assert(res.type()==refined_string_type); s=to_string_expr(res); } else if(unrefined_string.id()==ID_symbol) @@ -158,7 +185,6 @@ string_exprt string_constraint_generatort::add_axioms_for_string_expr( { exprt arg=to_typecast_expr(unrefined_string).op(); exprt res=add_axioms_for_string_expr(arg); - assert(res.type()==refined_string_typet(get_char_type())); s=to_string_expr(res); } else @@ -169,7 +195,7 @@ string_exprt string_constraint_generatort::add_axioms_for_string_expr( } axioms.push_back( - s.axiom_for_is_longer_than(from_integer(0, get_index_type()))); + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); return s; } @@ -188,22 +214,24 @@ Function: string_constraint_generatort::add_axioms_for_if string_exprt string_constraint_generatort::add_axioms_for_if( const if_exprt &expr) { - string_exprt res(get_char_type()); assert( refined_string_typet::is_unrefined_string_type(expr.true_case().type())); string_exprt t=add_axioms_for_string_expr(expr.true_case()); assert( refined_string_typet::is_unrefined_string_type(expr.false_case().type())); string_exprt f=add_axioms_for_string_expr(expr.false_case()); + const refined_string_typet &ref_type=to_refined_string_type(t.type()); + const typet &index_type=ref_type.get_index_type(); + string_exprt res=fresh_string(ref_type); axioms.push_back( implies_exprt(expr.cond(), res.axiom_for_has_same_length_as(t))); - symbol_exprt qvar=fresh_univ_index("QA_string_if_true"); + symbol_exprt qvar=fresh_univ_index("QA_string_if_true", index_type); equal_exprt qequal(res[qvar], t[qvar]); axioms.push_back(string_constraintt(qvar, t.length(), expr.cond(), qequal)); axioms.push_back( implies_exprt(not_exprt(expr.cond()), res.axiom_for_has_same_length_as(f))); - symbol_exprt qvar2=fresh_univ_index("QA_string_if_false"); + symbol_exprt qvar2=fresh_univ_index("QA_string_if_false", index_type); equal_exprt qequal2(res[qvar2], f[qvar2]); string_constraintt sc2(qvar2, f.length(), not_exprt(expr.cond()), qequal2); axioms.push_back(sc2); @@ -229,8 +257,9 @@ string_exprt string_constraint_generatort::find_or_add_string_of_symbol( const symbol_exprt &sym) { irep_idt id=sym.get_identifier(); - auto entry=symbol_to_string.insert( - std::make_pair(id, string_exprt(get_char_type()))); + const refined_string_typet &ref_type=to_refined_string_type(sym.type()); + string_exprt str=fresh_string(ref_type); + auto entry=symbol_to_string.insert(std::make_pair(id, str)); return entry.first->second; } @@ -376,11 +405,10 @@ exprt string_constraint_generatort::add_axioms_for_function_application( return add_axioms_for_delete_char_at(expr); else if(id==ID_cprover_string_replace_func) return add_axioms_for_replace(expr); - else if(id==ID_cprover_string_data_func) - return add_axioms_for_data(expr); else { - std::string msg("string_exprt::function_application: unknown symbol :"); + std::string msg( + "string_constraint_generator::function_application: unknown symbol :"); msg+=id2string(id); throw msg; } @@ -403,7 +431,8 @@ string_exprt string_constraint_generatort::add_axioms_for_copy( const function_application_exprt &f) { string_exprt s1=add_axioms_for_string_expr(args(f, 1)[0]); - string_exprt res(get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : |res|=|s1| @@ -411,7 +440,7 @@ string_exprt string_constraint_generatort::add_axioms_for_copy( axioms.push_back(res.axiom_for_has_same_length_as(s1)); - symbol_exprt idx=fresh_univ_index("QA_index_copy"); + symbol_exprt idx=fresh_univ_index("QA_index_copy", ref_type.get_index_type()); string_constraintt a2(idx, s1.length(), equal_exprt(s1[idx], res[idx])); axioms.push_back(a2); return res; @@ -432,7 +461,8 @@ Function: string_constraint_generatort::add_axioms_for_java_char_array string_exprt string_constraint_generatort::add_axioms_for_java_char_array( const exprt &char_array) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string( + refined_string_typet(java_int_type(), java_char_type())); exprt arr=to_address_of_expr(char_array).object(); exprt len=member_exprt(arr, "length", res.length().type()); exprt cont=member_exprt(arr, "data", res.content().type()); @@ -462,55 +492,6 @@ exprt string_constraint_generatort::add_axioms_for_length( /*******************************************************************\ -Function: string_constraint_generatort::add_axioms_for_data - - Inputs: function application with three arguments: one string, a java - Array object and the corresponding data field - - Outputs: an expression of type void - - Purpose: add axioms stating that the content of the string argument is - equal to the content of the array argument - -\*******************************************************************/ - -exprt string_constraint_generatort::add_axioms_for_data( - const function_application_exprt &f) -{ - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); - const exprt &tab_data=args(f, 3)[1]; - const exprt &data=args(f, 3)[2]; - symbol_exprt qvar=fresh_univ_index("QA_string_data"); - - // translating data[qvar] to the correct expression - // which is (signed int)byte_extract_little_endian - // (data, (2l*qvar) + POINTER_OFFSET(byte_extract_little_endian - // (tab.data, 0l, unsigned short int *)), unsigned short int) - mult_exprt qvar2( - from_integer(2, java_long_type()), - typecast_exprt(qvar, java_long_type())); - byte_extract_exprt extract( - ID_byte_extract_little_endian, - tab_data, - from_integer(0, java_long_type()), - pointer_typet(java_char_type())); - plus_exprt arg2(qvar2, pointer_offset(extract)); - - byte_extract_exprt extract2( - ID_byte_extract_little_endian, data, arg2, java_char_type()); - exprt char_in_tab=typecast_exprt(extract2, get_char_type()); - - string_constraintt eq( - qvar, str.length(), equal_exprt(str[qvar], char_in_tab)); - axioms.push_back(eq); - - exprt void_expr; - void_expr.type()=void_typet(); - return void_expr; -} - -/*******************************************************************\ - Function: string_constraint_generatort::add_axioms_from_char_array Inputs: a length expression, an array expression, a offset index, and a @@ -530,13 +511,16 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( const exprt &offset, const exprt &count) { - string_exprt str(get_char_type()); + const typet &char_type=to_array_type(data.type()).subtype(); + const typet &index_type=length.type(); + refined_string_typet ref_type(index_type, char_type); + string_exprt str=fresh_string(ref_type); // We add axioms: // a1 : forall q < count. str[q] = data[q+offset] // a2 : |str| = count - symbol_exprt qvar=fresh_univ_index("QA_string_of_char_array"); + symbol_exprt qvar=fresh_univ_index("QA_string_of_char_array", index_type); exprt char_in_tab=data; assert(char_in_tab.id()==ID_index); char_in_tab.op1()=plus_exprt(qvar, offset); @@ -576,7 +560,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( { assert(f.arguments().size()==2); count=f.arguments()[0]; - offset=from_integer(0, get_index_type()); + offset=from_integer(0, count.type()); } const exprt &tab_length=f.arguments()[0]; const exprt &data=f.arguments()[1]; @@ -598,7 +582,7 @@ Function: string_constraint_generatort::add_axioms_for_is_positive_index exprt string_constraint_generatort::axiom_for_is_positive_index(const exprt &x) { return binary_relation_exprt( - x, ID_ge, from_integer(0, get_index_type())); + x, ID_ge, from_integer(0, x.type())); } /*******************************************************************\ @@ -630,7 +614,7 @@ exprt string_constraint_generatort::add_axioms_for_char_literal( const string_constantt s=to_string_constant(arg.op0().op0().op0()); irep_idt sval=s.get_value(); assert(sval.size()==1); - return from_integer(unsigned(sval[0]), get_char_type()); + return from_integer(unsigned(sval[0]), arg.type()); } else { @@ -655,7 +639,8 @@ exprt string_constraint_generatort::add_axioms_for_char_at( const function_application_exprt &f) { string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); - symbol_exprt char_sym=string_exprt::fresh_symbol("char", get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + symbol_exprt char_sym=fresh_symbol("char", ref_type.get_char_type()); axioms.push_back(equal_exprt(char_sym, str[args(f, 2)[1]])); return char_sym; } diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index 6df308c533e..8b1c7bca9d4 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -27,6 +27,7 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( const string_exprt &prefix, const string_exprt &str, const exprt &offset) { symbol_exprt isprefix=fresh_boolean("isprefix"); + const typet &index_type=str.length().type(); // We add axioms: // a1 : isprefix => |str| >= |prefix|+offset @@ -41,7 +42,7 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))); axioms.push_back(a1); - symbol_exprt qvar=fresh_univ_index("QA_isprefix"); + symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type); string_constraintt a2( qvar, prefix.length(), @@ -49,7 +50,7 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])); axioms.push_back(a2); - symbol_exprt witness=fresh_exist_index("witness_not_isprefix"); + symbol_exprt witness=fresh_exist_index("witness_not_isprefix", index_type); and_exprt witness_diff( axiom_for_is_positive_index(witness), and_exprt( @@ -91,7 +92,7 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); exprt offset; if(args.size()==2) - offset=from_integer(0, get_index_type()); + offset=from_integer(0, s0.length().type()); else if(args.size()==3) offset=args[2]; return typecast_exprt(add_axioms_for_is_prefix(s0, s1, offset), f.type()); @@ -151,6 +152,7 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( typecast_exprt tc_issuffix(issuffix, f.type()); string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + const typet &index_type=s0.length().type(); // We add axioms: // a1 : issufix => s0.length >= s1.length @@ -164,14 +166,14 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( implies_exprt a1(issuffix, s1.axiom_for_is_longer_than(s0)); axioms.push_back(a1); - symbol_exprt qvar=fresh_univ_index("QA_suffix"); + symbol_exprt qvar=fresh_univ_index("QA_suffix", index_type); exprt qvar_shifted=plus_exprt( qvar, minus_exprt(s1.length(), s0.length())); string_constraintt a2( qvar, s0.length(), issuffix, equal_exprt(s0[qvar], s1[qvar_shifted])); axioms.push_back(a2); - symbol_exprt witness=fresh_exist_index("witness_not_suffix"); + symbol_exprt witness=fresh_exist_index("witness_not_suffix", index_type); exprt shifted=plus_exprt( witness, minus_exprt(s1.length(), s0.length())); or_exprt constr3( @@ -207,6 +209,7 @@ exprt string_constraint_generatort::add_axioms_for_contains( typecast_exprt tc_contains(contains, f.type()); string_exprt s0=add_axioms_for_string_expr(args(f, 2)[0]); string_exprt s1=add_axioms_for_string_expr(args(f, 2)[1]); + const typet &index_type=s0.type(); // We add axioms: // a1 : contains => s0.length >= s1.length @@ -219,14 +222,14 @@ exprt string_constraint_generatort::add_axioms_for_contains( implies_exprt a1(contains, s0.axiom_for_is_longer_than(s1)); axioms.push_back(a1); - symbol_exprt startpos=fresh_exist_index("startpos_contains"); + symbol_exprt startpos=fresh_exist_index("startpos_contains", index_type); minus_exprt length_diff(s0.length(), s1.length()); and_exprt a2( axiom_for_is_positive_index(startpos), binary_relation_exprt(startpos, ID_le, length_diff)); axioms.push_back(a2); - symbol_exprt qvar=fresh_univ_index("QA_contains"); + symbol_exprt qvar=fresh_univ_index("QA_contains", index_type); exprt qvar_shifted=plus_exprt(qvar, startpos); string_constraintt a3( qvar, s1.length(), contains, equal_exprt(s1[qvar], s0[qvar_shifted])); @@ -236,10 +239,10 @@ exprt string_constraint_generatort::add_axioms_for_contains( // forall startpos <= |s0|-|s1|. (!contains &&|s0| >= |s1| ) // ==> exists witness<|s1|. s1[witness]!=s0[startpos+witness] string_not_contains_constraintt a4( - from_integer(0, get_index_type()), - plus_exprt(from_integer(1, get_index_type()), length_diff), + from_integer(0, index_type), + plus_exprt(from_integer(1, index_type), length_diff), and_exprt(not_exprt(contains), s0.axiom_for_is_longer_than(s1)), - from_integer(0, get_index_type()), s1.length(), s0, s1); + from_integer(0, index_type), s1.length(), s0, s1); axioms.push_back(a4); return tc_contains; diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index c361c9d4e48..83735e0d5b5 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -31,7 +31,8 @@ string_exprt string_constraint_generatort::add_axioms_for_set_length( { string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); exprt k=args(f, 2)[1]; - string_exprt res(get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : |res|=k @@ -39,7 +40,8 @@ string_exprt string_constraint_generatort::add_axioms_for_set_length( axioms.push_back(res.axiom_for_has_length(k)); - symbol_exprt idx=fresh_univ_index("QA_index_set_length"); + symbol_exprt idx=fresh_univ_index( + "QA_index_set_length", ref_type.get_index_type()); string_constraintt a2( idx, k, and_exprt( implies_exprt( @@ -47,7 +49,7 @@ string_exprt string_constraint_generatort::add_axioms_for_set_length( equal_exprt(s1[idx], res[idx])), implies_exprt( s1.axiom_for_is_shorter_than(idx), - equal_exprt(s1[idx], constant_char(0))))); + equal_exprt(s1[idx], constant_char(0, ref_type.get_char_type()))))); axioms.push_back(a2); return res; @@ -106,10 +108,12 @@ Function: string_constraint_generatort::add_axioms_for_substring string_exprt string_constraint_generatort::add_axioms_for_substring( const string_exprt &str, const exprt &start, const exprt &end) { - symbol_exprt idx=fresh_exist_index("index_substring"); - assert(start.type()==get_index_type()); - assert(end.type()==get_index_type()); - string_exprt res(get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &index_type=ref_type.get_index_type(); + symbol_exprt idx=fresh_exist_index("index_substring", index_type); + assert(start.type()==index_type); + assert(end.type()==index_type); + string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : start < end => |res| = end - start @@ -122,8 +126,7 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( res.axiom_for_has_length(minus_exprt(end, start))); axioms.push_back(a1); - exprt is_empty=res.axiom_for_has_length( - from_integer(0, get_index_type())); + exprt is_empty=res.axiom_for_has_length(from_integer(0, index_type)); implies_exprt a2(binary_relation_exprt(start, ID_ge, end), is_empty); axioms.push_back(a2); @@ -152,9 +155,11 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( const function_application_exprt &expr) { string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); - string_exprt res(get_char_type()); - symbol_exprt idx=fresh_exist_index("index_trim"); - exprt space_char=constant_char(' '); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &index_type=ref_type.get_index_type(); + string_exprt res=fresh_string(ref_type); + symbol_exprt idx=fresh_exist_index("index_trim", index_type); + exprt space_char=constant_char(' ', ref_type.get_char_type()); // We add axioms: // a1 : m + |s1| <= |str| @@ -171,25 +176,25 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( exprt a1=str.axiom_for_is_longer_than(plus_exprt(idx, res.length())); axioms.push_back(a1); - binary_relation_exprt a2(idx, ID_ge, from_integer(0, get_index_type())); + binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type)); axioms.push_back(a2); exprt a3=str.axiom_for_is_longer_than(idx); axioms.push_back(a3); exprt a4=res.axiom_for_is_longer_than( - from_integer(0, get_index_type())); + from_integer(0, index_type)); axioms.push_back(a4); exprt a5=res.axiom_for_is_shorter_than(str); axioms.push_back(a5); - symbol_exprt n=fresh_univ_index("QA_index_trim"); + symbol_exprt n=fresh_univ_index("QA_index_trim", index_type); binary_relation_exprt non_print(str[n], ID_le, space_char); string_constraintt a6(n, idx, non_print); axioms.push_back(a6); - symbol_exprt n2=fresh_univ_index("QA_index_trim2"); + symbol_exprt n2=fresh_univ_index("QA_index_trim2", index_type); minus_exprt bound(str.length(), plus_exprt(idx, res.length())); binary_relation_exprt eqn2( str[plus_exprt(idx, plus_exprt(res.length(), n2))], @@ -199,13 +204,13 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( string_constraintt a7(n2, bound, eqn2); axioms.push_back(a7); - symbol_exprt n3=fresh_univ_index("QA_index_trim3"); + symbol_exprt n3=fresh_univ_index("QA_index_trim3", index_type); equal_exprt eqn3(res[n3], str[plus_exprt(n3, idx)]); string_constraintt a8(n3, res.length(), eqn3); axioms.push_back(a8); minus_exprt index_before( - plus_exprt(idx, res.length()), from_integer(1, get_index_type())); + plus_exprt(idx, res.length()), from_integer(1, index_type)); binary_relation_exprt no_space_before(str[index_before], ID_gt, space_char); or_exprt a9( equal_exprt(idx, str.length()), @@ -232,11 +237,14 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( const function_application_exprt &expr) { string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); - string_exprt res(get_char_type()); - exprt char_a=constant_char('a'); - exprt char_A=constant_char('A'); - exprt char_z=constant_char('z'); - exprt char_Z=constant_char('Z'); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &char_type=ref_type.get_char_type(); + const typet &index_type=ref_type.get_index_type(); + string_exprt res=fresh_string(ref_type); + exprt char_a=constant_char('a', char_type); + exprt char_A=constant_char('A', char_type); + exprt char_z=constant_char('z', char_type); + exprt char_Z=constant_char('Z', char_type); // TODO: add support for locales using case mapping information // from the UnicodeData file. @@ -251,7 +259,7 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( exprt a1=res.axiom_for_has_same_length_as(str); axioms.push_back(a1); - symbol_exprt idx=fresh_univ_index("QA_lower_case"); + symbol_exprt idx=fresh_univ_index("QA_lower_case", index_type); exprt is_upper_case=and_exprt( binary_relation_exprt(char_A, ID_le, str[idx]), binary_relation_exprt(str[idx], ID_le, char_Z)); @@ -282,11 +290,14 @@ string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( const function_application_exprt &expr) { string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); - string_exprt res(get_char_type()); - exprt char_a=constant_char('a'); - exprt char_A=constant_char('A'); - exprt char_z=constant_char('z'); - exprt char_Z=constant_char('Z'); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &char_type=ref_type.get_char_type(); + const typet &index_type=ref_type.get_index_type(); + string_exprt res=fresh_string(ref_type); + exprt char_a=constant_char('a', char_type); + exprt char_A=constant_char('A', char_type); + exprt char_z=constant_char('z', char_type); + exprt char_Z=constant_char('Z', char_type); // TODO: add support for locales using case mapping information // from the UnicodeData file. @@ -299,7 +310,7 @@ string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( exprt a1=res.axiom_for_has_same_length_as(str); axioms.push_back(a1); - symbol_exprt idx=fresh_univ_index("QA_upper_case"); + symbol_exprt idx=fresh_univ_index("QA_upper_case", index_type); exprt is_lower_case=and_exprt( binary_relation_exprt(char_a, ID_le, str[idx]), binary_relation_exprt(str[idx], ID_le, char_z)); @@ -334,8 +345,9 @@ Function: string_constraint_generatort::add_axioms_for_char_set string_exprt string_constraint_generatort::add_axioms_for_char_set( const function_application_exprt &f) { - string_exprt res(get_char_type()); string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + string_exprt res=fresh_string(ref_type); with_exprt sarrnew(str.content(), args(f, 3)[1], args(f, 3)[2]); // We add axiom: @@ -367,9 +379,10 @@ string_exprt string_constraint_generatort::add_axioms_for_replace( const function_application_exprt &f) { string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); const exprt &old_char=args(f, 3)[1]; const exprt &new_char=args(f, 3)[2]; - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : |res| = |str| @@ -379,7 +392,7 @@ string_exprt string_constraint_generatort::add_axioms_for_replace( axioms.push_back(res.axiom_for_has_same_length_as(str)); - symbol_exprt qvar=fresh_univ_index("QA_replace"); + symbol_exprt qvar=fresh_univ_index("QA_replace", ref_type.get_index_type()); implies_exprt case1( equal_exprt(str[qvar], old_char), equal_exprt(res[qvar], new_char)); @@ -409,7 +422,7 @@ string_exprt string_constraint_generatort::add_axioms_for_delete_char_at( const function_application_exprt &f) { string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); - exprt index_one=from_integer(1, get_index_type()); + exprt index_one=from_integer(1, str.length().type()); return add_axioms_for_delete( str, args(f, 2)[1], plus_exprt(args(f, 2)[1], index_one)); } @@ -431,10 +444,10 @@ Function: string_constraint_generatort::add_axioms_for_delete string_exprt string_constraint_generatort::add_axioms_for_delete( const string_exprt &str, const exprt &start, const exprt &end) { - assert(start.type()==get_index_type()); - assert(end.type()==get_index_type()); + assert(start.type()==str.length().type()); + assert(end.type()==str.length().type()); string_exprt str1=add_axioms_for_substring( - str, from_integer(0, get_index_type()), start); + str, from_integer(0, str.length().type()), start); string_exprt str2=add_axioms_for_substring(str, end, str.length()); return add_axioms_for_concat(str1, str2); } diff --git a/src/solvers/refinement/string_constraint_generator_valueof.cpp b/src/solvers/refinement/string_constraint_generator_valueof.cpp index 3712b297386..d1030e5e085 100644 --- a/src/solvers/refinement/string_constraint_generator_valueof.cpp +++ b/src/solvers/refinement/string_constraint_generator_valueof.cpp @@ -25,7 +25,8 @@ Function: string_constraint_generatort::add_axioms_from_int string_exprt string_constraint_generatort::add_axioms_from_int( const function_application_exprt &expr) { - return add_axioms_from_int(args(expr, 1)[0], MAX_INTEGER_LENGTH); + const refined_string_typet &ref_type=to_refined_string_type(expr.type()); + return add_axioms_from_int(args(expr, 1)[0], MAX_INTEGER_LENGTH, ref_type); } /*******************************************************************\ @@ -43,7 +44,8 @@ Function: string_constraint_generatort::add_axioms_from_long string_exprt string_constraint_generatort::add_axioms_from_long( const function_application_exprt &expr) { - return add_axioms_from_int(args(expr, 1)[0], MAX_LONG_LENGTH); + const refined_string_typet &ref_type=to_refined_string_type(expr.type()); + return add_axioms_from_int(args(expr, 1)[0], MAX_LONG_LENGTH, ref_type); } /*******************************************************************\ @@ -99,14 +101,16 @@ Function: string_constraint_generatort::add_axioms_from_float string_exprt string_constraint_generatort::add_axioms_from_float( const exprt &f, bool double_precision) { - typet char_type=get_char_type(); - string_exprt res(char_type); - const exprt &index24=from_integer(24, get_index_type()); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + const typet &index_type=ref_type.get_index_type(); + const typet &char_type=ref_type.get_char_type(); + string_exprt res=fresh_string(ref_type); + const exprt &index24=from_integer(24, ref_type.get_index_type()); axioms.push_back(res.axiom_for_is_shorter_than(index24)); - string_exprt magnitude(char_type); - string_exprt sign_string(char_type); - string_exprt nan_string=add_axioms_for_constant("NaN"); + string_exprt magnitude=fresh_string(ref_type); + string_exprt sign_string=fresh_string(ref_type); + string_exprt nan_string=add_axioms_for_constant("NaN", ref_type); // We add the axioms: // a1 : If the argument is NaN, the result length is that of "NaN". @@ -127,7 +131,7 @@ string_exprt string_constraint_generatort::add_axioms_from_float( implies_exprt a1(isnan, magnitude.axiom_for_has_same_length_as(nan_string)); axioms.push_back(a1); - symbol_exprt qvar=fresh_univ_index("QA_equal_nan"); + symbol_exprt qvar=fresh_univ_index("QA_equal_nan", index_type); string_constraintt a2( qvar, nan_string.length(), @@ -150,20 +154,21 @@ string_exprt string_constraint_generatort::add_axioms_from_float( implies_exprt a4(not_exprt(isneg), sign_string.axiom_for_has_length(0)); axioms.push_back(a4); - implies_exprt a5(isneg, equal_exprt(sign_string[0], constant_char('-'))); + implies_exprt a5( + isneg, equal_exprt(sign_string[0], constant_char('-', char_type))); axioms.push_back(a5); // If m is infinity, it is represented by the characters "Infinity"; // thus, positive infinity produces the result "Infinity" and negative // infinity produces the result "-Infinity". - string_exprt infinity_string=add_axioms_for_constant("Infinity"); + string_exprt infinity_string=add_axioms_for_constant("Infinity", ref_type); exprt isinf=float_bvt().isinf(f, fspec); implies_exprt a6( isinf, magnitude.axiom_for_has_same_length_as(infinity_string)); axioms.push_back(a6); - symbol_exprt qvar_inf=fresh_univ_index("QA_equal_infinity"); + symbol_exprt qvar_inf=fresh_univ_index("QA_equal_infinity", index_type); equal_exprt meq(magnitude[qvar_inf], infinity_string[qvar_inf]); string_constraintt a7(qvar_inf, infinity_string.length(), isinf, meq); axioms.push_back(a7); @@ -171,12 +176,12 @@ string_exprt string_constraint_generatort::add_axioms_from_float( // If m is zero, it is represented by the characters "0.0"; thus, negative // zero produces the result "-0.0" and positive zero produces "0.0". - string_exprt zero_string=add_axioms_for_constant("0.0"); + string_exprt zero_string=add_axioms_for_constant("0.0", ref_type); exprt iszero=float_bvt().is_zero(f, fspec); implies_exprt a8(iszero, magnitude.axiom_for_has_same_length_as(zero_string)); axioms.push_back(a8); - symbol_exprt qvar_zero=fresh_univ_index("QA_equal_zero"); + symbol_exprt qvar_zero=fresh_univ_index("QA_equal_zero", index_type); equal_exprt eq_zero(magnitude[qvar_zero], zero_string[qvar_zero]); string_constraintt a9(qvar_zero, zero_string.length(), iszero, eq_zero); axioms.push_back(a9); @@ -200,7 +205,8 @@ Function: string_constraint_generatort::add_axioms_from_bool string_exprt string_constraint_generatort::add_axioms_from_bool( const function_application_exprt &f) { - return add_axioms_from_bool(args(f, 1)[0]); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_bool(args(f, 1)[0], ref_type); } @@ -218,17 +224,17 @@ Function: string_constraint_generatort::add_axioms_from_bool \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_from_bool( - const exprt &b) + const exprt &b, const refined_string_typet &ref_type) { - typet char_type=get_char_type(); - string_exprt res(char_type); + string_exprt res=fresh_string(ref_type); + const typet &index_type=ref_type.get_index_type(); assert(b.type()==bool_typet() || b.type().id()==ID_c_bool); typecast_exprt eq(b, bool_typet()); - string_exprt true_string=add_axioms_for_constant("true"); - string_exprt false_string=add_axioms_for_constant("false"); + string_exprt true_string=add_axioms_for_constant("true", ref_type); + string_exprt false_string=add_axioms_for_constant("false", ref_type); // We add axioms: // a1 : eq => res = |"true"| @@ -238,7 +244,7 @@ string_exprt string_constraint_generatort::add_axioms_from_bool( implies_exprt a1(eq, res.axiom_for_has_same_length_as(true_string)); axioms.push_back(a1); - symbol_exprt qvar=fresh_univ_index("QA_equal_true"); + symbol_exprt qvar=fresh_univ_index("QA_equal_true", index_type); string_constraintt a2( qvar, true_string.length(), @@ -250,7 +256,7 @@ string_exprt string_constraint_generatort::add_axioms_from_bool( not_exprt(eq), res.axiom_for_has_same_length_as(false_string)); axioms.push_back(a3); - symbol_exprt qvar1=fresh_univ_index("QA_equal_false"); + symbol_exprt qvar1=fresh_univ_index("QA_equal_false", index_type); string_constraintt a4( qvar, false_string.length(), @@ -294,17 +300,19 @@ Function: string_constraint_generatort::add_axioms_from_int \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_from_int( - const exprt &i, size_t max_size) + const exprt &i, size_t max_size, const refined_string_typet &ref_type) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); const typet &type=i.type(); assert(type.id()==ID_signedbv); exprt ten=from_integer(10, type); - exprt zero_char=constant_char('0'); - exprt nine_char=constant_char('9'); - exprt minus_char=constant_char('-'); - exprt zero=from_integer(0, get_index_type()); - exprt max=from_integer(max_size, get_index_type()); + const typet &char_type=ref_type.get_char_type(); + const typet &index_type=ref_type.get_index_type(); + exprt zero_char=constant_char('0', char_type); + exprt nine_char=constant_char('9', char_type); + exprt minus_char=constant_char('-', char_type); + exprt zero=from_integer(0, index_type); + exprt max=from_integer(max_size, index_type); // We add axioms: // a1 : 0 <|res|<=max_size @@ -371,7 +379,7 @@ string_exprt string_constraint_generatort::add_axioms_from_int( not_exprt(r0_zero)); axioms.push_back(a5); - exprt one=from_integer(1, get_index_type()); + exprt one=from_integer(1, index_type); equal_exprt r1_zero(res[one], zero_char); implies_exprt a6( and_exprt(premise, starts_with_minus), @@ -404,14 +412,14 @@ Function: string_constraint_generatort::int_of_hex_char \*******************************************************************/ -exprt string_constraint_generatort::int_of_hex_char(exprt chr) const +exprt string_constraint_generatort::int_of_hex_char(const exprt &chr) const { - exprt zero_char=constant_char('0'); - exprt nine_char=constant_char('9'); - exprt a_char=constant_char('a'); + exprt zero_char=constant_char('0', chr.type()); + exprt nine_char=constant_char('9', chr.type()); + exprt a_char=constant_char('a', chr.type()); return if_exprt( binary_relation_exprt(chr, ID_gt, nine_char), - plus_exprt(constant_char(10), minus_exprt(chr, a_char)), + plus_exprt(constant_char(10, chr.type()), minus_exprt(chr, a_char)), minus_exprt(chr, zero_char)); } @@ -429,17 +437,19 @@ Function: string_constraint_generatort::add_axioms_from_int_hex \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_from_int_hex( - const exprt &i) + const exprt &i, const refined_string_typet &ref_type) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); const typet &type=i.type(); assert(type.id()==ID_signedbv); - exprt sixteen=from_integer(16, type); - exprt minus_char=constant_char('-'); - exprt zero_char=constant_char('0'); - exprt nine_char=constant_char('9'); - exprt a_char=constant_char('a'); - exprt f_char=constant_char('f'); + const typet &index_type=ref_type.get_index_type(); + const typet &char_type=ref_type.get_char_type(); + exprt sixteen=from_integer(16, index_type); + exprt minus_char=constant_char('-', char_type); + exprt zero_char=constant_char('0', char_type); + exprt nine_char=constant_char('9', char_type); + exprt a_char=constant_char('a', char_type); + exprt f_char=constant_char('f', char_type); size_t max_size=8; axioms.push_back( @@ -494,7 +504,8 @@ Function: string_constraint_generatort::add_axioms_for_int_hex string_exprt string_constraint_generatort::add_axioms_from_int_hex( const function_application_exprt &f) { - return add_axioms_from_int_hex(args(f, 1)[0]); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_int_hex(args(f, 1)[0], ref_type); } /*******************************************************************\ @@ -512,7 +523,8 @@ Function: string_constraint_generatort::add_axioms_from_char string_exprt string_constraint_generatort::add_axioms_from_char( const function_application_exprt &f) { - return add_axioms_from_char(args(f, 1)[0]); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_char(args(f, 1)[0], ref_type); } /*******************************************************************\ @@ -523,14 +535,15 @@ Function: string_constraint_generatort::add_axioms_from_char Outputs: a new string expression - Purpose: add axioms stating that the returned string as length 1 and + Purpose: add axioms stating that the returned string has length 1 and the character it contains correspond to the input expression \*******************************************************************/ -string_exprt string_constraint_generatort::add_axioms_from_char(const exprt &c) +string_exprt string_constraint_generatort::add_axioms_from_char( + const exprt &c, const refined_string_typet &ref_type) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); and_exprt lemma(equal_exprt(res[0], c), res.axiom_for_has_length(1)); axioms.push_back(lemma); return res; @@ -555,7 +568,8 @@ string_exprt string_constraint_generatort::add_axioms_for_value_of( const function_application_exprt::argumentst &args=f.arguments(); if(args.size()==3) { - string_exprt res(get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + string_exprt res=fresh_string(ref_type); exprt char_array=args[0]; exprt offset=args[1]; exprt count=args[2]; @@ -567,7 +581,8 @@ string_exprt string_constraint_generatort::add_axioms_for_value_of( string_exprt str=add_axioms_for_java_char_array(char_array); axioms.push_back(res.axiom_for_has_length(count)); - symbol_exprt idx=fresh_univ_index("QA_index_value_of"); + symbol_exprt idx=fresh_univ_index( + "QA_index_value_of", ref_type.get_index_type()); equal_exprt eq(str[plus_exprt(idx, offset)], res[idx]); string_constraintt a2(idx, count, eq); axioms.push_back(a2); @@ -596,12 +611,13 @@ exprt string_constraint_generatort::add_axioms_for_parse_int( const function_application_exprt &f) { string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); - typet type=f.type(); - symbol_exprt i=string_exprt::fresh_symbol("parsed_int", type); - - exprt zero_char=constant_char('0'); - exprt minus_char=constant_char('-'); - exprt plus_char=constant_char('+'); + const typet &type=f.type(); + symbol_exprt i=fresh_symbol("parsed_int", type); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &char_type=ref_type.get_char_type(); + exprt zero_char=constant_char('0', char_type); + exprt minus_char=constant_char('-', char_type); + exprt plus_char=constant_char('+', char_type); assert(type.id()==ID_signedbv); exprt ten=from_integer(10, type); diff --git a/src/solvers/refinement/string_expr.cpp b/src/solvers/refinement/string_expr.cpp deleted file mode 100644 index 0c284621d8d..00000000000 --- a/src/solvers/refinement/string_expr.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/*******************************************************************\ - -Module: String expressions for the string solver - -Author: Romain Brenguier, romain.brenguier@diffblue.com - -\*******************************************************************/ - -#include - -unsigned string_exprt::next_symbol_id=1; - -/*******************************************************************\ - -Function: string_exprt::fresh_symbol - - Inputs: a prefix and a type - - Outputs: a symbol of type tp whose name starts with - "string_refinement#" followed by prefix - - Purpose: generate a new symbol expression of the given type with some prefix - -\*******************************************************************/ - -symbol_exprt string_exprt::fresh_symbol( - const irep_idt &prefix, const typet &type) -{ - std::ostringstream buf; - buf << "string_refinement#" << prefix << "#" << (next_symbol_id++); - irep_idt name(buf.str()); - return symbol_exprt(name, type); -} - -/*******************************************************************\ - -Constructor: string_exprt - - Inputs: a type for characters - - Purpose: construct a string expression whose length and content are new - variables - -\*******************************************************************/ - -string_exprt::string_exprt(typet char_type): - struct_exprt(refined_string_typet(char_type)) -{ - refined_string_typet t(char_type); - symbol_exprt length= - fresh_symbol("string_length", refined_string_typet::index_type()); - symbol_exprt content=fresh_symbol("string_content", t.get_content_type()); - move_to_operands(length, content); -} - diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index cb406cbacb2..16b89bc43c8 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -10,11 +10,11 @@ Author: Alberto Griggio, alberto.griggio@gmail.com \*******************************************************************/ +#include #include #include #include #include -#include #include #include @@ -254,7 +254,7 @@ Function: string_refinementt::dec_solve Outputs: result of the decision procedure - Purpose: use a refinement loop to instantiate universal axioms, + Purpose: use a refinement loop to instantiate universal axioms, call the sat solver, and instantiate more indexes if needed. \*******************************************************************/ @@ -271,11 +271,11 @@ decision_proceduret::resultt string_refinementt::dec_solve() { string_not_contains_constraintt nc_axiom= to_string_not_contains_constraint(axiom); - array_typet witness_type - (refined_string_typet::index_type(), - infinity_exprt(refined_string_typet::index_type())); + refined_string_typet rtype=to_refined_string_type(nc_axiom.s0().type()); + const typet &index_type=rtype.get_index_type(); + array_typet witness_type(index_type, infinity_exprt(index_type)); generator.witness[nc_axiom]= - string_exprt::fresh_symbol("not_contains_witness", witness_type); + generator.fresh_symbol("not_contains_witness", witness_type); not_contains_axioms.push_back(nc_axiom); } else @@ -464,23 +464,20 @@ Function: string_refinementt::get_array exprt string_refinementt::get_array(const exprt &arr, const exprt &size) { exprt val=get(arr); - typet chart; - if(arr.type().subtype()==generator.get_char_type()) - chart=generator.get_char_type(); - else - assert(false); + typet char_type=arr.type().subtype(); + typet index_type=size.type(); if(val.id()=="array-list") { - array_typet ret_type(chart, infinity_exprt(generator.get_index_type())); - exprt ret=array_of_exprt(generator.constant_char(0), ret_type); + array_typet ret_type(char_type, infinity_exprt(index_type)); + exprt ret=array_of_exprt(from_integer(0, char_type), ret_type); for(size_t i=0; i &m, bool negated) const { - exprt sum=from_integer(0, generator.get_index_type()); + exprt sum=nil_exprt(); mp_integer constants=0; + typet index_type; + if(m.empty()) + return nil_exprt(); + else + index_type=m.begin()->first.type(); for(const auto &term : m) { @@ -705,14 +707,14 @@ exprt string_refinementt::sum_over_map( switch(second) { case -1: - if(sum==from_integer(0, generator.get_index_type())) + if(sum.is_nil()) sum=unary_minus_exprt(t); else sum=minus_exprt(sum, t); break; case 1: - if(sum==from_integer(0, generator.get_index_type())) + if(sum.is_nil()) sum=t; else sum=plus_exprt(sum, t); @@ -733,8 +735,11 @@ exprt string_refinementt::sum_over_map( } } - exprt index_const=from_integer(constants, generator.get_index_type()); - return plus_exprt(sum, index_const); + exprt index_const=from_integer(constants, index_type); + if(sum.is_not_nil()) + return plus_exprt(sum, index_const); + else + return index_const; } /*******************************************************************\ @@ -910,7 +915,7 @@ void string_refinementt::initial_index_set(const string_constraintt &axiom) exprt e(i); minus_exprt kminus1( axiom.upper_bound(), - from_integer(1, generator.get_index_type())); + from_integer(1, axiom.upper_bound().type())); replace_expr(qvar, kminus1, e); current_index_set[s].insert(e); index_set[s].insert(e); @@ -1041,7 +1046,7 @@ exprt string_refinementt::instantiate( exprt bounds=and_exprt( axiom.univ_within_bounds(), binary_relation_exprt( - from_integer(0, generator.get_index_type()), ID_le, val)); + from_integer(0, val.type()), ID_le, val)); replace_expr(axiom.univ_var(), r, bounds); return implies_exprt(bounds, instance); } @@ -1077,7 +1082,7 @@ void string_refinementt::instantiate_not_contains( new_lemmas.push_back(lemma); // we put bounds on the witnesses: // 0 <= v <= |s0| - |s1| ==> 0 <= v+w[v] < |s0| && 0 <= w[v] < |s1| - exprt zero=from_integer(0, generator.get_index_type()); + exprt zero=from_integer(0, val.type()); binary_relation_exprt c1(zero, ID_le, plus_exprt(val, witness)); binary_relation_exprt c2 (to_string_expr(s0).length(), ID_gt, plus_exprt(val, witness)); diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index aac4eed6dab..be0cd332bc3 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -13,11 +13,8 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #ifndef CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H #define CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H -#include - -#include +#include #include -#include #include // Defines a limit on the string witnesses we will output. diff --git a/src/solvers/refinement/string_expr.h b/src/util/string_expr.h similarity index 71% rename from src/solvers/refinement/string_expr.h rename to src/util/string_expr.h index 29d6b12eae7..317e3c1ee6e 100644 --- a/src/solvers/refinement/string_expr.h +++ b/src/util/string_expr.h @@ -6,29 +6,27 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com \*******************************************************************/ -#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_EXPR_H -#define CPROVER_SOLVERS_REFINEMENT_STRING_EXPR_H +#ifndef CPROVER_UTIL_STRING_EXPR_H +#define CPROVER_UTIL_STRING_EXPR_H -#include +#include +#include -#include -#include -#include - - -// Expressions that encode strings class string_exprt: public struct_exprt { public: - // Initialize string from the type of characters - explicit string_exprt(typet char_type); + string_exprt(): struct_exprt() {} - // Default uses C character type - string_exprt() : string_exprt(char_type()) {} + explicit string_exprt(typet type): struct_exprt(type) + { + operands().resize(2); + } - // Generate a new symbol of the given type with a prefix - static symbol_exprt fresh_symbol( - const irep_idt &prefix, const typet &type=bool_typet()); + string_exprt(const exprt &_length, const exprt &_content, typet type): + struct_exprt(type) + { + copy_to_operands(_length, _content); + } // Expression corresponding to the length of the string const exprt &length() const { return op0(); } @@ -38,12 +36,6 @@ class string_exprt: public struct_exprt const exprt &content() const { return op1(); } exprt &content() { return op1(); } - // Type of the expression as a refined string type - const refined_string_typet &refined_type() const - { - return to_refined_string_type(type()); - } - static exprt within_bounds(const exprt &idx, const exprt &bound); // Expression of the character at position idx in the string @@ -54,7 +46,7 @@ class string_exprt: public struct_exprt index_exprt operator[] (int i) const { - return index_exprt(content(), refined_type().index_of_int(i)); + return index_exprt(content(), from_integer(i, length().type())); } // Comparison on the length of the strings @@ -84,7 +76,7 @@ class string_exprt: public struct_exprt binary_relation_exprt axiom_for_is_strictly_longer_than(int i) const { - return axiom_for_is_strictly_longer_than(refined_type().index_of_int(i)); + return axiom_for_is_strictly_longer_than(from_integer(i, length().type())); } binary_relation_exprt axiom_for_is_shorter_than( @@ -101,7 +93,7 @@ class string_exprt: public struct_exprt binary_relation_exprt axiom_for_is_shorter_than(int i) const { - return axiom_for_is_shorter_than(refined_type().index_of_int(i)); + return axiom_for_is_shorter_than(from_integer(i, length().type())); } binary_relation_exprt axiom_for_is_strictly_shorter_than( @@ -129,22 +121,24 @@ class string_exprt: public struct_exprt equal_exprt axiom_for_has_length(int i) const { - return axiom_for_has_length(refined_type().index_of_int(i)); + return axiom_for_has_length(from_integer(i, length().type())); } - static irep_idt extract_java_string(const symbol_exprt &s); - - static unsigned next_symbol_id; - friend inline string_exprt &to_string_expr(exprt &expr); }; - inline string_exprt &to_string_expr(exprt &expr) { assert(expr.id()==ID_struct); + assert(expr.operands().size()==2); return static_cast(expr); } +inline const string_exprt &to_string_expr(const exprt &expr) +{ + assert(expr.id()==ID_struct); + assert(expr.operands().size()==2); + return static_cast(expr); +} #endif From e33caadeb919f0e26062e5c334b2627c504afa08 Mon Sep 17 00:00:00 2001 From: Norbert Manthey Date: Tue, 28 Feb 2017 23:17:12 +0100 Subject: [PATCH 24/27] Remove aa-symex from DIRS variable in Makefile Without this patch, calling "make clean" fails, because make tries to call "make clean" in the aa-symex directory as well. By removing aa-symex from the DIRS variable, this problem is fixed. Fixes: cleanup aa-path-symex and aa-symex --- src/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Makefile b/src/Makefile index 3a452b7fe09..269ddeda140 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,7 +1,7 @@ DIRS = ansi-c big-int cbmc cpp goto-cc goto-instrument goto-programs \ goto-symex langapi pointer-analysis solvers util linking xmllang \ - assembler analyses java_bytecode aa-path-symex path-symex musketeer \ - json cegis goto-analyzer jsil symex goto-diff aa-symex clobber \ + assembler analyses java_bytecode path-symex musketeer \ + json cegis goto-analyzer jsil symex goto-diff clobber \ memory-models all: cbmc.dir goto-cc.dir goto-instrument.dir symex.dir goto-analyzer.dir goto-diff.dir From ca11bb3adbecc2d4d99711b9862dfbc1fa606280 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 2 Feb 2017 15:48:36 +0000 Subject: [PATCH 25/27] Introduce java.lang.String when string-refinement is activated We define the java.lang.String type to suit string-refine, and we create populated String instances for string literals. --- .../java_bytecode_convert_class.cpp | 78 ++++++++++++++++++- .../java_bytecode_convert_class.h | 3 +- src/java_bytecode/java_bytecode_language.cpp | 6 +- src/java_bytecode/java_bytecode_language.h | 1 + src/java_bytecode/java_bytecode_typecheck.cpp | 5 +- src/java_bytecode/java_bytecode_typecheck.h | 10 ++- .../java_bytecode_typecheck_expr.cpp | 61 ++++++++++++++- 7 files changed, 149 insertions(+), 15 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 440b0ce32cf..f21b0bb1ce2 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -30,13 +30,15 @@ class java_bytecode_convert_classt:public messaget bool _disable_runtime_checks, size_t _max_array_length, lazy_methodst& _lazy_methods, - lazy_methods_modet _lazy_methods_mode): + lazy_methods_modet _lazy_methods_mode, + bool _string_refinement_enabled): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), max_array_length(_max_array_length), lazy_methods(_lazy_methods), - lazy_methods_mode(_lazy_methods_mode) + lazy_methods_mode(_lazy_methods_mode), + string_refinement_enabled(_string_refinement_enabled) { } @@ -46,6 +48,9 @@ class java_bytecode_convert_classt:public messaget if(parse_tree.loading_successful) convert(parse_tree.parsed_class); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.String") + add_string_type(); else generate_class_stub(parse_tree.parsed_class.name); } @@ -59,6 +64,7 @@ class java_bytecode_convert_classt:public messaget const size_t max_array_length; lazy_methodst &lazy_methods; lazy_methods_modet lazy_methods_mode; + bool string_refinement_enabled; // conversion void convert(const classt &c); @@ -66,6 +72,7 @@ class java_bytecode_convert_classt:public messaget void generate_class_stub(const irep_idt &class_name); void add_array_types(); + void add_string_type(); }; /*******************************************************************\ @@ -360,7 +367,8 @@ bool java_bytecode_convert_class( bool disable_runtime_checks, size_t max_array_length, lazy_methodst &lazy_methods, - lazy_methods_modet lazy_methods_mode) + lazy_methods_modet lazy_methods_mode, + bool string_refinement_enabled) { java_bytecode_convert_classt java_bytecode_convert_class( symbol_table, @@ -368,7 +376,8 @@ bool java_bytecode_convert_class( disable_runtime_checks, max_array_length, lazy_methods, - lazy_methods_mode); + lazy_methods_mode, + string_refinement_enabled); try { @@ -392,3 +401,64 @@ bool java_bytecode_convert_class( return true; } + +/*******************************************************************\ + +Function: java_bytecode_convert_classt::add_string_type + + Purpose: Implements the java.lang.String type in the case that + we provide an internal implementation. + +\*******************************************************************/ + +void java_bytecode_convert_classt::add_string_type() +{ + class_typet string_type; + string_type.set_tag("java.lang.String"); + string_type.components().resize(3); + string_type.components()[0].set_name("@java.lang.Object"); + string_type.components()[0].set_pretty_name("@java.lang.Object"); + string_type.components()[0].type()=symbol_typet("java::java.lang.Object"); + string_type.components()[1].set_name("length"); + string_type.components()[1].set_pretty_name("length"); + string_type.components()[1].type()=java_int_type(); + string_type.components()[2].set_name("data"); + string_type.components()[2].set_pretty_name("data"); + // Use a pointer-to-unbounded-array instead of a pointer-to-char. + // Saves some casting in the string refinement algorithm but may + // be unnecessary. + string_type.components()[2].type()=pointer_typet( + array_typet(java_char_type(), infinity_exprt(java_int_type()))); + string_type.add_base(symbol_typet("java::java.lang.Object")); + + symbolt string_symbol; + string_symbol.name="java::java.lang.String"; + string_symbol.base_name="java.lang.String"; + string_symbol.type=string_type; + string_symbol.is_type=true; + + symbol_table.add(string_symbol); + + // Also add a stub of `String.equals` so that remove-virtual-functions + // generates a check for Object.equals vs. String.equals. + // No need to fill it in, as pass_preprocess will replace the calls again. + symbolt string_equals_symbol; + string_equals_symbol.name= + "java::java.lang.String.equals:(Ljava/lang/Object;)Z"; + string_equals_symbol.base_name="java.lang.String.equals"; + string_equals_symbol.pretty_name="java.lang.String.equals"; + string_equals_symbol.mode=ID_java; + + code_typet string_equals_type; + string_equals_type.return_type()=java_boolean_type(); + code_typet::parametert thisparam; + thisparam.set_this(); + thisparam.type()=pointer_typet(symbol_typet(string_symbol.name)); + code_typet::parametert otherparam; + otherparam.type()=pointer_typet(symbol_typet("java::java.lang.Object")); + string_equals_type.parameters().push_back(thisparam); + string_equals_type.parameters().push_back(otherparam); + string_equals_symbol.type=std::move(string_equals_type); + + symbol_table.add(string_equals_symbol); +} diff --git a/src/java_bytecode/java_bytecode_convert_class.h b/src/java_bytecode/java_bytecode_convert_class.h index 5cc1526a125..0d2be3d8202 100644 --- a/src/java_bytecode/java_bytecode_convert_class.h +++ b/src/java_bytecode/java_bytecode_convert_class.h @@ -22,6 +22,7 @@ bool java_bytecode_convert_class( bool disable_runtime_checks, size_t max_array_length, lazy_methodst &, - lazy_methods_modet); + lazy_methods_modet, + bool string_refinement_enabled); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_CLASS_H diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index e9d5a87186b..668960dbe17 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -43,6 +43,7 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd) { disable_runtime_checks=cmd.isset("disable-runtime-check"); assume_inputs_non_null=cmd.isset("java-assume-inputs-non-null"); + string_refinement_enabled=cmd.isset("string-refine"); if(cmd.isset("java-max-input-array-length")) max_nondet_array_length= std::stoi(cmd.get_value("java-max-input-array-length")); @@ -494,7 +495,8 @@ bool java_bytecode_languaget::typecheck( disable_runtime_checks, max_user_array_length, lazy_methods, - lazy_methods_mode)) + lazy_methods_mode, + string_refinement_enabled)) return true; } @@ -509,7 +511,7 @@ bool java_bytecode_languaget::typecheck( // now typecheck all if(java_bytecode_typecheck( - symbol_table, get_message_handler())) + symbol_table, get_message_handler(), string_refinement_enabled)) return true; return false; diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index ea177e0226a..643eacd5b7f 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -104,6 +104,7 @@ class java_bytecode_languaget:public languaget size_t max_user_array_length; // max size for user code created arrays lazy_methodst lazy_methods; lazy_methods_modet lazy_methods_mode; + bool string_refinement_enabled; }; languaget *new_java_bytecode_language(); diff --git a/src/java_bytecode/java_bytecode_typecheck.cpp b/src/java_bytecode/java_bytecode_typecheck.cpp index e72b80a885d..4f09e366452 100644 --- a/src/java_bytecode/java_bytecode_typecheck.cpp +++ b/src/java_bytecode/java_bytecode_typecheck.cpp @@ -126,10 +126,11 @@ Function: java_bytecode_typecheck bool java_bytecode_typecheck( symbol_tablet &symbol_table, - message_handlert &message_handler) + message_handlert &message_handler, + bool string_refinement_enabled) { java_bytecode_typecheckt java_bytecode_typecheck( - symbol_table, message_handler); + symbol_table, message_handler, string_refinement_enabled); return java_bytecode_typecheck.typecheck_main(); } diff --git a/src/java_bytecode/java_bytecode_typecheck.h b/src/java_bytecode/java_bytecode_typecheck.h index cceba54dee5..618b6fce44d 100644 --- a/src/java_bytecode/java_bytecode_typecheck.h +++ b/src/java_bytecode/java_bytecode_typecheck.h @@ -21,7 +21,8 @@ Author: Daniel Kroening, kroening@kroening.com bool java_bytecode_typecheck( symbol_tablet &symbol_table, - message_handlert &message_handler); + message_handlert &message_handler, + bool string_refinement_enabled); bool java_bytecode_typecheck( exprt &expr, @@ -33,10 +34,12 @@ class java_bytecode_typecheckt:public typecheckt public: java_bytecode_typecheckt( symbol_tablet &_symbol_table, - message_handlert &_message_handler): + message_handlert &_message_handler, + bool _string_refinement_enabled): typecheckt(_message_handler), symbol_table(_symbol_table), - ns(symbol_table) + ns(symbol_table), + string_refinement_enabled(_string_refinement_enabled) { } @@ -48,6 +51,7 @@ class java_bytecode_typecheckt:public typecheckt protected: symbol_tablet &symbol_table; const namespacet ns; + bool string_refinement_enabled; void typecheck_type_symbol(symbolt &); void typecheck_non_type_symbol(symbolt &); diff --git a/src/java_bytecode/java_bytecode_typecheck_expr.cpp b/src/java_bytecode/java_bytecode_typecheck_expr.cpp index c2ba2949d7d..41879c6cda5 100644 --- a/src/java_bytecode/java_bytecode_typecheck_expr.cpp +++ b/src/java_bytecode/java_bytecode_typecheck_expr.cpp @@ -10,9 +10,12 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include +#include #include "java_bytecode_typecheck.h" #include "java_pointer_casts.h" +#include "java_types.h" /*******************************************************************\ @@ -112,6 +115,15 @@ static std::string escape_non_alnum(const std::string &toescape) return escaped.str(); } +static array_exprt utf16_to_array(const std::wstring& in) +{ + const auto jchar=java_char_type(); + array_exprt ret(array_typet(jchar, infinity_exprt(java_int_type()))); + for(const auto c : in) + ret.copy_to_operands(from_integer(c, jchar)); + return ret; +} + /*******************************************************************\ Function: java_bytecode_typecheckt::typecheck_expr_java_string_literal @@ -136,14 +148,14 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) auto findit=symbol_table.symbols.find(escaped_symbol_name); if(findit!=symbol_table.symbols.end()) { - expr=findit->second.symbol_expr(); + expr=address_of_exprt(findit->second.symbol_expr()); return; } // Create a new symbol: symbolt new_symbol; new_symbol.name=escaped_symbol_name; - new_symbol.type=pointer_typet(string_type); + new_symbol.type=string_type; new_symbol.base_name="Literal"; new_symbol.pretty_name=value; new_symbol.mode=ID_java; @@ -151,13 +163,56 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) new_symbol.is_lvalue=true; new_symbol.is_static_lifetime=true; // These are basically const global data. + if(string_refinement_enabled) + { + // Initialise the string with a constant utf-16 array: + symbolt array_symbol; + array_symbol.name=escaped_symbol_name+"_constarray"; + array_symbol.type=array_typet( + java_char_type(), infinity_exprt(java_int_type())); + array_symbol.base_name="Literal_constarray"; + array_symbol.pretty_name=value; + array_symbol.mode=ID_java; + array_symbol.is_type=false; + array_symbol.is_lvalue=true; + // These are basically const global data: + array_symbol.is_static_lifetime=true; + array_symbol.is_state_var=true; + auto literal_array=utf16_to_array( + utf8_to_utf16_little_endian(id2string(value))); + array_symbol.value=literal_array; + + if(symbol_table.add(array_symbol)) + throw "failed to add constarray symbol to symtab"; + + symbol_typet jlo_symbol("java::java.lang.Object"); + const auto& jlo_struct=to_struct_type(ns.follow(jlo_symbol)); + const auto& jls_struct=to_struct_type(ns.follow(string_type)); + + struct_exprt literal_init(new_symbol.type); + struct_exprt jlo_init(jlo_symbol); + jlo_init.copy_to_operands( + constant_exprt("java::java.lang.String", + jlo_struct.components()[0].type())); + jlo_init.copy_to_operands( + from_integer(0, jlo_struct.components()[1].type())); + literal_init.move_to_operands(jlo_init); + literal_init.copy_to_operands( + from_integer(literal_array.operands().size(), + jls_struct.components()[1].type())); + literal_init.copy_to_operands( + address_of_exprt(array_symbol.symbol_expr())); + + new_symbol.value=literal_init; + } + if(symbol_table.add(new_symbol)) { error() << "failed to add string literal symbol to symbol table" << eom; throw 0; } - expr=new_symbol.symbol_expr(); + expr=address_of_exprt(new_symbol.symbol_expr()); } /*******************************************************************\ From 396f66e7d64aba03c0a95c43cbb16e9c9f50b8bb Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 20 Feb 2017 17:33:46 +0000 Subject: [PATCH 26/27] Init string constants when refine-string disabled In order that at least ("" instanceof String) passes, we must init the class identifier field, both in the case that java.lang.String is a stub and the case where JDK or something like it was present and defined the type. In the latter case we simply zero-initialize the other fields of String since without refine-string we can't understand them. --- .../java_bytecode_typecheck_expr.cpp | 58 +++++++++++++++---- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/src/java_bytecode/java_bytecode_typecheck_expr.cpp b/src/java_bytecode/java_bytecode_typecheck_expr.cpp index 41879c6cda5..cd8beb86c6f 100644 --- a/src/java_bytecode/java_bytecode_typecheck_expr.cpp +++ b/src/java_bytecode/java_bytecode_typecheck_expr.cpp @@ -13,6 +13,8 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include + #include "java_bytecode_typecheck.h" #include "java_pointer_casts.h" #include "java_types.h" @@ -163,8 +165,26 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) new_symbol.is_lvalue=true; new_symbol.is_static_lifetime=true; // These are basically const global data. + // Regardless of string refinement setting, at least initialize + // the literal with @clsid = String and @lock = false: + symbol_typet jlo_symbol("java::java.lang.Object"); + const auto& jlo_struct=to_struct_type(ns.follow(jlo_symbol)); + struct_exprt jlo_init(jlo_symbol); + const auto& jls_struct=to_struct_type(ns.follow(string_type)); + + jlo_init.copy_to_operands( + constant_exprt("java::java.lang.String", + jlo_struct.components()[0].type())); + jlo_init.copy_to_operands( + from_integer(0, jlo_struct.components()[1].type())); + + // If string refinement *is* around, populate the actual + // contents as well: if(string_refinement_enabled) { + struct_exprt literal_init(new_symbol.type); + literal_init.move_to_operands(jlo_init); + // Initialise the string with a constant utf-16 array: symbolt array_symbol; array_symbol.name=escaped_symbol_name+"_constarray"; @@ -185,18 +205,6 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) if(symbol_table.add(array_symbol)) throw "failed to add constarray symbol to symtab"; - symbol_typet jlo_symbol("java::java.lang.Object"); - const auto& jlo_struct=to_struct_type(ns.follow(jlo_symbol)); - const auto& jls_struct=to_struct_type(ns.follow(string_type)); - - struct_exprt literal_init(new_symbol.type); - struct_exprt jlo_init(jlo_symbol); - jlo_init.copy_to_operands( - constant_exprt("java::java.lang.String", - jlo_struct.components()[0].type())); - jlo_init.copy_to_operands( - from_integer(0, jlo_struct.components()[1].type())); - literal_init.move_to_operands(jlo_init); literal_init.copy_to_operands( from_integer(literal_array.operands().size(), jls_struct.components()[1].type())); @@ -205,6 +213,32 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) new_symbol.value=literal_init; } + else if(jls_struct.components().size()>=1 && + jls_struct.components()[0].get_name()=="@java.lang.Object") + { + // Case where something defined java.lang.String, so it has + // a proper base class (always java.lang.Object in practical + // JDKs seen so far) + struct_exprt literal_init(new_symbol.type); + literal_init.move_to_operands(jlo_init); + for(const auto &comp : jls_struct.components()) + { + if(comp.get_name()=="@java.lang.Object") + continue; + // Other members of JDK's java.lang.String we don't understand + // without string-refinement. Just zero-init them; consider using + // test-gen-like nondet object trees instead. + literal_init.copy_to_operands( + zero_initializer(comp.type(), expr.source_location(), ns)); + } + new_symbol.value=literal_init; + } + else if(jls_struct.get_bool(ID_incomplete_class)) + { + // Case where java.lang.String was stubbed, and so directly defines + // @class_identifier and @lock: + new_symbol.value=jlo_init; + } if(symbol_table.add(new_symbol)) { From 4e4fa87a73aa4f32f221b50addef676d0b5ce39b Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 21 Feb 2017 13:39:44 +0000 Subject: [PATCH 27/27] Style and document Java string-refine integration --- .../java_bytecode_typecheck_expr.cpp | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/java_bytecode/java_bytecode_typecheck_expr.cpp b/src/java_bytecode/java_bytecode_typecheck_expr.cpp index cd8beb86c6f..11e488ff7fe 100644 --- a/src/java_bytecode/java_bytecode_typecheck_expr.cpp +++ b/src/java_bytecode/java_bytecode_typecheck_expr.cpp @@ -117,7 +117,19 @@ static std::string escape_non_alnum(const std::string &toescape) return escaped.str(); } -static array_exprt utf16_to_array(const std::wstring& in) +/*******************************************************************\ + +Function: utf16_to_array + + Inputs: `in`: wide string to convert + + Outputs: Returns a Java char array containing the same wchars. + + Purpose: Convert UCS-2 or UTF-16 to an array expression. + +\*******************************************************************/ + +static array_exprt utf16_to_array(const std::wstring &in) { const auto jchar=java_char_type(); array_exprt ret(array_typet(jchar, infinity_exprt(java_int_type()))); @@ -168,15 +180,18 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) // Regardless of string refinement setting, at least initialize // the literal with @clsid = String and @lock = false: symbol_typet jlo_symbol("java::java.lang.Object"); - const auto& jlo_struct=to_struct_type(ns.follow(jlo_symbol)); + const auto &jlo_struct=to_struct_type(ns.follow(jlo_symbol)); struct_exprt jlo_init(jlo_symbol); - const auto& jls_struct=to_struct_type(ns.follow(string_type)); + const auto &jls_struct=to_struct_type(ns.follow(string_type)); jlo_init.copy_to_operands( - constant_exprt("java::java.lang.String", - jlo_struct.components()[0].type())); + constant_exprt( + "java::java.lang.String", + jlo_struct.components()[0].type())); jlo_init.copy_to_operands( - from_integer(0, jlo_struct.components()[1].type())); + from_integer( + 0, + jlo_struct.components()[1].type())); // If string refinement *is* around, populate the actual // contents as well: @@ -185,7 +200,7 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) struct_exprt literal_init(new_symbol.type); literal_init.move_to_operands(jlo_init); - // Initialise the string with a constant utf-16 array: + // Initialize the string with a constant utf-16 array: symbolt array_symbol; array_symbol.name=escaped_symbol_name+"_constarray"; array_symbol.type=array_typet( @@ -203,7 +218,7 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) array_symbol.value=literal_array; if(symbol_table.add(array_symbol)) - throw "failed to add constarray symbol to symtab"; + throw "failed to add constarray symbol to symbol table"; literal_init.copy_to_operands( from_integer(literal_array.operands().size(),