diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp index ec02988d577..7243f66049a 100644 --- a/src/goto-programs/string_refine_preprocess.cpp +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -23,6 +23,32 @@ Date: September 2016 /*******************************************************************\ +Function: string_refine_preprocesst::check_java_type + + Inputs: a type and a string + + Outputs: Boolean telling whether the type is a struct with the given + tag or a symbolic type with the tag prefixed by "java::" + +\*******************************************************************/ + +bool string_refine_preprocesst::check_java_type( + const typet &type, const std::string &tag) +{ + if(type.id()==ID_symbol) + { + irep_idt tag_id=to_symbol_type(type).get_identifier(); + return tag_id=="java::"+tag; + } + else if(type.id()==ID_struct) + { + irep_idt tag_id=to_struct_type(type).get_tag(); + return tag_id==tag; + } + return false; +} +/*******************************************************************\ + Function: string_refine_preprocesst::is_java_string_pointer_type Inputs: a type @@ -54,17 +80,7 @@ Function: string_refine_preprocesst::is_java_string_type bool string_refine_preprocesst::is_java_string_type(const typet &type) { - if(type.id()==ID_symbol) - { - const irep_idt &tag=to_symbol_type(type).get_identifier(); - return tag=="java::java.lang.String"; - } - else if(type.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(type).get_tag(); - return tag=="java.lang.String"; - } - return false; + return check_java_type(type, "java.lang.String"); } /*******************************************************************\ @@ -79,17 +95,7 @@ Function: string_refine_preprocesst::is_java_string_builder_type bool string_refine_preprocesst::is_java_string_builder_type(const typet &type) { - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.StringBuilder"; - } - } - return false; + return check_java_type(type, "java.lang.StringBuilder"); } /*******************************************************************\ @@ -114,6 +120,7 @@ bool string_refine_preprocesst::is_java_string_builder_pointer_type( } return false; } + /*******************************************************************\ Function: string_refine_preprocesst::is_java_char_sequence_type @@ -125,16 +132,67 @@ Function: string_refine_preprocesst::is_java_char_sequence_type \*******************************************************************/ bool string_refine_preprocesst::is_java_char_sequence_type(const typet &type) +{ + return check_java_type(type, "java.lang.CharSequence"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_sequence_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of a pointer + to a java char sequence + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_sequence_pointer_type( + const typet &type) { if(type.id()==ID_pointer) { const pointer_typet &pt=to_pointer_type(type); const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.CharSequence"; - } + return is_java_char_sequence_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_array_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java char array + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_array_type(const typet &type) +{ + return check_java_type(type, "array[char]"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_array_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of a pointer + to a java char array + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_array_pointer_type( + const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_char_array_type(subtype); } return false; } @@ -217,6 +275,32 @@ void string_refine_preprocesst::declare_function( /*******************************************************************\ +Function: string_refine_preprocesst::get_data_and_length_type_of_char_array + + Inputs: an expression, a reference to a data type and a reference to a + length type + + Purpose: assuming the expression is a char array, figure out what + the types for length and data are and put them into the references + given as argument + +\*******************************************************************/ + +void string_refine_preprocesst::get_data_and_length_type_of_char_array( + const exprt &expr, typet &data_type, typet &length_type) +{ + typet object_type=ns.follow(expr.type()); + assert(object_type.id()==ID_struct); + const struct_typet &struct_type=to_struct_type(object_type); + for(auto component : struct_type.components()) + if(component.get_name()=="length") + length_type=component.type(); + else if(component.get_name()=="data") + data_type=component.type(); +} + +/*******************************************************************\ + Function: string_refine_preprocesst::get_data_and_length_type_of_string Inputs: an expression, a reference to a data type and a reference to a @@ -232,7 +316,8 @@ void string_refine_preprocesst::get_data_and_length_type_of_string( const exprt &expr, typet &data_type, typet &length_type) { assert(is_java_string_type(expr.type()) || - is_java_string_builder_type(expr.type())); + is_java_string_builder_type(expr.type()) || + is_java_char_sequence_type(expr.type())); typet object_type=ns.follow(expr.type()); const struct_typet &struct_type=to_struct_type(object_type); for(const auto &component : struct_type.components()) @@ -263,44 +348,52 @@ exprt string_refine_preprocesst::make_cprover_string_assign( const exprt &rhs, const source_locationt &location) { - if(is_java_string_pointer_type(rhs.type())) + if(implements_java_char_sequence(rhs.type())) { - auto pair=java_to_cprover_strings.insert( - std::pair(rhs, nil_exprt())); - - if(pair.second) - { - // We do the following assignments: - // cprover_string_array = *(rhs->data) - // cprover_string = { rhs->length; cprover_string_array } - - dereference_exprt deref(rhs, rhs.type().subtype()); - - typet data_type, length_type; - get_data_and_length_type_of_string(deref, data_type, length_type); - member_exprt length(deref, "length", length_type); - symbol_exprt array_lhs=fresh_array(data_type.subtype(), location); - - // string expression for the rhs of the second assignment - string_exprt new_rhs( - length, array_lhs, refined_string_typet(length_type, data_type)); - - member_exprt data(deref, "data", data_type); - dereference_exprt deref_data(data, data_type.subtype()); - symbol_exprt lhs=fresh_string(new_rhs.type(), location); - - std::list assignments; - assignments.emplace_back(array_lhs, deref_data); - assignments.emplace_back(lhs, new_rhs); - insert_assignments( - goto_program, target, target->function, location, assignments); - target=goto_program.insert_after(target); - pair.first->second=lhs; - } - return pair.first->second; + // We do the following assignments: + // 1 length= *(rhs->length) + // 2 cprover_string_array = *(rhs->data) + // 3 cprover_string = { length; cprover_string_array } + + dereference_exprt deref(rhs, rhs.type().subtype()); + typet data_type, length_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + std::list assignments; + + // 1) cprover_string_length= *(rhs->length) + symbolt sym_length=get_fresh_aux_symbol( + length_type, + "cprover_string_length", + "cprover_string_length", + location, + ID_java, + symbol_table); + symbol_exprt cprover_length=sym_length.symbol_expr(); + member_exprt length(deref, "length", length_type); + assignments.emplace_back(cprover_length, length); + + // 2) cprover_string_array = *(rhs->data) + symbol_exprt array_lhs=fresh_array(data_type.subtype(), location); + member_exprt data(deref, "data", data_type); + dereference_exprt deref_data(data, data_type.subtype()); + assignments.emplace_back(array_lhs, deref_data); + + // 3) cprover_string = { cprover_string_length; cprover_string_array } + // This assignment is useful for finding witnessing strings for counter + // examples + refined_string_typet ref_type(length_type, java_char_type()); + string_exprt new_rhs(cprover_length, array_lhs, ref_type); + + symbol_exprt lhs=fresh_string(new_rhs.type(), location); + assignments.emplace_back(lhs, new_rhs); + + insert_assignments( + goto_program, target, target->function, location, assignments); + target=goto_program.insert_after(target); + return new_rhs; } else if(rhs.id()==ID_typecast && - is_java_string_pointer_type(rhs.op0().type())) + implements_java_char_sequence(rhs.op0().type())) { exprt new_rhs=make_cprover_string_assign( goto_program, target, rhs.op0(), location); @@ -312,6 +405,75 @@ exprt string_refine_preprocesst::make_cprover_string_assign( /*******************************************************************\ +Function: string_refine_preprocesst::make_cprover_char_array_assign + + Inputs: a goto_program, a position in this program, an expression of + type char array pointer and a location + + Outputs: a string expression + + Purpose: Introduce a temporary variable for cprover strings; + returns the cprover_string corresponding to rhs + +\*******************************************************************/ + +string_exprt string_refine_preprocesst::make_cprover_char_array_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &rhs, + const source_locationt &location) +{ + assert(is_java_char_array_pointer_type(rhs.type())); + + // We do the following assignments: + // deref=*(rhs->data) + // array= typecast(&deref); + // string={ rhs->length; array } + + dereference_exprt deref(rhs, rhs.type().subtype()); + typet length_type, data_type; + get_data_and_length_type_of_char_array(deref, data_type, length_type); + assert(data_type.id()==ID_pointer); + typet char_type=to_pointer_type(data_type).subtype(); + + refined_string_typet ref_type(length_type, java_char_type()); + typet content_type=ref_type.get_content_type(); + std::list assignments; + + // deref=*(rhs->data) + member_exprt array_rhs(deref, "data", data_type); + dereference_exprt deref_array(array_rhs, data_type.subtype()); + symbolt sym_lhs_deref=get_fresh_aux_symbol( + data_type.subtype(), + "char_array_assign$deref", + "char_array_assign$deref", + location, + ID_java, + symbol_table); + symbol_exprt lhs_deref=sym_lhs_deref.symbol_expr(); + assignments.emplace_back(lhs_deref, deref_array); + + // array=convert_pointer_to_char_array(*rhs->data) + declare_function(ID_cprover_string_array_of_char_pointer_func, content_type); + function_application_exprt fun_app(symbol_exprt( + ID_cprover_string_array_of_char_pointer_func), content_type); + fun_app.arguments().push_back(deref_array); + symbol_exprt array=fresh_array(content_type, location); + assignments.emplace_back(array, fun_app); + + // string={ rhs->length; string_array } + string_exprt new_rhs(get_length(deref, length_type), array, ref_type); + symbol_exprt lhs=fresh_string(ref_type, location); + assignments.emplace_back(lhs, new_rhs); + + insert_assignments( + goto_program, target, target->function, location, assignments); + target=goto_program.insert_after(target); + return new_rhs; +} + +/*******************************************************************\ + Function: string_refine_preprocesst::make_normal_assign Inputs: a goto_program, a position in this program, an expression lhs, @@ -336,27 +498,19 @@ void string_refine_preprocesst::make_normal_assign( const source_locationt &location, const std::string &signature) { - if(function_name==ID_cprover_string_copy_func) - { - assert(!arguments.empty()); - make_string_copy(goto_program, target, lhs, arguments[0], location); - } - else - { - function_application_exprt rhs( - symbol_exprt(function_name), function_type.return_type()); - rhs.add_source_location()=location; - declare_function(function_name, function_type); + function_application_exprt rhs( + symbol_exprt(function_name), function_type.return_type()); + rhs.add_source_location()=location; + declare_function(function_name, function_type); - exprt::operandst processed_arguments=process_arguments( - goto_program, target, arguments, location, signature); - rhs.arguments()=processed_arguments; + exprt::operandst processed_arguments=process_arguments( + goto_program, target, arguments, location, signature); + rhs.arguments()=processed_arguments; - code_assignt assignment(lhs, rhs); - assignment.add_source_location()=location; - target->make_assignment(); - target->code=assignment; - } + code_assignt assignment(lhs, rhs); + assignment.add_source_location()=location; + target->make_assignment(); + target->code=assignment; } /*******************************************************************\ @@ -420,7 +574,7 @@ void string_refine_preprocesst::make_string_assign( const source_locationt &location, const std::string &signature) { - assert(is_java_string_pointer_type(function_type.return_type())); + assert(implements_java_char_sequence(function_type.return_type())); dereference_exprt deref(lhs, lhs.type().subtype()); typet object_type=ns.follow(deref.type()); exprt object_size=size_of_expr(object_type, ns); @@ -454,98 +608,22 @@ void string_refine_preprocesst::make_string_assign( malloc_expr.type()=pointer_typet(object_type); malloc_expr.add_source_location()=location; + refined_string_typet ref_type(length_type, data_type.subtype().subtype()); + string_exprt str(tmp_length, tmp_array, ref_type); + symbol_exprt cprover_string_sym=fresh_string(ref_type, location); + std::list assigns; assigns.emplace_back(lhs, malloc_expr); assigns.emplace_back(tmp_length, rhs_length); assigns.emplace_back(lhs_length, tmp_length); assigns.emplace_back(tmp_array, rhs_data); + assigns.emplace_back(cprover_string_sym, str); assigns.emplace_back(lhs_data, address_of_exprt(tmp_array)); insert_assignments(goto_program, target, target->function, location, assigns); } /*******************************************************************\ -Function: string_refine_preprocesst::make_assign - - Inputs: a goto_program, a position in this program, an expression lhs, - a function type, a function name, a vector of arguments, a location - and a signature - - Purpose: assign the result of the function application to lhs, - in case the function type is string, it does a special assignment - using `make_string_assign` - -\*******************************************************************/ - -void string_refine_preprocesst::make_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &lhs, - const code_typet &function_type, - const irep_idt &function_name, - const exprt::operandst &arg, - const source_locationt &loc, - const std::string &sig) -{ - if(is_java_string_pointer_type(function_type.return_type())) - make_string_assign( - goto_program, target, lhs, function_type, function_name, arg, loc, sig); - else - make_normal_assign( - goto_program, target, lhs, function_type, function_name, arg, loc, sig); -} - -/*******************************************************************\ - -Function: string_refine_preprocesst::make_string_copy - - Inputs: a goto_program, a position in this program, a lhs expression, - an argument expression and a location - - Outputs: an expression - - Purpose: replace the current instruction by: - > lhs->length=argument->length - > tmp_data=*(argument->data) - > lhs->data=&tmp_data - -\*******************************************************************/ - -void string_refine_preprocesst::make_string_copy( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &lhs, - const exprt &argument, - const source_locationt &location) -{ - // TODO : treat CharSequence and StringBuffer - assert(is_java_string_pointer_type(lhs.type()) || - is_java_string_builder_pointer_type(lhs.type())); - exprt deref=dereference_exprt(lhs, lhs.type().subtype()); - - typet length_type, data_type; - get_data_and_length_type_of_string(deref, data_type, length_type); - - dereference_exprt deref_arg(argument, argument.type().subtype()); - std::list assignments; - - exprt lhs_length=get_length(deref, length_type); - exprt rhs_length=get_length(deref_arg, length_type); - assignments.emplace_back(lhs_length, rhs_length); - - symbol_exprt tmp_data=fresh_array(data_type.subtype(), location); - exprt rhs_data=get_data(deref_arg, data_type); - exprt lhs_data=get_data(deref, data_type); - assignments.emplace_back( - tmp_data, dereference_exprt(rhs_data, data_type.subtype())); - assignments.emplace_back(lhs_data, address_of_exprt(tmp_data)); - - insert_assignments( - goto_program, target, target->function, location, assignments); -} - -/*******************************************************************\ - Function: string_refine_preprocesst::make_string_function Inputs: a position in a goto program, a function name, an expression lhs, @@ -566,7 +644,36 @@ void string_refine_preprocesst::make_string_function( const source_locationt &location, const std::string &signature) { - if(is_java_string_pointer_type(function_type.return_type())) + if(signature.length()>0) + { + if(signature.back()=='S') + { + code_typet ft=function_type; + ft.return_type()=jls_ptr; + typecast_exprt lhs2(lhs, jls_ptr); + + make_string_assign( + goto_program, + target, + lhs2, + ft, + function_name, + arguments, + location, + signature); + } + else + make_normal_assign( + goto_program, + target, + lhs, + function_type, + function_name, + arguments, + location, + signature); + } + else if(implements_java_char_sequence(function_type.return_type())) make_string_assign( goto_program, target, @@ -597,9 +704,11 @@ Function: string_refine_preprocesst::make_string_function Purpose: at the current position replace `lhs=s.some_function(x,...)` by `lhs=function_name(s,x,...)`; option `assign_first_arg` uses `s` instead of `lhs` in the resulting - expression; + expression, Warning : it assumes that `s` is string-like option `skip_first_arg`, removes `s` from the arguments, ie `x` is - the first one + the first one; + arguments that are string (TODO: and char array) are replaced + by string_exprt \*******************************************************************/ @@ -635,11 +744,17 @@ void string_refine_preprocesst::make_string_function( new_type.parameters().push_back(function_type.parameters()[i]); } + std::string new_sig=signature; exprt lhs; if(assign_first_arg) { assert(!function_call.arguments().empty()); lhs=function_call.arguments()[0]; + std::size_t size=function_call.arguments().size(); + if(signature.length()<=size) + new_sig.resize(size+1, '_'); + + new_sig.replace(size, 1, "S"); } else lhs=function_call.lhs(); @@ -650,7 +765,7 @@ void string_refine_preprocesst::make_string_function( new_type.return_type()=lhs.type(); make_string_function( - goto_program, target, lhs, new_type, function_name, args, loc, signature); + goto_program, target, lhs, new_type, function_name, args, loc, new_sig); } /*******************************************************************\ @@ -680,9 +795,11 @@ Function: string_refine_preprocesst::make_string_function_side_effect Inputs: a position in a goto program and a function name - Purpose: at the current position, replace `r=s.some_function(x,...)` - by `s=function_name(s,x,...)` and add a correspondance from r - to s in the `string_builders` map + Purpose: at the current position, replace `r=s.some_function(x,...)` by + > tmp=function_name(x,...) + > s->data=tmp.data + > s->length=tmp.length + > r=s \*******************************************************************/ @@ -692,11 +809,48 @@ void string_refine_preprocesst::make_string_function_side_effect( const irep_idt &function_name, const std::string &signature) { - const code_function_callt function_call=to_code_function_call(target->code); - assert(!function_call.arguments().empty()); - string_builders[function_call.lhs()]=function_call.arguments()[0]; - make_string_function( - goto_program, target, function_name, signature, true, false); + code_function_callt function_call=to_code_function_call(target->code); + source_locationt loc=function_call.source_location(); + std::list assignments; + exprt lhs=function_call.lhs(); + exprt s=function_call.arguments()[0]; + code_typet function_type=to_code_type(function_call.type()); + + function_type.return_type()=s.type(); + + if(lhs.is_not_nil()) + { + symbol_exprt tmp_string=fresh_string(lhs.type(), loc); + + make_string_assign( + goto_program, + target, + tmp_string, + function_type, + function_name, + function_call.arguments(), + loc, + signature); + dereference_exprt deref_lhs(s, s.type().subtype()); + typet data_type, length_type; + get_data_and_length_type_of_string(deref_lhs, data_type, length_type); + member_exprt lhs_data(deref_lhs, "data", data_type); + member_exprt lhs_length(deref_lhs, "length", length_type); + dereference_exprt deref_rhs(tmp_string, s.type().subtype()); + member_exprt rhs_data(deref_rhs, "data", data_type); + member_exprt rhs_length(deref_rhs, "length", length_type); + assignments.emplace_back(lhs_length, rhs_length); + assignments.emplace_back(lhs_data, rhs_data); + assignments.emplace_back(lhs, s); + target=goto_program.insert_after(target); + insert_assignments( + goto_program, target, target->function, loc, assignments); + } + else + { + make_string_function( + goto_program, target, function_name, signature, true, false); + } } /*******************************************************************\ @@ -735,6 +889,7 @@ Function: string_refine_preprocesst::make_to_char_array_function Inputs: a goto program and a position in that goto program Purpose: at the given position replace `return_tmp0=s.toCharArray()` with: + > return_tmp0 = malloc(array[char]); > return_tmp0->data=&((s->data)[0]) > return_tmp0->length=s->length @@ -744,16 +899,36 @@ void string_refine_preprocesst::make_to_char_array_function( goto_programt &goto_program, goto_programt::targett &target) { const code_function_callt &function_call=to_code_function_call(target->code); + source_locationt location=function_call.source_location(); - assert(!function_call.arguments().empty()); + assert(function_call.arguments().size()>=1); const exprt &string_argument=function_call.arguments()[0]; assert(is_java_string_pointer_type(string_argument.type())); + typet deref_type=function_call.lhs().type().subtype(); + const exprt &lhs=function_call.lhs(); + dereference_exprt deref_lhs(lhs, deref_type); + dereference_exprt deref(string_argument, string_argument.type().subtype()); typet length_type, data_type; get_data_and_length_type_of_string(deref, data_type, length_type); std::list assignments; + // lhs=malloc(array[char]) + typet object_type=ns.follow(deref_type); + exprt object_size=size_of_expr(object_type, ns); + + if(object_size.is_nil()) + debug() << "string_refine_preprocesst::make_to_char_array_function " + << "got nil object_size" << eom; + + side_effect_exprt malloc_expr(ID_malloc); + malloc_expr.copy_to_operands(object_size); + malloc_expr.type()=pointer_typet(object_type); + malloc_expr.add_source_location()=location; + assignments.emplace_back(lhs, malloc_expr); + + // &((s->data)[0]) exprt rhs_data=get_data(deref, data_type); dereference_exprt rhs_array(rhs_data, data_type.subtype()); @@ -762,8 +937,6 @@ void string_refine_preprocesst::make_to_char_array_function( address_of_exprt rhs_pointer(first_element); // return_tmp0->data=&((s->data)[0]) - typet deref_type=function_call.lhs().type().subtype(); - dereference_exprt deref_lhs(function_call.lhs(), deref_type); exprt lhs_data=get_data(deref_lhs, data_type); assignments.emplace_back(lhs_data, rhs_pointer); @@ -771,178 +944,8 @@ void string_refine_preprocesst::make_to_char_array_function( exprt rhs_length=get_length(deref, length_type); exprt lhs_length=get_length(deref_lhs, length_type); assignments.emplace_back(lhs_length, rhs_length); - source_locationt location=target->source_location; - insert_assignments( - goto_program, target, target->function, location, assignments); -} - -/*******************************************************************\ - -Function: string_refine_preprocesst::make_cprover_char_array_assign - - Inputs: a goto_program, a position in this program, an expression and a - location - - Outputs: a char array expression (not a pointer) - - Purpose: Introduce a temporary variable for cprover strings; - returns the cprover_string corresponding to rhs - -\*******************************************************************/ - -exprt string_refine_preprocesst::make_cprover_char_array_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &rhs, - const source_locationt &location) -{ - // TODO : add an assertion on the type of rhs - - // We do the following assignments: - // cprover_string_array = rhs.data - // cprover_string = { rhs.length; cprover_string_array } - - // string expression for the rhs of the second assignment - string_exprt new_rhs(java_char_type()); - - typet data_type=new_rhs.content().type(); - typet length_type=java_int_type(); - - symbol_exprt array_lhs=fresh_array(data_type, location); - exprt array_rhs=get_data(rhs, new_rhs.content().type()); - symbol_exprt lhs=fresh_string(new_rhs.type(), location); - new_rhs.length()=get_length(rhs, length_type); - new_rhs.content()=array_lhs; - - std::list assignments; - assignments.emplace_back(array_lhs, array_rhs); - assignments.emplace_back(lhs, new_rhs); insert_assignments( goto_program, target, target->function, location, assignments); - target=goto_program.insert_after(target); - return lhs; -} - -/*******************************************************************\ - -Function: string_refine_preprocesst::make_char_array_function - - Inputs: a position in a goto program, a function name, two Boolean options, - and the index of the char array argument in the function - - Purpose: at the given position replace - `lhs=s.some_function(...,char_array,...)` by - > cprover_string = { char_array->length, *char_array } - > tmp_string=function_name(s, cprover_string, ...) - option `assign_first_arg` uses `s` instead of `lhs` in the second - assignment; - option `skip_first_arg`, removes `s` from the arguments, ie `x` is - the first one; - argument index gives the index of the argument containing char_array - -\*******************************************************************/ - -void string_refine_preprocesst::make_char_array_function( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature, - std::size_t index, - bool assign_first_arg, - bool skip_first_arg) -{ - code_function_callt &function_call=to_code_function_call(target->code); - code_typet function_type=to_code_type(function_call.function().type()); - code_typet new_function_type; - const source_locationt &location=function_call.source_location(); - assert(!function_call.arguments().size()>index); - const std::vector &args=function_call.arguments(); - std::vector new_args; - - exprt lhs; - if(assign_first_arg) - { - assert(!function_call.arguments().empty()); - lhs=function_call.arguments()[0]; - else - lhs=function_call.lhs(); - - if(lhs.id()==ID_typecast) - lhs=to_typecast_expr(lhs).op(); - - exprt char_array=dereference_exprt( - function_call.arguments()[index], - function_call.arguments()[index].type().subtype()); - exprt string=make_cprover_char_array_assign( - goto_program, target, char_array, location); - - std::size_t start_index=skip_first_arg?1:0; - for(std::size_t i=start_index; icode); - assert(!function_call.arguments().empty()); - string_builders[function_call.lhs()]=function_call.arguments()[0]; } /*******************************************************************\ @@ -974,22 +977,20 @@ exprt::operandst string_refine_preprocesst::process_arguments( for(std::size_t i=0; isecond); - else + if(iis_function_call()) { code_function_callt &function_call=to_code_function_call(target->code); - for(auto &arg : function_call.arguments()) - { - auto sb_it=string_builders.find(arg); - if(sb_it!=string_builders.end()) - arg=sb_it->second; - } if(function_call.function().id()==ID_symbol) { @@ -1070,21 +1065,6 @@ void string_refine_preprocesst::replace_string_calls( make_string_function_call( goto_program, target, it->second, signature); - it=string_of_char_array_functions.find(function_id); - if(it!=string_of_char_array_functions.end()) - make_char_array_function( - goto_program, target, it->second, signature, 0); - - it=string_of_char_array_function_calls.find(function_id); - if(it!=string_of_char_array_function_calls.end()) - make_char_array_function_call( - goto_program, target, it->second, signature); - - it=side_effect_char_array_functions.find(function_id); - if(it!=side_effect_char_array_functions.end()) - make_char_array_side_effect( - goto_program, target, it->second, signature); - if(function_id==irep_idt("java::java.lang.String.toCharArray:()[C")) make_to_char_array_function(goto_program, target); } @@ -1299,12 +1279,18 @@ void string_refine_preprocesst::initialize_string_function_table() ID_cprover_string_set_length_func; - side_effect_char_array_functions + + side_effect_functions + ["java::java.lang.StringBuilder.append:([C)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_func; + side_effect_functions ["java::java.lang.StringBuilder.insert:(I[CII)Ljava/lang/StringBuilder;"]= - ID_cprover_string_insert_char_array_func; - side_effect_char_array_functions + ID_cprover_string_insert_func; + side_effect_functions ["java::java.lang.StringBuilder.insert:(I[C)Ljava/lang/StringBuilder;"]= - ID_cprover_string_insert_char_array_func; + ID_cprover_string_insert_func; + // TODO clean irep ids from insert_char_array etc... string_function_calls ["java::java.lang.String.:(Ljava/lang/String;)V"]= @@ -1320,23 +1306,23 @@ void string_refine_preprocesst::initialize_string_function_table() string_function_calls["java::java.lang.StringBuilder.:()V"]= ID_cprover_string_empty_string_func; - string_of_char_array_function_calls["java::java.lang.String.:([C)V"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_function_calls["java::java.lang.String.:([CII)V"]= - ID_cprover_string_of_char_array_func; + string_function_calls["java::java.lang.String.:([C)V"]= + ID_cprover_string_copy_func; + string_function_calls["java::java.lang.String.:([CII)V"]= + ID_cprover_string_copy_func; - string_of_char_array_functions + string_functions ["java::java.lang.String.valueOf:([CII)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_functions + ID_cprover_string_copy_func; + string_functions ["java::java.lang.String.valueOf:([C)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_functions + ID_cprover_string_copy_func; + string_functions ["java::java.lang.String.copyValueOf:([CII)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; - string_of_char_array_functions + ID_cprover_string_copy_func; + string_functions ["java::java.lang.String.copyValueOf:([C)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; + ID_cprover_string_copy_func; c_string_functions["__CPROVER_uninterpreted_string_literal_func"]= ID_cprover_string_literal_func; @@ -1370,9 +1356,23 @@ void string_refine_preprocesst::initialize_string_function_table() ID_cprover_string_of_int_func; signatures["java::java.lang.String.equals:(Ljava/lang/Object;)Z"]="SSZ"; - signatures - ["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= - "SSZ"; + signatures["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= + "SSZ"; + signatures["java::java.lang.StringBuilder.insert:(IZ)" + "Ljava/lang/StringBuilder;"]="SIZS"; + signatures["java::java.lang.StringBuilder.insert:(IJ)" + "Ljava/lang/StringBuilder;"]="SIJS"; + signatures["java::java.lang.StringBuilder.insert:(II)" + "Ljava/lang/StringBuilder;"]="SIIS"; + signatures["java::java.lang.StringBuilder.insert:(IC)" + "Ljava/lang/StringBuilder;"]="SICS"; + signatures["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]="SISS"; + signatures["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]="SISS"; + signatures["java::java.lang.StringBuilder.insert:(I[C)" + "Ljava/lang/StringBuilder;"]="SI[S"; + signatures["java::java.lang.String.intern:()Ljava/lang/String;"]="SV"; } /*******************************************************************\ diff --git a/src/goto-programs/string_refine_preprocess.h b/src/goto-programs/string_refine_preprocess.h index 613d0161926..5ae398b757a 100644 --- a/src/goto-programs/string_refine_preprocess.h +++ b/src/goto-programs/string_refine_preprocess.h @@ -14,6 +14,7 @@ Date: September 2016 #include #include +#include class string_refine_preprocesst:public messaget { @@ -29,28 +30,21 @@ class string_refine_preprocesst:public messaget typedef std::unordered_map id_mapt; typedef std::unordered_map expr_mapt; - // String builders maps the different names of a same StringBuilder object - // to a unique expression. - expr_mapt string_builders; - // Map name of Java string functions to there equivalent in the solver id_mapt side_effect_functions; id_mapt string_functions; id_mapt c_string_functions; id_mapt string_function_calls; - id_mapt string_of_char_array_functions; - id_mapt string_of_char_array_function_calls; - id_mapt side_effect_char_array_functions; std::unordered_map signatures; - expr_mapt hidden_strings; - expr_mapt java_to_cprover_strings; // unique id for each newly created symbols int next_symbol_id; void initialize_string_function_table(); + static bool check_java_type(const typet &type, const std::string &tag); + static bool is_java_string_pointer_type(const typet &type); static bool is_java_string_type(const typet &type); @@ -61,6 +55,20 @@ class string_refine_preprocesst:public messaget static bool is_java_char_sequence_type(const typet &type); + static bool is_java_char_sequence_pointer_type(const typet &type); + + static bool is_java_char_array_type(const typet &type); + + static bool is_java_char_array_pointer_type(const typet &type); + + static bool implements_java_char_sequence(const typet &type) + { + return + is_java_char_sequence_pointer_type(type) || + is_java_string_builder_pointer_type(type) || + is_java_string_pointer_type(type); + } + symbol_exprt fresh_array( const typet &type, const source_locationt &location); symbol_exprt fresh_string( @@ -110,6 +118,9 @@ class string_refine_preprocesst:public messaget void get_data_and_length_type_of_string( const exprt &expr, typet &data_type, typet &length_type); + void get_data_and_length_type_of_char_array( + const exprt &expr, typet &data_type, typet &length_type); + function_application_exprt build_function_application( const irep_idt &function_name, const typet &type, @@ -136,27 +147,16 @@ class string_refine_preprocesst:public messaget const source_locationt &location, const std::string &signature); - void make_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &lhs, - const code_typet &function_type, - const irep_idt &function_name, - const exprt::operandst &arg, - const source_locationt &loc, - const std::string &sig); - exprt make_cprover_string_assign( goto_programt &goto_program, goto_programt::targett &target, const exprt &rhs, const source_locationt &location); - void make_string_copy( + string_exprt make_cprover_char_array_assign( goto_programt &goto_program, goto_programt::targett &target, - const exprt &lhs, - const exprt &argument, + const exprt &rhs, const source_locationt &location); void make_string_function( @@ -192,33 +192,6 @@ class string_refine_preprocesst:public messaget void make_to_char_array_function( goto_programt &goto_program, goto_programt::targett &); - exprt make_cprover_char_array_assign( - goto_programt &goto_program, - goto_programt::targett &target, - const exprt &rhs, - const source_locationt &location); - - void make_char_array_function( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature, - std::size_t index, - bool assign_first_arg=false, - bool skip_first_arg=false); - - void make_char_array_function_call( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature); - - void make_char_array_side_effect( - goto_programt &goto_program, - goto_programt::targett &target, - const irep_idt &function_name, - const std::string &signature); - void replace_string_calls(goto_functionst::function_mapt::iterator f_it); }; diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index f21b0bb1ce2..7f52c6ef77b 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -50,7 +50,16 @@ class java_bytecode_convert_classt:public messaget convert(parse_tree.parsed_class); else if(string_refinement_enabled && parse_tree.parsed_class.name=="java.lang.String") - add_string_type(); + add_string_type("java.lang.String"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.StringBuilder") + add_string_type("java.lang.StringBuilder"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.CharSequence") + add_string_type("java.lang.CharSequence"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.StringBuffer") + add_string_type("java.lang.StringBuffer"); else generate_class_stub(parse_tree.parsed_class.name); } @@ -72,7 +81,7 @@ class java_bytecode_convert_classt:public messaget void generate_class_stub(const irep_idt &class_name); void add_array_types(); - void add_string_type(); + void add_string_type(const irep_idt &class_name); }; /*******************************************************************\ @@ -406,15 +415,17 @@ bool java_bytecode_convert_class( Function: java_bytecode_convert_classt::add_string_type + Inputs: a name for the class such as "java.lang.String" + Purpose: Implements the java.lang.String type in the case that we provide an internal implementation. \*******************************************************************/ -void java_bytecode_convert_classt::add_string_type() +void java_bytecode_convert_classt::add_string_type(const irep_idt &class_name) { class_typet string_type; - string_type.set_tag("java.lang.String"); + string_type.set_tag(class_name); string_type.components().resize(3); string_type.components()[0].set_name("@java.lang.Object"); string_type.components()[0].set_pretty_name("@java.lang.Object"); @@ -432,8 +443,8 @@ void java_bytecode_convert_classt::add_string_type() string_type.add_base(symbol_typet("java::java.lang.Object")); symbolt string_symbol; - string_symbol.name="java::java.lang.String"; - string_symbol.base_name="java.lang.String"; + string_symbol.name="java::"+id2string(class_name); + string_symbol.base_name=id2string(class_name); string_symbol.type=string_type; string_symbol.is_type=true; @@ -445,8 +456,8 @@ void java_bytecode_convert_classt::add_string_type() symbolt string_equals_symbol; string_equals_symbol.name= "java::java.lang.String.equals:(Ljava/lang/Object;)Z"; - string_equals_symbol.base_name="java.lang.String.equals"; - string_equals_symbol.pretty_name="java.lang.String.equals"; + string_equals_symbol.base_name=id2string(class_name)+".equals"; + string_equals_symbol.pretty_name=id2string(class_name)+".equals"; string_equals_symbol.mode=ID_java; code_typet string_equals_type; diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h index 4b8d1229e09..1bb460349b7 100644 --- a/src/solvers/refinement/string_constraint.h +++ b/src/solvers/refinement/string_constraint.h @@ -48,7 +48,6 @@ class string_constraintt: public exprt return operands()[4]; } - private: string_constraintt(); diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index ed898cb9a18..12447e0cf04 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -14,6 +14,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H #include +#include #include #include @@ -65,21 +66,20 @@ class string_constraint_generatort symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type); symbol_exprt fresh_boolean(const irep_idt &prefix); string_exprt fresh_string(const refined_string_typet &type); + string_exprt get_string_expr(const exprt &expr); + string_exprt convert_java_string_to_string_exprt( + const exprt &underlying); + plus_exprt plus_exprt_with_overflow_check(const exprt &op1, const exprt &op2); - // We maintain a map from symbols to strings. - std::map symbol_to_string; + // Maps unresolved symbols to the string_exprt that was created for them + std::map unresolved_symbols; - string_exprt find_or_add_string_of_symbol(const symbol_exprt &sym); - void assign_to_symbol( - const symbol_exprt &sym, const string_exprt &expr) - { - symbol_to_string[sym.get_identifier()]=expr; - } + string_exprt find_or_add_string_of_symbol( + const symbol_exprt &sym, + const refined_string_typet &ref_type); - string_exprt add_axioms_for_string_expr(const exprt &expr); - void set_string_symbol_equal_to_expr( - const symbol_exprt &sym, const exprt &str); + string_exprt add_axioms_for_refined_string(const exprt &expr); exprt add_axioms_for_function_application( const function_application_exprt &expr); @@ -96,6 +96,8 @@ class string_constraint_generatort const std::size_t MAX_FLOAT_LENGTH=15; const std::size_t MAX_DOUBLE_LENGTH=30; + std::map function_application_cache; + static irep_idt extract_java_string(const symbol_exprt &s); exprt axiom_for_is_positive_index(const exprt &x); @@ -117,6 +119,9 @@ class string_constraint_generatort // The specification is partial: the actual value is not actually computed // but we ensure that hash codes of equal strings are equal. exprt add_axioms_for_hash_code(const function_application_exprt &f); + // To each string on which hash_code was called we associate a symbol + // representing the return value of the hash_code function. + std::map hash_code_of_string; exprt add_axioms_for_is_empty(const function_application_exprt &f); exprt add_axioms_for_is_prefix( @@ -224,7 +229,9 @@ class string_constraint_generatort // the start for negative number string_exprt add_axioms_from_float(const function_application_exprt &f); string_exprt add_axioms_from_float( - const exprt &f, bool double_precision=false); + const exprt &f, + const refined_string_typet &ref_type, + bool double_precision); // Add axioms corresponding to the String.valueOf(D) java function // TODO: the specifications is only partial @@ -253,6 +260,7 @@ class string_constraint_generatort string_exprt add_axioms_for_code_point( const exprt &code_point, const refined_string_typet &ref_type); string_exprt add_axioms_for_java_char_array(const exprt &char_array); + exprt add_axioms_for_char_pointer(const function_application_exprt &fun); string_exprt add_axioms_for_if(const if_exprt &expr); exprt add_axioms_for_char_literal(const function_application_exprt &f); @@ -270,6 +278,8 @@ class string_constraint_generatort const function_application_exprt &f); exprt add_axioms_for_parse_int(const function_application_exprt &f); + exprt add_axioms_for_correct_number_format( + const string_exprt &str, std::size_t max_size=10); exprt add_axioms_for_to_char_array(const function_application_exprt &f); exprt add_axioms_for_compare_to(const function_application_exprt &f); @@ -278,6 +288,9 @@ class string_constraint_generatort // string pointers symbol_exprt add_axioms_for_intern(const function_application_exprt &f); + // Pool used for the intern method + std::map intern_of_string; + // Tells which language is used. C and Java are supported irep_idt mode; @@ -293,14 +306,8 @@ class string_constraint_generatort exprt int_of_hex_char(const exprt &chr) const; exprt is_high_surrogate(const exprt &chr) const; exprt is_low_surrogate(const exprt &chr) const; - static exprt character_equals_ignore_case( + exprt character_equals_ignore_case( exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z); - - // Pool used for the intern method - std::map pool; - - // Used to determine whether hashcode should be equal - std::map hash; }; #endif diff --git a/src/solvers/refinement/string_constraint_generator_code_points.cpp b/src/solvers/refinement/string_constraint_generator_code_points.cpp index 7c035a0d3e4..16763dfc97c 100644 --- a/src/solvers/refinement/string_constraint_generator_code_points.cpp +++ b/src/solvers/refinement/string_constraint_generator_code_points.cpp @@ -160,17 +160,18 @@ exprt string_constraint_generatort::add_axioms_for_code_point_at( { typet return_type=f.type(); assert(return_type.id()==ID_signedbv); - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); const exprt &pos=args(f, 2)[1]; symbol_exprt result=fresh_symbol("char", return_type); exprt index1=from_integer(1, str.length().type()); const exprt &char1=str[pos]; - const exprt &char2=str[plus_exprt(pos, index1)]; + const exprt &char2=str[plus_exprt_with_overflow_check(pos, index1)]; exprt char1_as_int=typecast_exprt(char1, return_type); exprt char2_as_int=typecast_exprt(char2, return_type); exprt pair=pair_value(char1_as_int, char2_as_int, return_type); - exprt is_low=is_low_surrogate(str[plus_exprt(pos, index1)]); + exprt is_low=is_low_surrogate( + str[plus_exprt_with_overflow_check(pos, index1)]); exprt return_pair=and_exprt(is_high_surrogate(str[pos]), is_low); axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); @@ -199,7 +200,7 @@ exprt string_constraint_generatort::add_axioms_for_code_point_before( typet return_type=f.type(); assert(return_type.id()==ID_signedbv); symbol_exprt result=fresh_symbol("char", return_type); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); const exprt &char1= str[minus_exprt(args[1], from_integer(2, str.length().type()))]; @@ -234,7 +235,7 @@ Function: string_constraint_generatort::add_axioms_for_code_point_count exprt string_constraint_generatort::add_axioms_for_code_point_count( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const exprt &begin=args(f, 3)[1]; const exprt &end=args(f, 3)[2]; const typet &return_type=f.type(); @@ -265,14 +266,15 @@ Function: string_constraint_generatort::add_axioms_for_offset_by_code_point exprt string_constraint_generatort::add_axioms_for_offset_by_code_point( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const exprt &index=args(f, 3)[1]; const exprt &offset=args(f, 3)[2]; const typet &return_type=f.type(); symbol_exprt result=fresh_symbol("offset_by_code_point", return_type); - exprt minimum=plus_exprt(index, offset); - exprt maximum=plus_exprt(index, plus_exprt(offset, offset)); + exprt minimum=plus_exprt_with_overflow_check(index, offset); + exprt maximum=plus_exprt_with_overflow_check( + index, plus_exprt_with_overflow_check(offset, offset)); axioms.push_back(binary_relation_exprt(result, ID_le, maximum)); axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); diff --git a/src/solvers/refinement/string_constraint_generator_comparison.cpp b/src/solvers/refinement/string_constraint_generator_comparison.cpp index 321282ee9c1..a51e2abe3cd 100644 --- a/src/solvers/refinement/string_constraint_generator_comparison.cpp +++ b/src/solvers/refinement/string_constraint_generator_comparison.cpp @@ -18,7 +18,9 @@ Function: string_constraint_generatort::add_axioms_for_equals Outputs: a expression of Boolean type Purpose: add axioms stating that the result is true exactly when the strings - represented by the arguments are equal + represented by the arguments are equal. + the variable ending in `witness_unequal` is -1 if the length differs + or an index at which the strings are different \*******************************************************************/ @@ -29,8 +31,8 @@ exprt string_constraint_generatort::add_axioms_for_equals( symbol_exprt eq=fresh_boolean("equal"); typecast_exprt tc_eq(eq, f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); typet index_type=s1.length().type(); // We want to write: @@ -54,9 +56,10 @@ exprt string_constraint_generatort::add_axioms_for_equals( binary_relation_exprt(witness, ID_lt, s1.length()), binary_relation_exprt(witness, ID_ge, zero)); and_exprt witnessing(bound_witness, notequal_exprt(s1[witness], s2[witness])); - implies_exprt a3( - not_exprt(eq), - or_exprt(notequal_exprt(s1.length(), s2.length()), witnessing)); + and_exprt diff_length( + notequal_exprt(s1.length(), s2.length()), + equal_exprt(witness, from_integer(-1, index_type))); + implies_exprt a3(not_exprt(eq), or_exprt(diff_length, witnessing)); axioms.push_back(a3); return tc_eq; @@ -92,8 +95,13 @@ exprt string_constraint_generatort::character_equals_ignore_case( // p3 : (is_up2&&'a'-'A'+char2=char1) equal_exprt p1(char1, char2); minus_exprt diff=minus_exprt(char_a, char_A); - and_exprt p2(is_upper_case_1, equal_exprt(plus_exprt(diff, char1), char2)); - and_exprt p3(is_upper_case_2, equal_exprt(plus_exprt(diff, char2), char1)); + + // Overflow is not a problem here because is_upper_case conditions + // ensure that we are within a safe range. + and_exprt p2(is_upper_case_1, + equal_exprt(plus_exprt(diff, char1), char2)); + and_exprt p3(is_upper_case_2, + equal_exprt(plus_exprt(diff, char2), char1)); return or_exprt(or_exprt(p1, p2), p3); } @@ -116,8 +124,8 @@ exprt string_constraint_generatort::add_axioms_for_equals_ignore_case( symbol_exprt eq=fresh_boolean("equal_ignore_case"); typecast_exprt tc_eq(eq, f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); typet char_type=to_refined_string_type(s1.type()).get_char_type(); exprt char_a=constant_char('a', char_type); exprt char_A=constant_char('A', char_type); @@ -174,37 +182,35 @@ Function: string_constraint_generatort::add_axioms_for_hash_code exprt string_constraint_generatort::add_axioms_for_hash_code( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); typet return_type=f.type(); typet index_type=str.length().type(); - // initialisation of the missing pool variable - std::map::iterator it; - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(hash.find(it->second)==hash.end()) - hash[it->second]=fresh_symbol("hash", return_type); + auto pair=hash_code_of_string.insert( + std::make_pair(str, fresh_symbol("hash", return_type))); + exprt hash=pair.first->second; // for each string s. either: // c1: hash(str)=hash(s) // c2: |str|!=|s| - // c3: (|str|==|s| &&exists i<|s|. s[i]!=str[i]) + // c3: (|str|==|s| && exists i<|s|. s[i]!=str[i]) // WARNING: the specification may be incomplete - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + for(auto it : hash_code_of_string) { symbol_exprt i=fresh_exist_index("index_hash", index_type); - equal_exprt c1(hash[it->second], hash[str]); - not_exprt c2(equal_exprt(it->second.length(), str.length())); + equal_exprt c1(it.second, hash); + not_exprt c2(equal_exprt(it.first.length(), str.length())); and_exprt c3( - equal_exprt(it->second.length(), str.length()), + equal_exprt(it.first.length(), str.length()), and_exprt( - not_exprt(equal_exprt(str[i], it->second[i])), + not_exprt(equal_exprt(str[i], it.first[i])), and_exprt( str.axiom_for_is_strictly_longer_than(i), axiom_for_is_positive_index(i)))); axioms.push_back(or_exprt(c1, or_exprt(c2, c3))); } - return hash[str]; + return hash; } /*******************************************************************\ @@ -222,8 +228,8 @@ Function: string_constraint_generatort::add_axioms_for_compare_to exprt string_constraint_generatort::add_axioms_for_compare_to( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); const typet &return_type=f.type(); symbol_exprt res=fresh_symbol("compare_to", return_type); typet index_type=s1.length().type(); @@ -234,12 +240,12 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( // a1 : res==0 => |s1|=|s2| // a2 : forall i<|s1|. s1[i]==s2[i] // a3 : exists x. - // res!=0 ==> x> 0 && - // ((|s1| <= |s2| &&x<|s1|) || (|s1| >= |s2| &&x<|s2|) - // &&res=s1[x]-s2[x] ) - // || cond2: - // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) - // a4 : forall i s1[i]=s2[i] + // res!=0 ==> x > 0 + // && ((|s1| <= |s2| && x<|s1|) || (|s1| >= |s2| &&x<|s2|) + // && res=s1[x]-s2[x] ) + // || cond2: + // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) + // a4 : forall i' s1[i]=s2[i] assert(return_type.id()==ID_signedbv); @@ -282,7 +288,9 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( or_exprt(cond1, cond2))); axioms.push_back(a3); - string_constraintt a4(i, x, not_exprt(res_null), equal_exprt(s1[i], s2[i])); + symbol_exprt i2=fresh_univ_index("QA_compare_to", index_type); + string_constraintt a4( + i2, x, not_exprt(res_null), equal_exprt(s1[i2], s2[i2])); axioms.push_back(a4); return res; @@ -304,15 +312,15 @@ Function: string_constraint_generatort::add_axioms_for_intern symbol_exprt string_constraint_generatort::add_axioms_for_intern( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); + // For now we only enforce content equality and not pointer equality const typet &return_type=f.type(); + typet index_type=str.length().type(); - // initialisation of the missing pool variable - std::map::iterator it; - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(pool.find(it->second)==pool.end()) - pool[it->second]=fresh_symbol("pool", return_type); + auto pair=intern_of_string.insert( + std::make_pair(str, fresh_symbol("pool", return_type))); + symbol_exprt intern=pair.first->second; // intern(str)=s_0 || s_1 || ... // for each string s. @@ -320,30 +328,30 @@ symbol_exprt string_constraint_generatort::add_axioms_for_intern( // || (|str|==|s| &&exists i<|s|. s[i]!=str[i]) exprt disj=false_exprt(); - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + for(auto it : intern_of_string) disj=or_exprt( - disj, equal_exprt(pool[str], symbol_exprt(it->first, return_type))); + disj, equal_exprt(intern, it.second)); axioms.push_back(disj); // WARNING: the specification may be incomplete or incorrect - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(it->second!=str) + for(auto it : intern_of_string) + if(it.second!=str) { symbol_exprt i=fresh_exist_index("index_intern", index_type); axioms.push_back( or_exprt( - equal_exprt(pool[it->second], pool[str]), + equal_exprt(it.second, intern), or_exprt( - not_exprt(str.axiom_for_has_same_length_as(it->second)), + not_exprt(str.axiom_for_has_same_length_as(it.first)), and_exprt( - str.axiom_for_has_same_length_as(it->second), + str.axiom_for_has_same_length_as(it.first), and_exprt( - not_exprt(equal_exprt(str[i], it->second[i])), + not_exprt(equal_exprt(str[i], it.first[i])), and_exprt(str.axiom_for_is_strictly_longer_than(i), axiom_for_is_positive_index(i))))))); } - return pool[str]; + return intern; } diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index a7d9c4921c4..fb8e24d5320 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -35,9 +35,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( // a4 : forall i<|s1|. res[i]=s1[i] // a5 : forall i<|s2|. res[i+|s1|]=s2[i] - exprt a1=res.axiom_for_has_length( - plus_exprt(s1.length(), s2.length())); - axioms.push_back(a1); + res.length()=plus_exprt_with_overflow_check(s1.length(), s2.length()); axioms.push_back(s1.axiom_for_is_shorter_than(res)); axioms.push_back(s2.axiom_for_is_shorter_than(res)); @@ -73,8 +71,8 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( const function_application_exprt::argumentst &args=f.arguments(); assert(args.size()==2); - string_exprt s1=add_axioms_for_string_expr(args[0]); - string_exprt s2=add_axioms_for_string_expr(args[1]); + string_exprt s1=get_string_expr(args[0]); + string_exprt s2=get_string_expr(args[1]); return add_axioms_for_concat(s1, s2); } @@ -95,7 +93,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_int( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_from_int( args(f, 2)[1], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); @@ -118,7 +116,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_long( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_LONG_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); } @@ -138,7 +136,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_bool string_exprt string_constraint_generatort::add_axioms_for_concat_bool( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_bool(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); @@ -159,7 +157,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_char string_exprt string_constraint_generatort::add_axioms_for_concat_char( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_char(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); @@ -180,9 +178,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_double string_exprt string_constraint_generatort::add_axioms_for_concat_double( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_float( - args(f, 2)[1], MAX_DOUBLE_LENGTH); + string_exprt s1=get_string_expr(args(f, 2)[0]); + assert(refined_string_typet::is_refined_string_type(f.type())); + refined_string_typet ref_type=to_refined_string_type(f.type()); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], ref_type, true); return add_axioms_for_concat(s1, s2); } @@ -201,8 +200,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_float string_exprt string_constraint_generatort::add_axioms_for_concat_float( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_float(args(f, 2)[1], MAX_FLOAT_LENGTH); + string_exprt s1=get_string_expr(args(f, 2)[0]); + assert(refined_string_typet::is_refined_string_type(f.type())); + refined_string_typet ref_type=to_refined_string_type(f.type()); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], ref_type, false); return add_axioms_for_concat(s1, s2); } @@ -222,7 +223,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_code_point string_exprt string_constraint_generatort::add_axioms_for_concat_code_point( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_for_code_point(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index 64ffabfd266..2905c6a5f21 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -93,9 +93,13 @@ string_exprt string_constraint_generatort::add_axioms_for_empty_string( const function_application_exprt &f) { assert(f.arguments().empty()); + assert(refined_string_typet::is_refined_string_type(f.type())); const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt res=fresh_string(ref_type); - axioms.push_back(res.axiom_for_has_length(0)); + exprt size=from_integer(0, ref_type.get_index_type()); + const array_typet &content_type=ref_type.get_content_type(); + array_of_exprt empty_array( + from_integer(0, ref_type.get_content_type().subtype()), content_type); + string_exprt res(size, empty_array, ref_type); return res; } @@ -132,6 +136,7 @@ string_exprt string_constraint_generatort::add_axioms_from_literal( else { // Java string constant + assert(false); // TODO: Check if used. On the contrary, discard else. assert(arg.id()==ID_symbol); const exprt &s=arg.op0(); diff --git a/src/solvers/refinement/string_constraint_generator_indexof.cpp b/src/solvers/refinement/string_constraint_generator_indexof.cpp index 283de683bd6..d799091a573 100644 --- a/src/solvers/refinement/string_constraint_generator_indexof.cpp +++ b/src/solvers/refinement/string_constraint_generator_indexof.cpp @@ -92,15 +92,16 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( symbol_exprt contains=fresh_boolean("contains_substring"); // We add axioms: - // a1 : contains => |substring|>=offset>=from_index + // a1 : contains => |str|-|substring|>=offset>=from_index // a2 : !contains => offset=-1 - // a3 : forall 0 <= witness str[witness+offset]=substring[witness] + // a3 : forall 0<=witness<|substring|. + // contains => str[witness+offset]=substring[witness] implies_exprt a1( contains, and_exprt( - str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + str.axiom_for_is_longer_than(plus_exprt_with_overflow_check( + substring.length(), offset)), binary_relation_exprt(offset, ID_ge, from_index))); axioms.push_back(a1); @@ -138,7 +139,8 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( implies_exprt a1( contains, and_exprt( - str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + str.axiom_for_is_longer_than( + plus_exprt_with_overflow_check(substring.length(), offset)), binary_relation_exprt(offset, ID_le, from_index))); axioms.push_back(a1); @@ -173,7 +175,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); const exprt &c=args[1]; const refined_string_typet &ref_type=to_refined_string_type(str.type()); assert(f.type()==ref_type.get_index_type()); @@ -186,14 +188,15 @@ exprt string_constraint_generatort::add_axioms_for_index_of( else assert(false); - if(c.type().id()==ID_unsignedbv) + if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { return add_axioms_for_index_of( str, typecast_exprt(c, ref_type.get_char_type()), from_index); } else { - string_exprt sub=add_axioms_for_string_expr(c); + assert(refined_string_typet::is_refined_string_type(c.type())); + string_exprt sub=get_string_expr(c); return add_axioms_for_index_of_string(str, sub, from_index); } } @@ -215,7 +218,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( exprt index1=from_integer(1, index_type); exprt minus1=from_integer(-1, index_type); - exprt from_index_plus_one=plus_exprt(from_index, index1); + exprt from_index_plus_one=plus_exprt_with_overflow_check(from_index, index1); and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), binary_relation_exprt(index, ID_lt, from_index_plus_one)); @@ -269,7 +272,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); exprt c=args[1]; const refined_string_typet &ref_type=to_refined_string_type(str.type()); exprt from_index; @@ -282,14 +285,14 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( else assert(false); - if(c.type().id()==ID_unsignedbv) + if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { return add_axioms_for_last_index_of( str, typecast_exprt(c, ref_type.get_char_type()), from_index); } else { - string_exprt sub=add_axioms_for_string_expr(c); + string_exprt sub=get_string_expr(c); return add_axioms_for_last_index_of_string(str, sub, from_index); } } diff --git a/src/solvers/refinement/string_constraint_generator_insert.cpp b/src/solvers/refinement/string_constraint_generator_insert.cpp index 6a080a5dc6c..cd5f4376a93 100644 --- a/src/solvers/refinement/string_constraint_generator_insert.cpp +++ b/src/solvers/refinement/string_constraint_generator_insert.cpp @@ -48,8 +48,8 @@ Function: string_constraint_generatort::add_axioms_for_insert string_exprt string_constraint_generatort::add_axioms_for_insert( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + string_exprt s2=get_string_expr(args(f, 3)[2]); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -70,7 +70,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_int( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_int( args(f, 3)[2], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); @@ -93,7 +93,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_long( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_LONG_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -115,7 +115,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_bool( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_bool(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -136,7 +136,7 @@ Function: string_constraint_generatort::add_axioms_for_insert_char string_exprt string_constraint_generatort::add_axioms_for_insert_char( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_char(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); @@ -158,8 +158,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_double string_exprt string_constraint_generatort::add_axioms_for_insert_double( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_float(args(f, 3)[2], ref_type, true); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -179,8 +180,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_float string_exprt string_constraint_generatort::add_axioms_for_insert_float( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_float(args(f, 3)[2], ref_type, false); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -216,7 +218,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_char_array( offset=from_integer(0, count.type()); } - string_exprt str=add_axioms_for_string_expr(f.arguments()[0]); + string_exprt str=get_string_expr(f.arguments()[0]); const exprt &length=f.arguments()[2]; const exprt &data=f.arguments()[3]; string_exprt arr=add_axioms_from_char_array( diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 7fe510c2e35..f8dedaab9a7 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -16,6 +16,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include #include +#include unsigned string_constraint_generatort::next_symbol_id=1; @@ -122,6 +123,42 @@ symbol_exprt string_constraint_generatort::fresh_boolean( /*******************************************************************\ +Function: string_constraint_generatort::plus_exprt_with_overflow_check + + Inputs: + op1 - First term of the sum + op2 - Second term of the sum + + Outputs: A plus expression representing the sum of the arguments + + Purpose: Create a plus expression while adding extra constraints to + axioms in order to prevent overflows. + +\*******************************************************************/ +plus_exprt string_constraint_generatort::plus_exprt_with_overflow_check( + const exprt &op1, const exprt &op2) +{ + plus_exprt sum(plus_exprt(op1, op2)); + + exprt zero=from_integer(0, op1.type()); + + binary_relation_exprt neg1(op1, ID_lt, zero); + binary_relation_exprt neg2(op2, ID_lt, zero); + binary_relation_exprt neg_sum(sum, ID_lt, zero); + + // We prevent overflows by adding the following constraint: + // If the signs of the two operands are the same, then the sign of the sum + // should also be the same. + implies_exprt no_overflow(equal_exprt(neg1, neg2), + equal_exprt(neg1, neg_sum)); + + axioms.push_back(no_overflow); + + return sum; +} + +/*******************************************************************\ + Function: string_constraint_generatort::fresh_string Inputs: a type for string @@ -144,59 +181,113 @@ string_exprt string_constraint_generatort::fresh_string( /*******************************************************************\ -Function: string_constraint_generatort::add_axioms_for_string_expr +Function: string_constraint_generatort::get_string_expr + + Inputs: an expression of refined string type + + Outputs: a string expression + + Purpose: casts an expression to a string expression, or fetches the + actual string_exprt in the case of a symbol. + +\*******************************************************************/ + +string_exprt string_constraint_generatort::get_string_expr(const exprt &expr) +{ + assert(refined_string_typet::is_refined_string_type(expr.type())); + + if(expr.id()==ID_symbol) + { + return find_or_add_string_of_symbol( + to_symbol_expr(expr), + to_refined_string_type(expr.type())); + } + else + { + return to_string_expr(expr); + } +} + +string_exprt string_constraint_generatort::convert_java_string_to_string_exprt( + const exprt &jls) +{ + assert(get_mode()==ID_java); + assert(jls.id()==ID_struct); + + exprt length(to_struct_expr(jls).op1()); + // TODO: Add assertion on the type. + // assert(length.type()==refined_string_typet::index_type()); + exprt java_content(to_struct_expr(jls).op2()); + if(java_content.id()==ID_address_of) + { + java_content=to_address_of_expr(java_content).object(); + } + else + { + java_content=dereference_exprt(java_content, java_content.type()); + } + + refined_string_typet type(java_int_type(), java_char_type()); + + return string_exprt(length, java_content, type); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_refined_string - Inputs: an expression of type string + Inputs: an expression of refined string type Outputs: a string expression that is linked to the argument through axioms that are added to the list - Purpose: obtain a refined string expression corresponding to string - variable of string function call + Purpose: obtain a refined string expression corresponding to a expression + of type string \*******************************************************************/ -string_exprt string_constraint_generatort::add_axioms_for_string_expr( - const exprt &unrefined_string) + +string_exprt string_constraint_generatort::add_axioms_for_refined_string( + const exprt &string) { - string_exprt s; + assert(refined_string_typet::is_refined_string_type(string.type())); + refined_string_typet type=to_refined_string_type(string.type()); - if(unrefined_string.id()==ID_function_application) + // Function applications should have been removed before + assert(string.id()!=ID_function_application); + + if(string.id()==ID_symbol) { - exprt res=add_axioms_for_function_application( - to_function_application_expr(unrefined_string)); - s=to_string_expr(res); + const symbol_exprt &sym=to_symbol_expr(string); + string_exprt s=find_or_add_string_of_symbol(sym, type); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } - else if(unrefined_string.id()==ID_symbol) - s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string)); - else if(unrefined_string.id()==ID_address_of) + else if(string.id()==ID_nondet_symbol) { - assert(unrefined_string.op0().id()==ID_symbol); - s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string.op0())); + string_exprt s=fresh_string(type); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } - else if(unrefined_string.id()==ID_if) - s=add_axioms_for_if(to_if_expr(unrefined_string)); - else if(unrefined_string.id()==ID_nondet_symbol || - unrefined_string.id()==ID_struct) + else if(string.id()==ID_if) { - // TODO: for now we ignore non deterministic symbols and struct + return add_axioms_for_if(to_if_expr(string)); } - else if(unrefined_string.id()==ID_typecast) + else if(string.id()==ID_struct) { - exprt arg=to_typecast_expr(unrefined_string).op(); - exprt res=add_axioms_for_string_expr(arg); - s=to_string_expr(res); + const string_exprt &s=to_string_expr(string); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } else { - throw "add_axioms_for_string_expr:\n"+unrefined_string.pretty()+ + throw "add_axioms_for_refined_string:\n"+string.pretty()+ "\nwhich is not a function application, "+ - "a symbol or an if expression"; + "a symbol, a struct or an if expression"; } - - axioms.push_back( - s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); - return s; } /*******************************************************************\ @@ -216,10 +307,10 @@ string_exprt string_constraint_generatort::add_axioms_for_if( { assert( refined_string_typet::is_refined_string_type(expr.true_case().type())); - string_exprt t=add_axioms_for_string_expr(expr.true_case()); + string_exprt t=get_string_expr(expr.true_case()); assert( refined_string_typet::is_refined_string_type(expr.false_case().type())); - string_exprt f=add_axioms_for_string_expr(expr.false_case()); + string_exprt f=get_string_expr(expr.false_case()); const refined_string_typet &ref_type=to_refined_string_type(t.type()); const typet &index_type=ref_type.get_index_type(); string_exprt res=fresh_string(ref_type); @@ -246,7 +337,7 @@ Function: string_constraint_generatort::find_or_add_string_of_symbol Outputs: a string expression - Purpose: if a symbol represent a string is present in the symbol_to_string + Purpose: if a symbol representing a string is present in the symbol_to_string table, returns the corresponding string, if the symbol is not yet present, creates a new string with the correct type depending on whether the mode is java or c, adds it to the table and returns it. @@ -254,12 +345,11 @@ Function: string_constraint_generatort::find_or_add_string_of_symbol \*******************************************************************/ string_exprt string_constraint_generatort::find_or_add_string_of_symbol( - const symbol_exprt &sym) + const symbol_exprt &sym, const refined_string_typet &ref_type) { irep_idt id=sym.get_identifier(); - const refined_string_typet &ref_type=to_refined_string_type(sym.type()); string_exprt str=fresh_string(ref_type); - auto entry=symbol_to_string.insert(std::make_pair(id, str)); + auto entry=unresolved_symbols.insert(std::make_pair(id, str)); return entry.first->second; } @@ -286,125 +376,180 @@ exprt string_constraint_generatort::add_axioms_for_function_application( const irep_idt &id=is_ssa_expr(name)?to_ssa_expr(name).get_object_name(): to_symbol_expr(name).get_identifier(); + std::string str_id(id.c_str()); + + size_t pos=str_id.find("func_length"); + if(pos!=std::string::npos) + { + function_application_exprt new_expr(expr); + // TODO: This part needs some improvement. + // Stripping the symbol name is not a very robust process. + new_expr.function() = symbol_exprt(str_id.substr(0, pos+4)); + assert(get_mode()==ID_java); + new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); + + auto res_it=function_application_cache.insert(std::make_pair(new_expr, + nil_exprt())); + if(res_it.second) + { + string_exprt res=to_string_expr( + add_axioms_for_function_application(new_expr)); + res_it.first->second=res; + return res.length(); + } + else + return to_string_expr(res_it.first->second).length(); + } + + pos = str_id.find("func_data"); + if(pos!=std::string::npos) + { + function_application_exprt new_expr(expr); + new_expr.function() = symbol_exprt(str_id.substr(0, pos+4)); + new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); + + auto res_it=function_application_cache.insert(std::make_pair(new_expr, + nil_exprt())); + if(res_it.second) + { + string_exprt res=to_string_expr( + add_axioms_for_function_application(new_expr)); + res_it.first->second=res; + return res.content(); + } + else + return to_string_expr(res_it.first->second).content(); + } + // TODO: improve efficiency of this test by either ordering test by frequency // or using a map + auto res_it=function_application_cache.find(expr); + if(res_it!=function_application_cache.end() && res_it->second!=nil_exprt()) + return res_it->second; + + exprt res; + if(id==ID_cprover_char_literal_func) - return add_axioms_for_char_literal(expr); + res=add_axioms_for_char_literal(expr); else if(id==ID_cprover_string_length_func) - return add_axioms_for_length(expr); + res=add_axioms_for_length(expr); else if(id==ID_cprover_string_equal_func) - return add_axioms_for_equals(expr); + res=add_axioms_for_equals(expr); else if(id==ID_cprover_string_equals_ignore_case_func) - return add_axioms_for_equals_ignore_case(expr); + res=add_axioms_for_equals_ignore_case(expr); else if(id==ID_cprover_string_is_empty_func) - return add_axioms_for_is_empty(expr); + res=add_axioms_for_is_empty(expr); else if(id==ID_cprover_string_char_at_func) - return add_axioms_for_char_at(expr); + res=add_axioms_for_char_at(expr); else if(id==ID_cprover_string_is_prefix_func) - return add_axioms_for_is_prefix(expr); + res=add_axioms_for_is_prefix(expr); else if(id==ID_cprover_string_is_suffix_func) - return add_axioms_for_is_suffix(expr); + res=add_axioms_for_is_suffix(expr); else if(id==ID_cprover_string_startswith_func) - return add_axioms_for_is_prefix(expr, true); + res=add_axioms_for_is_prefix(expr, true); else if(id==ID_cprover_string_endswith_func) - return add_axioms_for_is_suffix(expr, true); + res=add_axioms_for_is_suffix(expr, true); else if(id==ID_cprover_string_contains_func) - return add_axioms_for_contains(expr); + res=add_axioms_for_contains(expr); else if(id==ID_cprover_string_hash_code_func) - return add_axioms_for_hash_code(expr); + res=add_axioms_for_hash_code(expr); else if(id==ID_cprover_string_index_of_func) - return add_axioms_for_index_of(expr); + res=add_axioms_for_index_of(expr); else if(id==ID_cprover_string_last_index_of_func) - return add_axioms_for_last_index_of(expr); + res=add_axioms_for_last_index_of(expr); else if(id==ID_cprover_string_parse_int_func) - return add_axioms_for_parse_int(expr); + res=add_axioms_for_parse_int(expr); else if(id==ID_cprover_string_to_char_array_func) - return add_axioms_for_to_char_array(expr); + res=add_axioms_for_to_char_array(expr); else if(id==ID_cprover_string_code_point_at_func) - return add_axioms_for_code_point_at(expr); + res=add_axioms_for_code_point_at(expr); else if(id==ID_cprover_string_code_point_before_func) - return add_axioms_for_code_point_before(expr); + res=add_axioms_for_code_point_before(expr); else if(id==ID_cprover_string_code_point_count_func) - return add_axioms_for_code_point_count(expr); + res=add_axioms_for_code_point_count(expr); else if(id==ID_cprover_string_offset_by_code_point_func) - return add_axioms_for_offset_by_code_point(expr); + res=add_axioms_for_offset_by_code_point(expr); else if(id==ID_cprover_string_compare_to_func) - return add_axioms_for_compare_to(expr); + res=add_axioms_for_compare_to(expr); else if(id==ID_cprover_string_literal_func) - return add_axioms_from_literal(expr); + res=add_axioms_from_literal(expr); else if(id==ID_cprover_string_concat_func) - return add_axioms_for_concat(expr); + res=add_axioms_for_concat(expr); else if(id==ID_cprover_string_concat_int_func) - return add_axioms_for_concat_int(expr); + res=add_axioms_for_concat_int(expr); else if(id==ID_cprover_string_concat_long_func) - return add_axioms_for_concat_long(expr); + res=add_axioms_for_concat_long(expr); else if(id==ID_cprover_string_concat_bool_func) - return add_axioms_for_concat_bool(expr); + res=add_axioms_for_concat_bool(expr); else if(id==ID_cprover_string_concat_char_func) - return add_axioms_for_concat_char(expr); + res=add_axioms_for_concat_char(expr); else if(id==ID_cprover_string_concat_double_func) - return add_axioms_for_concat_double(expr); + res=add_axioms_for_concat_double(expr); else if(id==ID_cprover_string_concat_float_func) - return add_axioms_for_concat_float(expr); + res=add_axioms_for_concat_float(expr); else if(id==ID_cprover_string_concat_code_point_func) - return add_axioms_for_concat_code_point(expr); + res=add_axioms_for_concat_code_point(expr); else if(id==ID_cprover_string_insert_func) - return add_axioms_for_insert(expr); + res=add_axioms_for_insert(expr); else if(id==ID_cprover_string_insert_int_func) - return add_axioms_for_insert_int(expr); + res=add_axioms_for_insert_int(expr); else if(id==ID_cprover_string_insert_long_func) - return add_axioms_for_insert_long(expr); + res=add_axioms_for_insert_long(expr); else if(id==ID_cprover_string_insert_bool_func) - return add_axioms_for_insert_bool(expr); + res=add_axioms_for_insert_bool(expr); else if(id==ID_cprover_string_insert_char_func) - return add_axioms_for_insert_char(expr); + res=add_axioms_for_insert_char(expr); else if(id==ID_cprover_string_insert_double_func) - return add_axioms_for_insert_double(expr); + res=add_axioms_for_insert_double(expr); else if(id==ID_cprover_string_insert_float_func) - return add_axioms_for_insert_float(expr); + res=add_axioms_for_insert_float(expr); +#if 0 else if(id==ID_cprover_string_insert_char_array_func) - return add_axioms_for_insert_char_array(expr); + res=add_axioms_for_insert_char_array(expr); +#endif else if(id==ID_cprover_string_substring_func) - return add_axioms_for_substring(expr); + res=add_axioms_for_substring(expr); else if(id==ID_cprover_string_trim_func) - return add_axioms_for_trim(expr); + res=add_axioms_for_trim(expr); else if(id==ID_cprover_string_to_lower_case_func) - return add_axioms_for_to_lower_case(expr); + res=add_axioms_for_to_lower_case(expr); else if(id==ID_cprover_string_to_upper_case_func) - return add_axioms_for_to_upper_case(expr); + res=add_axioms_for_to_upper_case(expr); else if(id==ID_cprover_string_char_set_func) - return add_axioms_for_char_set(expr); + res=add_axioms_for_char_set(expr); else if(id==ID_cprover_string_value_of_func) - return add_axioms_for_value_of(expr); + res=add_axioms_for_value_of(expr); else if(id==ID_cprover_string_empty_string_func) - return add_axioms_for_empty_string(expr); + res=add_axioms_for_empty_string(expr); else if(id==ID_cprover_string_copy_func) - return add_axioms_for_copy(expr); + res=add_axioms_for_copy(expr); else if(id==ID_cprover_string_of_int_func) - return add_axioms_from_int(expr); + res=add_axioms_from_int(expr); else if(id==ID_cprover_string_of_int_hex_func) - return add_axioms_from_int_hex(expr); + res=add_axioms_from_int_hex(expr); else if(id==ID_cprover_string_of_float_func) - return add_axioms_from_float(expr); + res=add_axioms_from_float(expr); else if(id==ID_cprover_string_of_double_func) - return add_axioms_from_double(expr); + res=add_axioms_from_double(expr); else if(id==ID_cprover_string_of_long_func) - return add_axioms_from_long(expr); + res=add_axioms_from_long(expr); else if(id==ID_cprover_string_of_bool_func) - return add_axioms_from_bool(expr); + res=add_axioms_from_bool(expr); else if(id==ID_cprover_string_of_char_func) - return add_axioms_from_char(expr); - else if(id==ID_cprover_string_of_char_array_func) - return add_axioms_from_char_array(expr); + res=add_axioms_from_char(expr); else if(id==ID_cprover_string_set_length_func) - return add_axioms_for_set_length(expr); + res=add_axioms_for_set_length(expr); else if(id==ID_cprover_string_delete_func) - return add_axioms_for_delete(expr); + res=add_axioms_for_delete(expr); else if(id==ID_cprover_string_delete_char_at_func) - return add_axioms_for_delete_char_at(expr); + res=add_axioms_for_delete_char_at(expr); else if(id==ID_cprover_string_replace_func) - return add_axioms_for_replace(expr); + res=add_axioms_for_replace(expr); + else if(id==ID_cprover_string_intern_func) + res=add_axioms_for_intern(expr); + else if(id==ID_cprover_string_array_of_char_pointer_func) + res=add_axioms_for_char_pointer(expr); else { std::string msg( @@ -412,13 +557,16 @@ exprt string_constraint_generatort::add_axioms_for_function_application( msg+=id2string(id); throw msg; } + function_application_cache[expr]=res; + return res; } /*******************************************************************\ Function: string_constraint_generatort::add_axioms_for_copy - Inputs: function application with one argument, which is a string + Inputs: function application with one argument, which is a string, + or three arguments: string, integer offset and count Outputs: a new string expression @@ -430,20 +578,20 @@ Function: string_constraint_generatort::add_axioms_for_copy string_exprt string_constraint_generatort::add_axioms_for_copy( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 1)[0]); - const refined_string_typet &ref_type=to_refined_string_type(s1.type()); - string_exprt res=fresh_string(ref_type); - - // We add axioms: - // a1 : |res|=|s1| - // a2 : forall i<|s1|. s1[i]=res[i] - - axioms.push_back(res.axiom_for_has_same_length_as(s1)); - - symbol_exprt idx=fresh_univ_index("QA_index_copy", ref_type.get_index_type()); - string_constraintt a2(idx, s1.length(), equal_exprt(s1[idx], res[idx])); - axioms.push_back(a2); - return res; + const auto &args=f.arguments(); + if(args.size()==1) + { + string_exprt s1=get_string_expr(args[0]); + return s1; + } + else + { + assert(args.size()==3); + string_exprt s1=get_string_expr(args[0]); + exprt offset=args[1]; + exprt count=args[2]; + return add_axioms_for_substring(s1, offset, plus_exprt(offset, count)); + } } /*******************************************************************\ @@ -473,6 +621,28 @@ string_exprt string_constraint_generatort::add_axioms_for_java_char_array( /*******************************************************************\ +Function: string_constraint_generatort::add_axioms_for_char_pointer + + Inputs: an expression of type char + + Outputs: an array expression + + Purpose: for an expression of the form `array[0]` returns `array` + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_char_pointer( + const function_application_exprt &fun) +{ + exprt char_pointer=args(fun, 1)[0]; + if(char_pointer.id()==ID_index) + return char_pointer.op0(); + // TODO: we do not know what to do in the other cases + assert(false); +} + +/*******************************************************************\ + Function: string_constraint_generatort::add_axioms_for_length Inputs: function application with one string argument @@ -486,7 +656,7 @@ Function: string_constraint_generatort::add_axioms_for_length exprt string_constraint_generatort::add_axioms_for_length( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); return str.length(); } @@ -511,6 +681,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( const exprt &offset, const exprt &count) { + assert(false); // deprecated, we should use add_axioms_for_substring instead const typet &char_type=to_array_type(data.type()).subtype(); const typet &index_type=length.type(); refined_string_typet ref_type(index_type, char_type); @@ -523,7 +694,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( symbol_exprt qvar=fresh_univ_index("QA_string_of_char_array", index_type); exprt char_in_tab=data; assert(char_in_tab.id()==ID_index); - char_in_tab.op1()=plus_exprt(qvar, offset); + char_in_tab.op1()=plus_exprt_with_overflow_check(qvar, offset); string_constraintt a1(qvar, count, equal_exprt(str[qvar], char_in_tab)); axioms.push_back(a1); @@ -549,6 +720,7 @@ Function: string_constraint_generatort::add_axioms_from_char_array string_exprt string_constraint_generatort::add_axioms_from_char_array( const function_application_exprt &f) { + assert(false); // deprecated, we should use add_axioms_for_substring instead exprt offset; exprt count; if(f.arguments().size()==4) @@ -638,7 +810,7 @@ Function: string_constraint_generatort::add_axioms_for_char_at exprt string_constraint_generatort::add_axioms_for_char_at( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); symbol_exprt char_sym=fresh_symbol("char", ref_type.get_char_type()); axioms.push_back(equal_exprt(char_sym, str[args(f, 2)[1]])); @@ -660,26 +832,6 @@ Function: string_constraint_generatort::add_axioms_for_to_char_array exprt string_constraint_generatort::add_axioms_for_to_char_array( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); return str.content(); } - -/*******************************************************************\ - -Function: string_constraint_generatort::set_string_symbol_equal_to_expr - - Inputs: a symbol and a string - - Purpose: add a correspondence to make sure the symbol points to the - same string as the second argument - -\*******************************************************************/ - -void string_constraint_generatort::set_string_symbol_equal_to_expr( - const symbol_exprt &sym, const exprt &str) -{ - if(str.id()==ID_symbol) - assign_to_symbol(sym, find_or_add_string_of_symbol(to_symbol_expr(str))); - else - assign_to_symbol(sym, add_axioms_for_string_expr(str)); -} diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index 8b1c7bca9d4..2a08b021070 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -31,15 +31,15 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( // We add axioms: // a1 : isprefix => |str| >= |prefix|+offset - // a2 : forall 0<=qvar - // s0[witness+offset]=s2[witness] - // a3 : !isprefix => |str| < |prefix|+offset - // || (|str| >= |prefix|+offset &&0<=witness<|prefix| - // &&str[witness+ofsset]!=prefix[witness]) + // a2 : forall 0<=qvar<|prefix|. isprefix => s0[witness+offset]=s2[witness] + // a3 : !isprefix => + // |str|<|prefix|+offset || + // (0<=witness<|prefix| && str[witness+offset]!=prefix[witness]) implies_exprt a1( isprefix, - str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))); + str.axiom_for_is_longer_than(plus_exprt_with_overflow_check( + prefix.length(), offset))); axioms.push_back(a1); symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type); @@ -47,7 +47,8 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( qvar, prefix.length(), isprefix, - equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])); + equal_exprt(str[plus_exprt_with_overflow_check(qvar, offset)], + prefix[qvar])); axioms.push_back(a2); symbol_exprt witness=fresh_exist_index("witness_not_isprefix", index_type); @@ -55,14 +56,13 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( axiom_for_is_positive_index(witness), and_exprt( prefix.axiom_for_is_strictly_longer_than(witness), - notequal_exprt(str[plus_exprt(witness, offset)], prefix[witness]))); + notequal_exprt(str[plus_exprt_with_overflow_check(witness, offset)], + prefix[witness]))); or_exprt s0_notpref_s1( not_exprt( - str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))), - and_exprt( - witness_diff, str.axiom_for_is_longer_than( - plus_exprt(prefix.length(), offset)))); + plus_exprt_with_overflow_check(prefix.length(), offset))), + witness_diff); implies_exprt a3(not_exprt(isprefix), s0_notpref_s1); axioms.push_back(a3); @@ -88,8 +88,8 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( { const function_application_exprt::argumentst &args=f.arguments(); assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); - string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); - string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + string_exprt s0=get_string_expr(args[swap_arguments?1:0]); + string_exprt s1=get_string_expr(args[swap_arguments?0:1]); exprt offset; if(args.size()==2) offset=from_integer(0, s0.length().type()); @@ -121,7 +121,7 @@ exprt string_constraint_generatort::add_axioms_for_is_empty( // a2 : s0 => is_empty symbol_exprt is_empty=fresh_boolean("is_empty"); - string_exprt s0=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt s0=get_string_expr(args(f, 1)[0]); axioms.push_back(implies_exprt(is_empty, s0.axiom_for_has_length(0))); axioms.push_back(implies_exprt(s0.axiom_for_has_length(0), is_empty)); return typecast_exprt(is_empty, f.type()); @@ -150,8 +150,8 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( symbol_exprt issuffix=fresh_boolean("issuffix"); typecast_exprt tc_issuffix(issuffix, f.type()); - string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); - string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + string_exprt s0=get_string_expr(args[swap_arguments?1:0]); + string_exprt s1=get_string_expr(args[swap_arguments?0:1]); const typet &index_type=s0.length().type(); // We add axioms: @@ -159,7 +159,7 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( // a2 : forall witness s1[witness]=s0[witness + s0.length-s1.length] // a3 : !issuffix => - // s1.length > s0.length + // (s1.length > s0.length && witness=-1) // || (s1.length > witness>=0 // &&s1[witness]!=s0[witness + s0.length-s1.length] @@ -177,7 +177,8 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( exprt shifted=plus_exprt( witness, minus_exprt(s1.length(), s0.length())); or_exprt constr3( - s0.axiom_for_is_strictly_longer_than(s1), + and_exprt(s0.axiom_for_is_strictly_longer_than(s1), + equal_exprt(witness, from_integer(-1, index_type))), and_exprt( notequal_exprt(s0[witness], s1[shifted]), and_exprt( @@ -207,9 +208,10 @@ exprt string_constraint_generatort::add_axioms_for_contains( assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); symbol_exprt contains=fresh_boolean("contains"); typecast_exprt tc_contains(contains, f.type()); - string_exprt s0=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[1]); - const typet &index_type=s0.type(); + string_exprt s0=get_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[1]); + const refined_string_typet ref_type=to_refined_string_type(s0.type()); + const typet &index_type=ref_type.get_index_type(); // We add axioms: // a1 : contains => s0.length >= s1.length diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index 83735e0d5b5..f27fe5d4317 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -29,7 +29,7 @@ Function: string_constraint_generatort::add_axioms_for_set_length string_exprt string_constraint_generatort::add_axioms_for_set_length( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); exprt k=args(f, 2)[1]; const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt res=fresh_string(ref_type); @@ -76,7 +76,7 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( { const function_application_exprt::argumentst &args=f.arguments(); assert(args.size()>=2); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); exprt i(args[1]); exprt j; if(args.size()==3) @@ -110,7 +110,6 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( { const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &index_type=ref_type.get_index_type(); - symbol_exprt idx=fresh_exist_index("index_substring", index_type); assert(start.type()==index_type); assert(end.type()==index_type); string_exprt res=fresh_string(ref_type); @@ -133,8 +132,11 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( // Warning: check what to do if the string is not long enough axioms.push_back(str.axiom_for_is_longer_than(end)); - string_constraintt a4( - idx, res.length(), equal_exprt(res[idx], str[plus_exprt(start, idx)])); + symbol_exprt idx=fresh_univ_index("QA_index_substring", index_type); + string_constraintt a4(idx, + res.length(), + equal_exprt(res[idx], + str[plus_exprt_with_overflow_check(start, idx)])); axioms.push_back(a4); return res; } @@ -154,7 +156,7 @@ Function: string_constraint_generatort::add_axioms_for_trim string_exprt string_constraint_generatort::add_axioms_for_trim( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &index_type=ref_type.get_index_type(); string_exprt res=fresh_string(ref_type); @@ -173,7 +175,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( // a8 : forall n<|s1|, s[idx+n]=s1[n] // a9 : (s[m]>' ' &&s[m+|s1|-1]>' ') || m=|s| - exprt a1=str.axiom_for_is_longer_than(plus_exprt(idx, res.length())); + exprt a1=str.axiom_for_is_longer_than( + plus_exprt_with_overflow_check(idx, res.length())); axioms.push_back(a1); binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type)); @@ -195,7 +198,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( axioms.push_back(a6); symbol_exprt n2=fresh_univ_index("QA_index_trim2", index_type); - minus_exprt bound(str.length(), plus_exprt(idx, res.length())); + minus_exprt bound(str.length(), plus_exprt_with_overflow_check(idx, + res.length())); binary_relation_exprt eqn2( str[plus_exprt(idx, plus_exprt(res.length(), n2))], ID_le, @@ -210,7 +214,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( axioms.push_back(a8); minus_exprt index_before( - plus_exprt(idx, res.length()), from_integer(1, index_type)); + plus_exprt_with_overflow_check(idx, res.length()), + from_integer(1, index_type)); binary_relation_exprt no_space_before(str[index_before], ID_gt, space_char); or_exprt a9( equal_exprt(idx, str.length()), @@ -236,7 +241,7 @@ Function: string_constraint_generatort::add_axioms_for_to_lower_case string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); @@ -289,7 +294,7 @@ Function: string_constraint_generatort::add_axioms_for_to_upper_case string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); @@ -345,7 +350,7 @@ Function: string_constraint_generatort::add_axioms_for_char_set string_exprt string_constraint_generatort::add_axioms_for_char_set( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); string_exprt res=fresh_string(ref_type); with_exprt sarrnew(str.content(), args(f, 3)[1], args(f, 3)[2]); @@ -378,7 +383,7 @@ Function: string_constraint_generatort::add_axioms_for_replace string_exprt string_constraint_generatort::add_axioms_for_replace( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const exprt &old_char=args(f, 3)[1]; const exprt &new_char=args(f, 3)[2]; @@ -421,10 +426,12 @@ Function: string_constraint_generatort::add_axioms_for_delete_char_at string_exprt string_constraint_generatort::add_axioms_for_delete_char_at( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); exprt index_one=from_integer(1, str.length().type()); return add_axioms_for_delete( - str, args(f, 2)[1], plus_exprt(args(f, 2)[1], index_one)); + str, + args(f, 2)[1], + plus_exprt_with_overflow_check(args(f, 2)[1], index_one)); } /*******************************************************************\ @@ -468,6 +475,6 @@ Function: string_constraint_generatort::add_axioms_for_delete string_exprt string_constraint_generatort::add_axioms_for_delete( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); return add_axioms_for_delete(str, args(f, 3)[1], args(f, 3)[2]); } diff --git a/src/solvers/refinement/string_constraint_generator_valueof.cpp b/src/solvers/refinement/string_constraint_generator_valueof.cpp index d1030e5e085..10fe490259a 100644 --- a/src/solvers/refinement/string_constraint_generator_valueof.cpp +++ b/src/solvers/refinement/string_constraint_generator_valueof.cpp @@ -63,7 +63,8 @@ Function: string_constraint_generatort::add_axioms_from_float string_exprt string_constraint_generatort::add_axioms_from_float( const function_application_exprt &f) { - return add_axioms_from_float(args(f, 1)[0], false); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_float(args(f, 1)[0], ref_type, false); } /*******************************************************************\ @@ -81,7 +82,8 @@ Function: string_constraint_generatort::add_axioms_from_double string_exprt string_constraint_generatort::add_axioms_from_double( const function_application_exprt &f) { - return add_axioms_from_float(args(f, 1)[0], true); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_float(args(f, 1)[0], ref_type, true); } /*******************************************************************\ @@ -99,13 +101,12 @@ Function: string_constraint_generatort::add_axioms_from_float \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_from_float( - const exprt &f, bool double_precision) + const exprt &f, const refined_string_typet &ref_type, bool double_precision) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); + string_exprt res=fresh_string(ref_type); const typet &index_type=ref_type.get_index_type(); const typet &char_type=ref_type.get_char_type(); - string_exprt res=fresh_string(ref_type); - const exprt &index24=from_integer(24, ref_type.get_index_type()); + const exprt &index24=from_integer(24, index_type); axioms.push_back(res.axiom_for_is_shorter_than(index24)); string_exprt magnitude=fresh_string(ref_type); @@ -323,13 +324,22 @@ string_exprt string_constraint_generatort::add_axioms_from_int( axioms.push_back(a1); exprt chr=res[0]; - exprt starts_with_minus=equal_exprt(chr, minus_char); - exprt starts_with_digit=and_exprt( + equal_exprt starts_with_minus(chr, minus_char); + and_exprt starts_with_digit( binary_relation_exprt(chr, ID_ge, zero_char), binary_relation_exprt(chr, ID_le, nine_char)); or_exprt a2(starts_with_digit, starts_with_minus); axioms.push_back(a2); + // These are constraints to detect number that requiere the maximum number + // of digits + exprt smallest_with_max_digits= + from_integer(smallest_by_digit(max_size-1), type); + binary_relation_exprt big_negative( + i, ID_le, unary_minus_exprt(smallest_with_max_digits)); + binary_relation_exprt big_positive(i, ID_ge, smallest_with_max_digits); + or_exprt requieres_max_digits(big_negative, big_positive); + for(size_t size=1; size<=max_size; size++) { // For each possible size, we add axioms: @@ -387,13 +397,18 @@ string_exprt string_constraint_generatort::add_axioms_from_int( axioms.push_back(a6); } - // we have to be careful when exceeding the maximal size of integers + // when the size is close to the maximum, either the number is very big + // or it is negative + if(size==max_size-1) + { + implies_exprt a7(premise, or_exprt(requieres_max_digits, + starts_with_minus)); + axioms.push_back(a7); + } + // when we reach the maximal size the number is very big in the negative if(size==max_size) { - exprt smallest_with_10_digits=from_integer( - smallest_by_digit(max_size), type); - binary_relation_exprt big(i, ID_ge, smallest_with_10_digits); - implies_exprt a7(premise, big); + implies_exprt a7(premise, and_exprt(starts_with_minus, big_negative)); axioms.push_back(a7); } } @@ -597,6 +612,60 @@ string_exprt string_constraint_generatort::add_axioms_for_value_of( /*******************************************************************\ +Function: string_constraint_generatort::add_axioms_for_correct_number_format + + Inputs: function application with one string expression + + Outputs: an boolean expression + + Purpose: add axioms making the return value true if the given string is + a correct number + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_correct_number_format( + const string_exprt &str, std::size_t max_size) +{ + symbol_exprt correct=fresh_boolean("correct_number_format"); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &char_type=ref_type.get_char_type(); + const typet &index_type=ref_type.get_index_type(); + exprt zero_char=constant_char('0', char_type); + exprt nine_char=constant_char('9', char_type); + exprt minus_char=constant_char('-', char_type); + exprt plus_char=constant_char('+', char_type); + + exprt chr=str[0]; + equal_exprt starts_with_minus(chr, minus_char); + equal_exprt starts_with_plus(chr, plus_char); + and_exprt starts_with_digit( + binary_relation_exprt(chr, ID_ge, zero_char), + binary_relation_exprt(chr, ID_le, nine_char)); + + or_exprt correct_first( + or_exprt(starts_with_minus, starts_with_plus), starts_with_digit); + exprt has_first=str.axiom_for_is_longer_than(from_integer(1, index_type)); + implies_exprt a1(correct, and_exprt(has_first, correct_first)); + axioms.push_back(a1); + + exprt not_too_long=str.axiom_for_is_shorter_than(max_size); + axioms.push_back(not_too_long); + + symbol_exprt qvar=fresh_univ_index("number_format", index_type); + + and_exprt is_digit( + binary_relation_exprt(str[qvar], ID_ge, zero_char), + binary_relation_exprt(str[qvar], ID_le, nine_char)); + + string_constraintt a2( + qvar, from_integer(1, index_type), str.length(), correct, is_digit); + + axioms.push_back(a2); + return correct; +} + +/*******************************************************************\ + Function: string_constraint_generatort::add_axioms_for_parse_int Inputs: function application with one string expression @@ -610,7 +679,7 @@ Function: string_constraint_generatort::add_axioms_for_parse_int exprt string_constraint_generatort::add_axioms_for_parse_int( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); const typet &type=f.type(); symbol_exprt i=fresh_symbol("parsed_int", type); const refined_string_typet &ref_type=to_refined_string_type(str.type()); @@ -626,6 +695,10 @@ exprt string_constraint_generatort::add_axioms_for_parse_int( exprt starts_with_plus=equal_exprt(chr, plus_char); exprt starts_with_digit=binary_relation_exprt(chr, ID_ge, zero_char); + // TODO: we should throw an exception when this does not hold: + exprt correct=add_axioms_for_correct_number_format(str); + axioms.push_back(correct); + for(unsigned size=1; size<=10; size++) { exprt sum=from_integer(0, type); diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index db9a2c460cc..1391ffd2315 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -11,17 +11,38 @@ Author: Alberto Griggio, alberto.griggio@gmail.com \*******************************************************************/ #include +#include #include #include #include +#include +#include #include #include #include +#include + +/*******************************************************************\ + +Constructor: string_refinementt + + Inputs: a namespace, a decision procedure, a bound on the number + of refinements and a boolean flag `concretize_result` + + Purpose: refinement_bound is a bound on the number of refinement allowed. + if `concretize_result` is set to true, at the end of the decision + procedure, the solver try to find a concrete value for each + character + +\*******************************************************************/ string_refinementt::string_refinementt( - const namespacet &_ns, propt &_prop, unsigned refinement_bound): + const namespacet &_ns, + propt &_prop, + unsigned refinement_bound): supert(_ns, _prop), use_counter_example(false), + do_concretizing(false), initial_loop_bound(refinement_bound) { } @@ -52,15 +73,27 @@ Function: string_refinementt::display_index_set void string_refinementt::display_index_set() { + std::size_t count=0; + std::size_t count_current=0; for(const auto &i : index_set) { const exprt &s=i.first; - debug() << "IS(" << from_expr(s) << ")=={"; + debug() << "IS(" << from_expr(s) << ")=={" << eom; for(auto j : i.second) - debug() << from_expr(j) << "; "; + { + if(current_index_set[i.first].find(j)!=current_index_set[i.first].end()) + { + count_current++; + debug() << "**"; + } + debug() << " " << from_expr(j) << ";" << eom; + count++; + } debug() << "}" << eom; } + debug() << count << " elements in index set (" << count_current + << " newly added)" << eom; } /*******************************************************************\ @@ -100,152 +133,320 @@ void string_refinementt::add_instantiations() /*******************************************************************\ -Function: string_refinementt::convert_rest - - Inputs: an expression +Function: string_refinementt::add_symbol_to_symbol_map() - Outputs: a literal + Inputs: a symbol and the expression to map it to - Purpose: if the expression is a function application, we convert it - using our own convert_function_application method + Purpose: keeps a map of symbols to expressions, such as none of the + mapped values exist as a key \*******************************************************************/ -literalt string_refinementt::convert_rest(const exprt &expr) +void string_refinementt::add_symbol_to_symbol_map +(const exprt &lhs, const exprt &rhs) { - if(expr.id()==ID_function_application) + assert(lhs.id()==ID_symbol); + + // We insert the mapped value of the rhs, if it exists. + auto it=symbol_resolve.find(rhs); + const exprt &new_rhs=it!=symbol_resolve.end()?it->second:rhs; + + symbol_resolve[lhs]=new_rhs; + reverse_symbol_resolve[new_rhs].push_back(lhs); + + std::list symbols_to_update_with_new_rhs(reverse_symbol_resolve[rhs]); + for(exprt item : symbols_to_update_with_new_rhs) { - // can occur in __CPROVER_assume - bvt bv=convert_function_application(to_function_application_expr(expr)); - assert(bv.size()==1); - return bv[0]; + symbol_resolve[item]=new_rhs; + reverse_symbol_resolve[new_rhs].push_back(item); } - else +} + +/*******************************************************************\ + +Function: string_refinementt::set_char_array_equality() + + Inputs: the rhs and lhs of an equality over character arrays + + Purpose: add axioms if the rhs is a character array + +\*******************************************************************/ + +void string_refinementt::set_char_array_equality( + const exprt &lhs, const exprt &rhs) +{ + assert(lhs.id()==ID_symbol); + + if(rhs.id()==ID_array && rhs.type().id()==ID_array) { - return supert::convert_rest(expr); + const typet &index_type=to_array_type(rhs.type()).size().type(); + for(size_t i=0, ilim=rhs.operands().size(); i!=ilim; ++i) + { + // Introduce axioms to map symbolic rhs to its char array. + index_exprt arraycell(rhs, from_integer(i, index_type)); + equal_exprt arrayeq(arraycell, rhs.operands()[i]); + add_lemma(arrayeq, false); +#if 0 + generator.axioms.push_back(arrayeq); +#endif + } } + // At least for Java (as it is currently pre-processed), we need not consider + // other cases, because all character arrays find themselves on the rhs of an + // equality. Note that this might not be the case for other languages. } /*******************************************************************\ -Function: string_refinementt::convert_symbol +Function: string_refinementt::substitute_function_applications() - Inputs: an expression + Inputs: an expression containing function applications - Outputs: a bitvector + Outputs: an epression containing no function application - Purpose: if the expression as string type, look up for the string in the - list of string symbols that we maintain, and convert it; - otherwise use the method of the parent class + Purpose: remove functions applications and create the necessary + axioms \*******************************************************************/ -bvt string_refinementt::convert_symbol(const exprt &expr) +exprt string_refinementt::substitute_function_applications(exprt expr) { - const typet &type=expr.type(); - const irep_idt &identifier=expr.get(ID_identifier); - assert(!identifier.empty()); + for(size_t i=0; iMAX_CONCRETE_STRING_SIZE? + MAX_CONCRETE_STRING_SIZE:concretize_limit; + exprt content_expr=str.content(); + replace_expr(current_model, content_expr); + for(size_t i=0; i &pair : non_string_axioms) + { + replace_expr(symbol_resolve, pair.first); + debug() << "super::set_to " << from_expr(pair.first) << eom; + supert::set_to(pair.first, pair.second); + } + + for(exprt &axiom : generator.axioms) + { + replace_expr(symbol_resolve, axiom); if(axiom.id()==ID_string_constraint) { string_constraintt c=to_string_constraint(axiom); @@ -282,6 +494,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() { add_lemma(axiom); } + } initial_index_set(universal_axioms); update_index_set(cur); @@ -302,6 +515,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() else { debug() << "check_SAT: the model is correct" << eom; + concretize_lengths(); return D_SATISFIABLE; } @@ -318,7 +532,13 @@ decision_proceduret::resultt string_refinementt::dec_solve() if(current_index_set.empty()) { debug() << "current index set is empty" << eom; - return D_SATISFIABLE; + if(do_concretizing) + { + concretize_results(); + do_concretizing=false; + } + else + return D_SATISFIABLE; } display_index_set(); @@ -367,131 +587,337 @@ bvt string_refinementt::convert_bool_bv(const exprt &boole, const exprt &orig) Function: string_refinementt::add_lemma - Inputs: a lemma + Inputs: a lemma and Boolean value stating whether the lemma should + be added to the index set. Purpose: add the given lemma to the solver \*******************************************************************/ -void string_refinementt::add_lemma(const exprt &lemma, bool add_to_index_set) +void string_refinementt::add_lemma( + const exprt &lemma, bool _simplify, bool add_to_index_set) { if(!seen_instances.insert(lemma).second) return; - if(lemma.is_true()) + if(add_to_index_set) + cur.push_back(lemma); + + exprt simple_lemma=lemma; + if(_simplify) + simplify(simple_lemma, ns); + + if(simple_lemma.is_true()) { +#if 0 debug() << "string_refinementt::add_lemma : tautology" << eom; +#endif return; } - debug() << "adding lemma " << from_expr(lemma) << eom; + debug() << "adding lemma " << from_expr(simple_lemma) << eom; - prop.l_set_to_true(convert(lemma)); - if(add_to_index_set) - cur.push_back(lemma); + prop.l_set_to_true(convert(simple_lemma)); } /*******************************************************************\ -Function: string_refinementt::string_of_array +Function: string_refinementt::get_array - Inputs: a constant array expression and a integer expression + Inputs: an expression representing an array and an expression + representing an integer - Outputs: a string + Outputs: an array expression or an array_of_exprt - Purpose: convert the content of a string to a more readable representation. - This should only be used for debbuging. + Purpose: get a model of an array and put it in a certain form. + If the size cannot be obtained or if it is too big, return an + empty array. \*******************************************************************/ -std::string string_refinementt::string_of_array( - const exprt &arr, const exprt &size) const +exprt string_refinementt::get_array(const exprt &arr, const exprt &size) const { - if(size.id()!=ID_constant) - return "string of unknown size"; + exprt arr_val=get_array(arr); + exprt size_val=supert::get(size); + size_val=simplify_expr(size_val, ns); + typet char_type=arr.type().subtype(); + typet index_type=size.type(); + array_typet empty_ret_type(char_type, from_integer(0, index_type)); + array_of_exprt empty_ret(from_integer(0, char_type), empty_ret_type); + + if(size_val.id()!=ID_constant) + { +#if 0 + debug() << "(sr::get_array) string of unknown size: " + << from_expr(size_val) << eom; +#endif + return empty_ret; + } + unsigned n; - if(to_unsigned_integer(to_constant_expr(size), n)) - n=0; + if(to_unsigned_integer(to_constant_expr(size_val), n)) + { +#if 0 + debug() << "(sr::get_array) size is not valid" << eom; +#endif + return empty_ret; + } + + array_typet ret_type(char_type, from_integer(n, index_type)); + array_exprt ret(ret_type); if(n>MAX_CONCRETE_STRING_SIZE) - return "very long string"; + { +#if 0 + debug() << "(sr::get_array) long string (size=" << n << ")" << eom; +#endif + return empty_ret; + } + if(n==0) - return "\"\""; + { +#if 0 + debug() << "(sr::get_array) empty string" << eom; +#endif + return empty_ret; + } - std::ostringstream buf; - buf << "\""; - exprt val=get(arr); + std::vector concrete_array(n); - if(val.id()=="array-list") + if(arr_val.id()=="array-list") { - for(size_t i=0; i(c); + exprt value=arr_val.operands()[i*2+1]; + to_unsigned_integer(to_constant_expr(value), concrete_array[idx]); } } } } + else if(arr_val.id()==ID_array) + { + for(size_t i=0; i=32) + result << (unsigned char) c; + else + { + result << "\\u" << std::hex << std::setw(4) << std::setfill('0') + << (unsigned int) c; + } + } + + return result.str(); +} + +/*******************************************************************\ - for(size_t i=0; i &m, bool negated) const + std::map &m, const typet &type, bool negated) const { exprt sum=nil_exprt(); mp_integer constants=0; typet index_type; if(m.empty()) - return nil_exprt(); + return from_integer(0, type); else index_type=m.begin()->first.type(); @@ -723,12 +1122,20 @@ exprt string_refinementt::sum_over_map( default: if(second>1) { - for(int i=0; isecond; i--) + if(sum.is_nil()) + sum=unary_minus_exprt(t); + else + sum=minus_exprt(sum, t); + for(int i=-1; i>second; i--) sum=minus_exprt(sum, t); } } @@ -755,7 +1162,7 @@ Function: string_refinementt::simplify_sum exprt string_refinementt::simplify_sum(const exprt &f) const { std::map map=map_representation_of_sum(f); - return sum_over_map(map); + return sum_over_map(map, f.type()); } /*******************************************************************\ @@ -800,7 +1207,7 @@ exprt string_refinementt::compute_inverse_function( } elems.erase(it); - return sum_over_map(elems, neg); + return sum_over_map(elems, f.type(), neg); } @@ -830,7 +1237,7 @@ Function: find_qvar Outputs: a Boolean - Purpose: looks for the symbol and return true if it is found + Purpose: look for the symbol and return true if it is found \*******************************************************************/ @@ -885,6 +1292,26 @@ Function: string_refinementt::initial_index_set and the upper bound minus one \*******************************************************************/ +void string_refinementt::add_to_index_set(const exprt &s, exprt i) +{ + simplify(i, ns); + if(i.id()==ID_constant) + { + mp_integer mpi; + to_integer(i, mpi); + if(mpi<0) + { + debug() << "add_to_index_set : ignoring negative number " << mpi << eom; + return; + } + } + if(index_set[s].insert(i).second) + { + debug() << "adding to index set of " << from_expr(s) + << ": " << from_expr(i) << eom; + current_index_set[s].insert(i); + } +} void string_refinementt::initial_index_set(const string_constraintt &axiom) { @@ -906,8 +1333,7 @@ void string_refinementt::initial_index_set(const string_constraintt &axiom) // if cur is of the form s[i] and no quantified variable appears in i if(!has_quant_var) { - current_index_set[s].insert(i); - index_set[s].insert(i); + add_to_index_set(s, i); } else { @@ -917,8 +1343,7 @@ void string_refinementt::initial_index_set(const string_constraintt &axiom) axiom.upper_bound(), from_integer(1, axiom.upper_bound().type())); replace_expr(qvar, kminus1, e); - current_index_set[s].insert(e); - index_set[s].insert(e); + add_to_index_set(s, e); } } else @@ -952,12 +1377,7 @@ void string_refinementt::update_index_set(const exprt &formula) const exprt &i=cur.op1(); assert(s.type().id()==ID_array); exprt simplified=simplify_sum(i); - if(index_set[s].insert(simplified).second) - { - debug() << "adding to index set of " << from_expr(s) - << ": " << from_expr(simplified) << eom; - current_index_set[s].insert(simplified); - } + add_to_index_set(s, simplified); } else { @@ -1014,19 +1434,18 @@ exprt find_index(const exprt &expr, const exprt &str) catch (exprt i) { return i; } } - /*******************************************************************\ Function: string_refinementt::instantiate - Inputs: an universaly quantified formula `axiom`, an array of char + Inputs: a universally quantified formula `axiom`, an array of char variable `str`, and an index expression `val`. - Outputs: substitute `qvar` the universaly quantified variable of `axiom`, by + Outputs: substitute `qvar` the universally quantified variable of `axiom`, by an index `val`, in `axiom`, so that the index used for `str` equals `val`. For instance, if `axiom` corresponds to - $\forall q. s[q+x]='a' && t[q]='b'$, `instantiate(axom,s,v)` would return - an expression for $s[v]='a' && t[v-x]='b'$. + $\forall q. s[q+x]='a' && t[q]='b'$, `instantiate(axom,s,v)` + would return an expression for $s[v]='a' && t[v-x]='b'$. \*******************************************************************/ @@ -1051,6 +1470,17 @@ exprt string_refinementt::instantiate( return implies_exprt(bounds, instance); } +/*******************************************************************\ + +Function: string_refinementt::instantiate_not_contains + + Inputs: a quantified formula representing `not_contains`, and a + list to which to add the created lemmas to + + Purpose: instantiate a quantified formula representing `not_contains` + by substituting the quantifiers and generating axioms + +\*******************************************************************/ void string_refinementt::instantiate_not_contains( const string_not_contains_constraintt &axiom, std::list &new_lemmas) @@ -1102,3 +1532,76 @@ void string_refinementt::instantiate_not_contains( new_lemmas.push_back(witness_bounds); } } + +/*******************************************************************\ + +Function: string_refinementt::substitute_array_lists() + + Inputs: an expression containing array-list expressions + + Outputs: an epression containing no array-list + + Purpose: replace array-lists by 'with' expressions + +\*******************************************************************/ + +exprt string_refinementt::substitute_array_lists(exprt expr) const +{ + for(size_t i=0; i=2); + typet &char_type=expr.operands()[1].type(); + array_typet arr_type(char_type, infinity_exprt(char_type)); + array_of_exprt new_arr(from_integer(0, char_type), + arr_type); + + with_exprt ret_expr(new_arr, + expr.operands()[0], + expr.operands()[1]); + + for(size_t i=2; isecond); + } + + ecopy=supert::get(ecopy); + + return substitute_array_lists(ecopy); +} diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index be0cd332bc3..252146f4620 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -14,6 +14,7 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #define CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H #include +#include #include #include @@ -27,30 +28,32 @@ Author: Alberto Griggio, alberto.griggio@gmail.com class string_refinementt: public bv_refinementt { public: - // refinement_bound is a bound on the number of refinement allowed string_refinementt( - const namespacet &_ns, propt &_prop, unsigned refinement_bound); + const namespacet &_ns, + propt &_prop, + unsigned refinement_bound); void set_mode(); // Should we use counter examples at each iteration? bool use_counter_example; - virtual std::string decision_procedure_text() const + bool do_concretizing; + + virtual std::string decision_procedure_text() const override { return "string refinement loop with "+prop.solver_text(); } static exprt is_positive(const exprt &x); + exprt get(const exprt &expr) const override; + protected: typedef std::set expr_sett; + typedef std::list exprt_listt; - virtual bvt convert_symbol(const exprt &expr); - virtual bvt convert_function_application( - const function_application_exprt &expr); - - decision_proceduret::resultt dec_solve(); + decision_proceduret::resultt dec_solve() override; bvt convert_bool_bv(const exprt &boole, const exprt &orig); @@ -60,6 +63,9 @@ class string_refinementt: public bv_refinementt unsigned initial_loop_bound; + // Is the current model correct + bool concrete_model; + string_constraint_generatort generator; // Simple constraints that have been given to the solver @@ -76,23 +82,42 @@ class string_refinementt: public bv_refinementt // Warning: this is indexed by array_expressions and not string expressions std::map current_index_set; std::map index_set; + replace_mapt symbol_resolve; + std::map reverse_symbol_resolve; + std::list> non_string_axioms; - void display_index_set(); + // Valuation in the current model of the symbols that have been created + // by the solver + replace_mapt current_model; - void add_lemma(const exprt &lemma, bool add_to_index_set=true); + void add_equivalence(const irep_idt & lhs, const exprt & rhs); - bool boolbv_set_equality_to_true(const equal_exprt &expr); + void display_index_set(); - literalt convert_rest(const exprt &expr); + void add_lemma(const exprt &lemma, + bool simplify=true, + bool add_to_index_set=true); - void add_instantiations(); + exprt substitute_function_applications(exprt expr); + typet substitute_java_string_types(typet type); + exprt substitute_java_strings(exprt expr); + void add_symbol_to_symbol_map(const exprt &lhs, const exprt &rhs); + bool is_char_array(const typet &type) const; + bool add_axioms_for_string_assigns(const exprt &lhs, const exprt &rhs); + void set_to(const exprt &expr, bool value) override; + void add_instantiations(); + void add_negation_of_constraint_to_solver( + const string_constraintt &axiom, supert &solver); + void fill_model(); bool check_axioms(); + void set_char_array_equality(const exprt &lhs, const exprt &rhs); void update_index_set(const exprt &formula); void update_index_set(const std::vector &cur); void initial_index_set(const string_constraintt &axiom); void initial_index_set(const std::vector &string_axioms); + void add_to_index_set(const exprt &s, exprt i); exprt instantiate( const string_constraintt &axiom, const exprt &str, const exprt &val); @@ -101,17 +126,26 @@ class string_refinementt: public bv_refinementt const string_not_contains_constraintt &axiom, std::list &new_lemmas); + exprt substitute_array_lists(exprt) const; + exprt compute_inverse_function( const exprt &qvar, const exprt &val, const exprt &f); std::map map_representation_of_sum(const exprt &f) const; - exprt sum_over_map(std::map &m, bool negated=false) const; + exprt sum_over_map( + std::map &m, const typet &type, bool negated=false) const; exprt simplify_sum(const exprt &f) const; - exprt get_array(const exprt &arr, const exprt &size); + void concretize_results(); + void concretize_lengths(); + // Length of char arrays found during concretization + std::map found_length; + + exprt get_array(const exprt &arr, const exprt &size) const; + exprt get_array(const exprt &arr) const; - std::string string_of_array(const exprt &arr, const exprt &size) const; + std::string string_of_array(const array_exprt &arr); }; #endif diff --git a/src/util/irep_ids.txt b/src/util/irep_ids.txt index b9d2d253538..1958522eaee 100644 --- a/src/util/irep_ids.txt +++ b/src/util/irep_ids.txt @@ -744,6 +744,7 @@ string_constraint string_not_contains_constraint cprover_char_literal_func cprover_string_literal_func +cprover_string_array_of_char_pointer_func cprover_string_char_at_func cprover_string_char_set_func cprover_string_code_point_at_func