diff --git a/regression/cbmc/string-refinement-strchr1/test.c b/regression/cbmc/string-refinement-strchr1/test.c new file mode 100644 index 00000000000..3c34ad8927e --- /dev/null +++ b/regression/cbmc/string-refinement-strchr1/test.c @@ -0,0 +1,15 @@ +#include +#include + +int main() +{ + char test[] = "foo"; + + char *first_o = strchr(test, 'o'); + assert(*first_o == test[1]); + assert(*first_o == test[0]); + char *first_t = strchr(test, 't'); + assert(*first_t == test[1]); + assert(*first_t == test[0]); + return 0; +} diff --git a/regression/cbmc/string-refinement-strchr1/test.desc b/regression/cbmc/string-refinement-strchr1/test.desc new file mode 100644 index 00000000000..7984871b3bc --- /dev/null +++ b/regression/cbmc/string-refinement-strchr1/test.desc @@ -0,0 +1,12 @@ +CORE broken-smt-backend +test.c +--refine-strings --max-nondet-string-length 10 +^EXIT=10$ +^SIGNAL=0$ +^\[main.assertion.\d+\] line \d+ assertion \*first_o == test\[1\]: SUCCESS$ +^\[main.assertion.\d+\] line \d+ assertion \*first_o == test\[0\]: FAILURE$ +^\[main.assertion.\d+\] line \d+ assertion \*first_t == test\[1\]: FAILURE$ +^\[main.assertion.\d+\] line \d+ assertion \*first_t == test\[0\]: FAILURE$ +^VERIFICATION FAILED$ +-- +^warning: ignoring diff --git a/regression/cbmc/string-refinement-strchr2/test.c b/regression/cbmc/string-refinement-strchr2/test.c new file mode 100644 index 00000000000..37a3837e721 --- /dev/null +++ b/regression/cbmc/string-refinement-strchr2/test.c @@ -0,0 +1,13 @@ +#include +#include + +int main() +{ + int i; + char *test = i ? "fo\0tis" : "notfoti"; + + char *first_o = strchr(test, 'o'); + assert(first_o != NULL); + assert(*first_o == test[0]); + return 0; +} diff --git a/regression/cbmc/string-refinement-strchr2/test.desc b/regression/cbmc/string-refinement-strchr2/test.desc new file mode 100644 index 00000000000..65615497f6e --- /dev/null +++ b/regression/cbmc/string-refinement-strchr2/test.desc @@ -0,0 +1,10 @@ +CORE broken-smt-backend +test.c +--refine-strings +^EXIT=10$ +^SIGNAL=0$ +^\[main.assertion.\d+\] line \d+ assertion first_o != (NULL|\(\(void \*\)0\)): SUCCESS$ +^\[main.assertion.\d+\] line \d+ assertion \*first_o == test\[0\]: FAILURE$ +^VERIFICATION FAILED$ +-- +^warning: ignoring diff --git a/regression/cbmc/string-refinement-strchr3/test.c b/regression/cbmc/string-refinement-strchr3/test.c new file mode 100644 index 00000000000..57d0ed2671c --- /dev/null +++ b/regression/cbmc/string-refinement-strchr3/test.c @@ -0,0 +1,24 @@ +#include +#include +#include + +int main() +{ + int size = 4; + char *str = malloc(sizeof(char) * size); + char nondet_char; + __CPROVER_assume(nondet_char != '\0'); + str[0] = nondet_char; + char nondet_char2; + __CPROVER_assume(str[1] != '\0'); + // str[1] = nondet_char2; + str[2] = 'z'; + str[3] = '\0'; + char *result = strchr(str, 'z'); + assert(result); + __CPROVER_assert(0, "unreachable"); + // assert(result == str+1); + // assert(result == str); + + return 0; +} diff --git a/regression/cbmc/string-refinement-strchr3/test.desc b/regression/cbmc/string-refinement-strchr3/test.desc new file mode 100644 index 00000000000..99a928e53fa --- /dev/null +++ b/regression/cbmc/string-refinement-strchr3/test.desc @@ -0,0 +1,11 @@ +CORE broken-smt-backend +test.c +--refine-strings +^EXIT=10$ +^SIGNAL=0$ +^\[strchr.assertion.\d+\] line \d+ zero should be present: SUCCESS$ +^\[main.assertion.\d+\] line \d+ assertion result: SUCCESS$ +^\[main.assertion.\d+\] line \d+ unreachable: FAILURE$ +^VERIFICATION FAILED$ +-- +^warning: ignoring diff --git a/src/ansi-c/library/cprover.h b/src/ansi-c/library/cprover.h index a070ca1302c..ff2d6eb25fd 100644 --- a/src/ansi-c/library/cprover.h +++ b/src/ansi-c/library/cprover.h @@ -159,4 +159,9 @@ __CPROVER_bool __CPROVER_overflow_plus(); __CPROVER_bool __CPROVER_overflow_shl(); __CPROVER_bool __CPROVER_overflow_unary_minus(); +long __CPROVER_math_func_string_index_of( + const char *haystack, + __CPROVER_size_t length, + char needle); + #endif // CPROVER_ANSI_C_LIBRARY_CPROVER_H diff --git a/src/ansi-c/library/string.c b/src/ansi-c/library/string.c index 328cfab4440..31fb8be0bd1 100644 --- a/src/ansi-c/library/string.c +++ b/src/ansi-c/library/string.c @@ -1,3 +1,19 @@ + +/* FUNCTION: __CPROVER_math_func_string_index_of */ + +long __CPROVER_math_func_string_index_of( + const char *haystack, + __CPROVER_size_t length, + char needle) +{ + for(__CPROVER_size_t i = 0; i < length; i++) + { + if(haystack[i] == needle) + return i; + } + return -1; +} + /* FUNCTION: __builtin___strcpy_chk */ inline char *__builtin___strcpy_chk(char *dst, const char *src, __CPROVER_size_t s) @@ -932,9 +948,16 @@ inline int memcmp(const void *s1, const void *s2, size_t n) #include #define __CPROVER_STRING_H_INCLUDED #endif +#include // SIZE_MAX #undef strchr +__CPROVER_size_t __VERIFIER_nondet___CPROVER_size_t(); +long __CPROVER_math_func_string_index_of( + const char *haystack, + __CPROVER_size_t length, + char needle); + inline char *strchr(const char *src, int c) { __CPROVER_HIDE:; @@ -945,14 +968,27 @@ inline char *strchr(const char *src, int c) __CPROVER_size_t i; return found?src+i:0; #else - for(__CPROVER_size_t i=0; ; i++) - { - if(src[i]==(char)c) - return ((char *)src)+i; // cast away const-ness - if(src[i]==0) break; - } - return 0; - #endif + + /* for(__CPROVER_size_t i=0; ; i++) */ + /* { */ + /* if(src[i]==(char)c) */ + /* return ((char *)src)+i; // cast away const-ness */ + /* if(src[i]==0) break; */ + /* } */ + /* return 0; */ + + __CPROVER_size_t src_array_length = + SIZE_MAX; // __VERIFIER_nondet___CPROVER_size_t(); + long index_of_zero = + __CPROVER_math_func_string_index_of(src, src_array_length, '\0'); + __CPROVER_assert(index_of_zero >= 0, "zero should be present"); + long index_of_c = + __CPROVER_math_func_string_index_of(src, src_array_length, (char)c); + if(index_of_c >= 0 && index_of_c < index_of_zero) + return ((char *)src) + (__CPROVER_size_t)index_of_c; + else + return (char *)0; +#endif } /* FUNCTION: strrchr */ diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index ec7968b5c70..e2eab41e033 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -49,6 +49,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -242,6 +243,8 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) if(cmdline.isset("string-abstraction")) options.set_option("string-abstraction", true); + if(cmdline.isset("refine-strings")) + options.set_option("refine-strings", true); if(cmdline.isset("reachability-slice-fb")) options.set_option("reachability-slice-fb", true); @@ -363,6 +366,9 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) { options.set_option("refine-strings", true); options.set_option("string-printable", cmdline.isset("string-printable")); + options.set_option( + "max-nondet-string-length", + cmdline.get_value("max-nondet-string-length")); } if(cmdline.isset("max-node-refinement")) @@ -872,6 +878,11 @@ bool cbmc_parse_optionst::process_goto_program( if(options.get_bool_option("string-abstraction")) string_instrumentation(goto_model, log.get_message_handler()); + else if(options.get_bool_option("refine-strings")) + c_string_refinement( + goto_model, + log.get_message_handler(), + options.get_option("max-nondet-string-length")); // remove function pointers log.status() << "Removal of function pointers and virtual functions" diff --git a/src/goto-programs/Makefile b/src/goto-programs/Makefile index ec7887728ce..8aeb5d5aee4 100644 --- a/src/goto-programs/Makefile +++ b/src/goto-programs/Makefile @@ -1,5 +1,6 @@ SRC = adjust_float_expressions.cpp \ builtin_functions.cpp \ + c_string_refinement.cpp \ class_hierarchy.cpp \ class_identifier.cpp \ compute_called_functions.cpp \ diff --git a/src/goto-programs/c_string_refinement.cpp b/src/goto-programs/c_string_refinement.cpp new file mode 100644 index 00000000000..2d17a02d304 --- /dev/null +++ b/src/goto-programs/c_string_refinement.cpp @@ -0,0 +1,381 @@ +/*******************************************************************\ + +Module: C String Refinement + +Author: diffblue + +\*******************************************************************/ + +/// \file +/// C String Refinement + +#include "c_string_refinement.h" + +#include +#include +#include +#include +#include +#include + +#include + +struct c_string_refinementt : public messaget +{ + c_string_refinementt( + symbol_tablet &symbol_table, + message_handlert &message_handler, + size_t max_nondet_string_length) + : messaget{message_handler}, + symbol_table(symbol_table), + ns(symbol_table), + max_nondet_string_length(max_nondet_string_length) + { + } + + void operator()(goto_functionst &goto_functions); + +private: + symbol_tablet &symbol_table; + namespacet ns; + size_t max_nondet_string_length; + size_t symbol_counter = 0; + const typet string_size_type = size_type(); + const typet string_index_type = index_type(); + const typet string_return_type = signed_int_type(); + + auxiliary_symbolt new_aux_string_symbol( + const std::string &name, + const typet &type, + symbol_table_baset &symbol_table); + + refined_string_exprt char_ptr_to_refined_string( + const exprt &char_ptr, + const exprt &length_symbol_expr, + goto_programt &program); + + void do_string_index_of(goto_functiont &string_index_of); + void do_string_substring(goto_functiont &string_substring); + void do_string_concat(goto_functiont &string_concat); +}; + +auxiliary_symbolt c_string_refinementt::new_aux_string_symbol( + const std::string &name, + const typet &type, + symbol_table_baset &symbol_table) +{ + const auto decorated_name = + std::string{name} + std::to_string(++symbol_counter); + auxiliary_symbolt new_symbol; + new_symbol.base_name = decorated_name; + new_symbol.pretty_name = decorated_name; + new_symbol.is_static_lifetime = false; + new_symbol.is_state_var = false; + new_symbol.mode = ID_C; + new_symbol.name = decorated_name; + new_symbol.type = type; + symbol_table.add(new_symbol); + + return new_symbol; +} + +void c_string_refinementt::operator()(goto_functionst &goto_functions) +{ + for(auto it = goto_functions.function_map.begin(); + it != goto_functions.function_map.end(); + it++) + { + if(it->first == CPROVER_PREFIX "math_func_string_index_of") + { + do_string_index_of(it->second); + } + else if(it->first == CPROVER_PREFIX "math_func_string_substring") + { + do_string_substring(it->second); + } + else if(it->first == CPROVER_PREFIX "math_func_string_concat") + { + do_string_concat(it->second); + } + } +} + +void c_string_refinementt::do_string_concat(goto_functiont &string_concat) +{ + const auto ¶ms = to_code_type(string_concat.type).parameters(); + PRECONDITION(params.size() == 6); + const auto &res_param = params.at(0); + const auto &s1_param = params.at(1); + const auto &s2_param = params.at(2); + const auto &end_s1_param = params.at(3); + const auto &start_s2_param = params.at(4); + const auto &end_s2_param = params.at(5); + + const auto res_param_symbol = + symbol_exprt{res_param.get_identifier(), res_param.type()}; + const auto s1_param_symbol = + symbol_exprt{s1_param.get_identifier(), s1_param.type()}; + const auto s2_param_symbol = + symbol_exprt{s2_param.get_identifier(), s2_param.type()}; + const auto end_s1_param_symbol = + symbol_exprt{end_s1_param.get_identifier(), end_s1_param.type()}; + const auto start_s2_param_symbol = + symbol_exprt{start_s2_param.get_identifier(), start_s2_param.type()}; + const auto end_s2_param_symbol = + symbol_exprt{end_s2_param.get_identifier(), end_s2_param.type()}; + + goto_programt new_body; + + const auto res_length_symbol = new_aux_string_symbol( + "StringConcat::res_length", string_size_type, symbol_table); + const auto res_length = res_length_symbol.symbol_expr(); + + // const auto res_refined_string = + // char_ptr_to_refined_string(res_param_symbol, res_length, new_body); + const auto s1_refined_string = char_ptr_to_refined_string( + s1_param_symbol, + typecast_exprt{minus_exprt{end_s1_param_symbol, + from_integer(0, end_s1_param_symbol.type())}, + string_size_type}, + new_body); + const auto s2_refined_string = char_ptr_to_refined_string( + s2_param_symbol, + typecast_exprt{minus_exprt{end_s2_param_symbol, start_s2_param_symbol}, + string_size_type}, + new_body); + + const auto cprover_string_concat_func_symbol = + symbol_exprt{ID_cprover_string_concat_func, + mathematical_function_typet{{res_length.type(), + res_param_symbol.type(), + // res_refined_string.type(), + s1_refined_string.type(), + s2_refined_string.type(), + start_s2_param_symbol.type(), + end_s2_param_symbol.type()}, + string_return_type}}; + + const auto apply_concat = + function_application_exprt{cprover_string_concat_func_symbol, + {res_length, + // res_refined_string, + res_param_symbol, + s1_refined_string, + s2_refined_string, + start_s2_param_symbol, + end_s2_param_symbol}}; + + const auto return_symbol = new_aux_string_symbol( + "StringConcat::return", string_return_type, symbol_table); + const auto return_expr = return_symbol.symbol_expr(); + new_body.add(goto_programt::make_decl(return_expr)); + new_body.add(goto_programt::make_assignment(return_expr, apply_concat)); + new_body.add(goto_programt::make_end_function()); + + string_concat.body = std::move(new_body); +} + +void c_string_refinementt::do_string_substring(goto_functiont &string_substring) +{ + const auto ¶ms = to_code_type(string_substring.type).parameters(); + PRECONDITION(params.size() == 4); + const auto &dst_param = params.at(0); + const auto &src_param = params.at(1); + const auto &from_param = params.at(2); + const auto &to_param = params.at(3); + + const auto dst_param_symbol = + symbol_exprt{dst_param.get_identifier(), dst_param.type()}; + const auto src_param_symbol = + symbol_exprt{src_param.get_identifier(), src_param.type()}; + const auto from_param_symbol = + symbol_exprt{from_param.get_identifier(), from_param.type()}; + const auto to_param_symbol = + symbol_exprt{to_param.get_identifier(), to_param.type()}; + + goto_programt new_body; + const auto dst_length = + plus_exprt{typecast_exprt{to_param_symbol, string_size_type}, + typecast_exprt{from_param_symbol, string_size_type}}; + + // const auto dst_refined_string = + // char_ptr_to_refined_string(dst_param_symbol, dst_length, new_body); + const auto src_length_symbol = new_aux_string_symbol( + "StringSubstring::src_length", string_size_type, symbol_table); + + const auto src_refined_string = char_ptr_to_refined_string( + src_param_symbol, src_length_symbol.symbol_expr(), new_body); + + const auto cprover_string_substring_func_symbol = + symbol_exprt{ID_cprover_string_substring_func, + mathematical_function_typet{{dst_length.type(), + // dst_refined_string.type(), + dst_param_symbol.type(), + src_refined_string.type(), + from_param_symbol.type(), + to_param_symbol.type()}, + string_return_type}}; + + const auto apply_substring = + function_application_exprt{cprover_string_substring_func_symbol, + {dst_length, + // dst_refined_string, + dst_param_symbol, + src_refined_string, + from_param_symbol, + to_param_symbol}}; + + const auto return_symbol = new_aux_string_symbol( + "StringSubstring::return", string_return_type, symbol_table); + const auto return_expr = return_symbol.symbol_expr(); + + new_body.add(goto_programt::make_decl(return_expr)); + new_body.add(goto_programt::make_assignment(return_expr, apply_substring)); + const auto char_type = dst_param_symbol.type().subtype(); + new_body.add(goto_programt::make_assignment( + dereference_exprt{ + plus_exprt{dst_param_symbol, + minus_exprt{dst_length, from_integer(1, string_size_type)}}}, + from_integer(0, char_type))); + // new_body.add(goto_programt::make_return(code_returnt{return_expr})); + new_body.add(goto_programt::make_end_function()); + + string_substring.body = std::move(new_body); +} + +void c_string_refinementt::do_string_index_of(goto_functiont &string_index_of) +{ + const auto ¶ms = to_code_type(string_index_of.type).parameters(); + PRECONDITION(params.size() == 3); + const auto &str_param = params.at(0); + const auto &length_param = params.at(1); + const auto &ch_param = params.at(2); + // const auto refined_string_type = + // refined_string_typet{size_type(), to_pointer_type(str_param.type())}; + const auto str_param_symbol = + symbol_exprt{str_param.get_identifier(), str_param.type()}; + const auto length_param_symbol = + symbol_exprt{length_param.get_identifier(), length_param.type()}; + const auto ch_param_symbol = + symbol_exprt{ch_param.get_identifier(), ch_param.type()}; + goto_programt new_body; + const auto refined_string = + char_ptr_to_refined_string(str_param_symbol, length_param_symbol, new_body); + + const auto cprover_string_index_of_func_symbol = symbol_exprt{ + ID_cprover_string_index_of_func, + mathematical_function_typet( + {refined_string.type(), ch_param_symbol.type()}, string_index_type)}; + const auto apply_index_of = function_application_exprt{ + cprover_string_index_of_func_symbol, {refined_string, ch_param_symbol}}; + const auto return_symbol = new_aux_string_symbol( + "StringIndexOf::return", string_index_type, symbol_table); + const auto return_expr = return_symbol.symbol_expr(); + new_body.add(goto_programt::make_decl(return_expr)); + new_body.add(goto_programt::make_assignment(return_expr, apply_index_of)); + new_body.add(goto_programt::make_return(code_returnt{return_expr})); + new_body.add(goto_programt::make_end_function()); + + string_index_of.body = std::move(new_body); +} + +/// Helper function to produce the necessary associations related to +/// char-pointers needed for string solver. +refined_string_exprt c_string_refinementt::char_ptr_to_refined_string( + const exprt &char_ptr, + const exprt &length_symbol_expr, + goto_programt &program) +{ + // char *string_content = src; + const auto content_symbol = new_aux_string_symbol( + "cprover_string_index_of_func::string_content", + char_ptr.type(), + symbol_table); + const auto content_symbol_expr = content_symbol.symbol_expr(); + program.add(goto_programt::make_decl(content_symbol_expr)); + program.add(goto_programt::make_assignment(content_symbol_expr, char_ptr)); + + // char (*nondet_infinite_array_ponter)[\infty]; + const symbolt nondet_infinite_array_pointer = new_aux_string_symbol( + "cprover_string_index_of_func::nondet_infinite_array_pointer", + pointer_type(array_typet{char_type(), infinity_exprt(string_size_type)}), + symbol_table); + const symbol_exprt nondet_infinite_array_pointer_expr = + nondet_infinite_array_pointer.symbol_expr(); + program.add(goto_programt::make_decl(nondet_infinite_array_pointer_expr)); + program.add(goto_programt::make_assignment( + nondet_infinite_array_pointer_expr, + typecast_exprt{address_of_exprt{char_ptr}, + nondet_infinite_array_pointer_expr.type()})); + + // cprover_associate_length_to_array_func(nondet_infinite_array_pointer, + // string_length); + const auto refined_string_expr = refined_string_exprt{ + length_symbol_expr, + content_symbol_expr, + refined_string_typet{length_symbol_expr.type(), + to_pointer_type(char_ptr.type())}}; + const symbolt return_array_length = new_aux_string_symbol( + "cprover_string_index_of_func::return_array_length", + string_index_type, + symbol_table); + dereference_exprt nondet_array_expr{nondet_infinite_array_pointer_expr}; + const auto cprover_associate_length_to_array_func_symbol = + symbol_exprt{ID_cprover_associate_length_to_array_func, + mathematical_function_typet{ + {nondet_array_expr.type(), length_symbol_expr.type()}, + return_array_length.type}}; + const auto apply_associate_length_to_array = function_application_exprt{ + cprover_associate_length_to_array_func_symbol, + {nondet_array_expr, refined_string_expr.length()}, + return_array_length.type}; + const auto return_length_expr = return_array_length.symbol_expr(); + program.add(goto_programt::make_decl(return_length_expr)); + program.add(goto_programt::make_assignment( + return_length_expr, apply_associate_length_to_array)); + + // cprover_associate_array_to_pointer_func(nondet_infinite_array_pointer, + // src); + const address_of_exprt array_pointer( + index_exprt(nondet_array_expr, from_integer(0, string_index_type))); + program.add(goto_programt::make_assignment(array_pointer, char_ptr)); + const symbolt return_sym_pointer = new_aux_string_symbol( + "return_array_pointer", string_index_type, symbol_table); + const auto cprover_associate_array_to_pointer_func_symbol = + symbol_exprt{ID_cprover_associate_array_to_pointer_func, + mathematical_function_typet{ + {nondet_array_expr.type(), array_pointer.type()}, + return_sym_pointer.type}}; + const auto apply_associate_pointer_to_array = + function_application_exprt{cprover_associate_array_to_pointer_func_symbol, + {nondet_array_expr, char_ptr}}; + const auto return_pointer_expr = return_sym_pointer.symbol_expr(); + program.add(goto_programt::make_decl(return_pointer_expr)); + program.add(goto_programt::make_assignment( + return_pointer_expr, apply_associate_pointer_to_array)); + + return refined_string_exprt{refined_string_expr.length(), char_ptr}; +} + +void c_string_refinement( + goto_modelt &goto_model, + message_handlert &message_handler, + const std::string &maybe_max_nondet_string_length_string) +{ + size_t max_nondet_string_length = 1000; // default + if(!maybe_max_nondet_string_length_string.empty()) + { + auto maybe_max_nondet_string_length = + string2optional_size_t(maybe_max_nondet_string_length_string); + if(!maybe_max_nondet_string_length.has_value()) + { + throw invalid_command_line_argument_exceptiont{ + "max string length should be a number", "--max-nondet-string-length"}; + } + max_nondet_string_length = *maybe_max_nondet_string_length; + } + auto c_string_refinement = c_string_refinementt{ + goto_model.symbol_table, message_handler, max_nondet_string_length}; + + c_string_refinement(goto_model.goto_functions); +} diff --git a/src/goto-programs/c_string_refinement.h b/src/goto-programs/c_string_refinement.h new file mode 100644 index 00000000000..19203191ec3 --- /dev/null +++ b/src/goto-programs/c_string_refinement.h @@ -0,0 +1,25 @@ +/*******************************************************************\ + +Module: C String Refinement + +Author: diffblue + +\*******************************************************************/ + +/// \file +/// C String Refinement + +#ifndef CPROVER_GOTO_PROGRAMS_C_STRING_REFINEMENT_H +#define CPROVER_GOTO_PROGRAMS_C_STRING_REFINEMENT_H + +#include "goto_functions.h" + +class message_handlert; +class goto_modelt; + +void c_string_refinement( + goto_modelt &goto_model, + message_handlert &message_handler, + const std::string &max_nondet_string_length); + +#endif // CPROVER_GOTO_PROGRAMS_C_STRING_REFINEMENT_H diff --git a/src/solvers/strings/array_pool.cpp b/src/solvers/strings/array_pool.cpp index ab7d3c7cf5b..8859b3e0760 100644 --- a/src/solvers/strings/array_pool.cpp +++ b/src/solvers/strings/array_pool.cpp @@ -7,6 +7,8 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com \*******************************************************************/ #include "array_pool.h" +#include "string_refinement_util.h" +#include symbol_exprt symbol_generatort:: operator()(const irep_idt &prefix, const typet &type) @@ -102,13 +104,15 @@ array_string_exprt array_poolt::make_char_array_for_char_pointer( const bool is_constant_array = char_pointer.id() == ID_address_of && to_address_of_expr(char_pointer).object().id() == ID_index && - to_index_expr(to_address_of_expr(char_pointer).object()).array().id() == - ID_array; + (to_index_expr(to_address_of_expr(char_pointer).object()).array().id() == + ID_array || + to_index_expr(to_address_of_expr(char_pointer).object()).array().id() == + ID_string_constant); if(is_constant_array) { - return to_array_string_expr( - to_index_expr(to_address_of_expr(char_pointer).object()).array()); + return to_array_string_expr(convert_string_representation_to_array( + to_index_expr(to_address_of_expr(char_pointer).object()).array())); } const std::string symbol_name = "char_array_" + id2string(char_pointer.id()); const auto array_sym = @@ -135,35 +139,57 @@ static void attempt_assign_length_from_type( std::unordered_map &length_of_array, symbol_generatort &symbol_generator) { - DATA_INVARIANT( - array_expr.id() != ID_if, - "attempt_assign_length_from_type does not handle if exprts"); - // This invariant seems always true, but I don't know why. - // If we find a case where this is violated, try calling - // attempt_assign_length_from_type on the true and false cases. + if(array_expr.id() == ID_if) + { + const auto &if_expr = to_if_expr(array_expr); + attempt_assign_length_from_type( + to_array_string_expr(if_expr.true_case()), + length_of_array, + symbol_generator); + attempt_assign_length_from_type( + to_array_string_expr(if_expr.false_case()), + length_of_array, + symbol_generator); + return; + } const exprt &size_from_type = to_array_type(array_expr.type()).size(); const exprt &size_to_assign = size_from_type != infinity_exprt(size_from_type.type()) ? size_from_type : symbol_generator("string_length", array_expr.length_type()); - const auto emplace_result = + // c++17 + // length_of_array.insert_or_assign(array_expr, size_to_assign); + if(length_of_array.count(array_expr) == 0) + { length_of_array.emplace(array_expr, size_to_assign); - INVARIANT( - emplace_result.second, - "attempt_assign_length_from_type should only be called when no entry" - "for the array_string_exprt exists in the length_of_array map"); + } + else + { + length_of_array[array_expr] = size_to_assign; + } + // const auto emplace_result = + // INVARIANT( + // emplace_result.second, + // "attempt_assign_length_from_type should only be called when no entry " + // "for the array_string_exprt exists in the length_of_array map"); } void array_poolt::insert( const exprt &pointer_expr, const array_string_exprt &array_expr) { - const auto it_bool = + const auto association_it = arrays_of_pointers.find(pointer_expr); + if(association_it != arrays_of_pointers.end()) + { + INVARIANT( + association_it->second == array_expr, + "should not associate two different arrays to the same pointer"); + } + else + { arrays_of_pointers.insert(std::make_pair(pointer_expr, array_expr)); - - INVARIANT( - it_bool.second, "should not associate two arrays to the same pointer"); + } if(length_of_array.find(array_expr) == length_of_array.end()) { diff --git a/src/solvers/strings/string_constraint_generator_indexof.cpp b/src/solvers/strings/string_constraint_generator_indexof.cpp index 9a3e1f33633..7e97cb40822 100644 --- a/src/solvers/strings/string_constraint_generator_indexof.cpp +++ b/src/solvers/strings/string_constraint_generator_indexof.cpp @@ -41,14 +41,17 @@ string_constraint_generatort::add_axioms_for_index_of( const exprt &from_index) { string_constraintst constraints; - const typet &index_type = str.length_type(); + const typet &index_type = from_index.type(); symbol_exprt index = fresh_symbol("index_of", index_type); symbol_exprt contains = fresh_symbol("contains_in_index_of"); exprt minus1 = from_integer(-1, index_type); and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), - binary_relation_exprt(index, ID_lt, array_pool.get_or_create_length(str))); + binary_relation_exprt( + index, + ID_lt, + typecast_exprt{array_pool.get_or_create_length(str), index.type()})); constraints.existential.push_back(a1); equal_exprt a2(not_exprt(contains), equal_exprt(index, minus1)); @@ -75,7 +78,8 @@ string_constraint_generatort::add_axioms_for_index_of( string_constraintt a5( m, lower_bound, - zero_if_negative(array_pool.get_or_create_length(str)), + zero_if_negative( + typecast_exprt{array_pool.get_or_create_length(str), m.type()}), implies_exprt(not_exprt(contains), not_exprt(equal_exprt(str[m], c)))); constraints.universal.push_back(a5); @@ -306,9 +310,9 @@ string_constraint_generatort::add_axioms_for_index_of( PRECONDITION(args.size() == 2 || args.size() == 3); const array_string_exprt str = get_string_expr(array_pool, args[0]); const exprt &c = args[1]; - const typet &index_type = str.length_type(); + const typet &index_type = f.type(); const typet &char_type = str.content().type().subtype(); - PRECONDITION(f.type() == index_type); + const exprt from_index = args.size() == 2 ? from_integer(0, index_type) : args[2]; diff --git a/src/solvers/strings/string_constraint_generator_main.cpp b/src/solvers/strings/string_constraint_generator_main.cpp index e83b0d9460b..26b840bf76c 100644 --- a/src/solvers/strings/string_constraint_generator_main.cpp +++ b/src/solvers/strings/string_constraint_generator_main.cpp @@ -19,6 +19,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include "string_constraint_generator.h" #include "string_refinement_invariant.h" +#include "string_refinement_util.h" #include #include @@ -68,8 +69,9 @@ exprt string_constraint_generatort::associate_array_to_pointer( /// \todo: we allow expression of the form of `arr[0]` instead of `arr` as /// the array argument but this could go away. array_string_exprt array_expr = to_array_string_expr( - f.arguments()[0].id() == ID_index ? to_index_expr(f.arguments()[0]).array() - : f.arguments()[0]); + f.arguments()[0].id() == ID_index + ? to_index_expr(f.arguments()[0]).array() + : convert_string_representation_to_array(f.arguments()[0])); const exprt &pointer_expr = f.arguments()[1]; array_pool.insert(simplify_expr(pointer_expr, ns), array_expr); diff --git a/src/solvers/strings/string_refinement.cpp b/src/solvers/strings/string_refinement.cpp index 32960538db9..f91b2c51e1e 100644 --- a/src/solvers/strings/string_refinement.cpp +++ b/src/solvers/strings/string_refinement.cpp @@ -24,9 +24,11 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include #include +#include #include #include #include +#include #include #include @@ -327,6 +329,16 @@ static void add_equations_for_symbol_resolution( continue; } + if( + rhs.id() == ID_symbol && + has_prefix( + id2string(to_symbol_expr(rhs).get_identifier()), "symex::args::")) + { + // Symex introduces a new name for non-constant arguments that it uses in + // trace building, we do not need it here for symbol resolve. + continue; + } + if(is_char_pointer_type(rhs.type())) { symbol_solver.make_union(lhs, simplify_expr(rhs, ns)); @@ -1066,7 +1078,7 @@ static exprt get_char_array_and_concretize( const namespacet &ns, messaget::mstreamt &stream, const array_string_exprt &arr, - array_poolt &array_pool) + const array_poolt &array_pool) { stream << "- " << format(arr) << ":\n"; stream << std::string(4, ' ') << "- type: " << format(arr.type()) @@ -1115,7 +1127,7 @@ void debug_model( const namespacet &ns, const std::function &super_get, const std::vector &symbols, - array_poolt &array_pool) + const array_poolt &array_pool) { stream << "debug_model:" << '\n'; for(const auto &pointer_array : generator.array_pool.get_arrays_of_pointers()) @@ -1201,7 +1213,7 @@ static optionalt substitute_array_access( symbol_generatort &symbol_generator, const bool left_propagate) { - const exprt &array = index_expr.array(); + const auto array = convert_string_representation_to_array(index_expr.array()); if(auto array_of = expr_try_dynamic_cast(array)) return array_of->op(); if(auto array_with = expr_try_dynamic_cast(array)) @@ -1617,8 +1629,20 @@ static void initial_index_set( } if(auto ite = expr_try_dynamic_cast(s)) { - initial_index_set(index_set, ns, qvar, upper_bound, ite->true_case(), i); - initial_index_set(index_set, ns, qvar, upper_bound, ite->false_case(), i); + initial_index_set( + index_set, + ns, + qvar, + upper_bound, + convert_string_representation_to_array(ite->true_case()), + i); + initial_index_set( + index_set, + ns, + qvar, + upper_bound, + convert_string_representation_to_array(ite->false_case()), + i); return; } const minus_exprt u_minus_1(upper_bound, from_integer(1, upper_bound.type())); @@ -1641,7 +1665,7 @@ static void initial_index_set( if(it->id() == ID_index && is_char_type(it->type())) { const auto &index_expr = to_index_expr(*it); - const auto &s = index_expr.array(); + const auto s = convert_string_representation_to_array(index_expr.array()); initial_index_set(index_set, ns, qvar, bound, s, index_expr.index()); it.next_sibling_or_parent(); } @@ -1835,7 +1859,9 @@ exprt string_refinementt::get(const exprt &expr) const else UNREACHABLE; } - const auto array = supert::get(current.get()); + const auto array = + convert_string_representation_to_array(supert::get(current.get())); + const auto index = get(index_expr->index()); // If the underlying solver does not know about the existence of an array, diff --git a/src/solvers/strings/string_refinement.h b/src/solvers/strings/string_refinement.h index 98b4d9c785d..1cdd7ba3094 100644 --- a/src/solvers/strings/string_refinement.h +++ b/src/solvers/strings/string_refinement.h @@ -56,6 +56,7 @@ Author: Alberto Griggio, alberto.griggio@gmail.com // it is not turned on by default and not all options are available. #define OPT_STRING_REFINEMENT_CBMC \ "(refine-strings)" \ + "(max-nondet-string-length):" \ "(string-printable)" #define HELP_STRING_REFINEMENT_CBMC \ diff --git a/src/solvers/strings/string_refinement_util.cpp b/src/solvers/strings/string_refinement_util.cpp index dd0cd19cfd0..3b44fcbf577 100644 --- a/src/solvers/strings/string_refinement_util.cpp +++ b/src/solvers/strings/string_refinement_util.cpp @@ -14,19 +14,22 @@ Author: Diffblue Ltd. #include #include #include +#include #include #include #include #include #include #include -#include +#include +#include #include bool is_char_type(const typet &type) { - return type.id() == ID_unsignedbv && to_unsignedbv_type(type).get_width() <= - STRING_REFINEMENT_MAX_CHAR_WIDTH; + return (type.id() == ID_unsignedbv || type.id() == ID_signedbv) && + to_bitvector_type(type).get_width() <= + STRING_REFINEMENT_MAX_CHAR_WIDTH; } bool is_char_array_type(const typet &type, const namespacet &ns) @@ -192,3 +195,52 @@ array_exprt interval_sparse_arrayt::concretize( size, it == entries.end() ? default_value : it->second); return array; } + +exprt byte_extract_object_with_constant_offset( + const byte_extract_exprt &byte_extract_expr) +{ + const auto &offset = byte_extract_expr.offset(); + PRECONDITION(offset.id() == ID_constant); + + const auto &constant_offset = to_constant_expr(offset); + const auto &op = byte_extract_expr.op(); + auto numeric_offset = + string2optional_int(id2string(constant_offset.get_value())); + CHECK_RETURN(numeric_offset.has_value()); + + return *numeric_offset == 0 + ? op + : array_exprt{ + array_exprt::operandst{op.operands().begin() + *numeric_offset, + op.operands().end()}, + to_array_type(op.type())}; +} + +exprt convert_string_representation_to_array(const exprt &expr) +{ + exprt result = expr; + if(can_cast_expr(result)) + { + result = + byte_extract_object_with_constant_offset(to_byte_extract_expr(result)); + } + if( + auto const &string_constant = + expr_try_dynamic_cast(result)) + { + result = static_cast(string_constant->to_array_expr()); + } + CHECK_RETURN(result.type().id() == ID_array); + + CHECK_RETURN( + result.id() == ID_array || result.id() == ID_if || + result.id() == ID_symbol || result.id() == ID_array_list); + if(result.id() == ID_if) + { + return if_exprt{result.op0(), + convert_string_representation_to_array(result.op1()), + convert_string_representation_to_array(result.op2()), + result.type()}; + } + return result; +} diff --git a/src/solvers/strings/string_refinement_util.h b/src/solvers/strings/string_refinement_util.h index dee45c60a75..e10011d1054 100644 --- a/src/solvers/strings/string_refinement_util.h +++ b/src/solvers/strings/string_refinement_util.h @@ -15,8 +15,8 @@ Author: Diffblue Ltd. #include "string_constraint.h" #include "string_constraint_generator.h" -/// For now, any unsigned bitvector type of width smaller or equal to 16 is -/// considered a character. +/// For now, any signed or unsigned bitvector type of width smaller or equal to +/// 16 is considered a character. /// \note type that are not characters maybe detected as characters (for /// instance unsigned char in C), this will make dec_solve do unnecessary /// steps for these, but should not affect correctness. @@ -140,4 +140,7 @@ class interval_sparse_arrayt final : public sparse_arrayt } }; +exprt byte_extract_object_with_constant_offset(const exprt &expr); + +exprt convert_string_representation_to_array(const exprt &expr); #endif // CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_UTIL_H diff --git a/src/util/config.cpp b/src/util/config.cpp index cac312f4a97..a4f74332ab0 100644 --- a/src/util/config.cpp +++ b/src/util/config.cpp @@ -1047,6 +1047,11 @@ bool configt::set(const cmdlinet &cmdline) else ansi_c.string_abstraction=false; + if(cmdline.isset("refine-strings")) + ansi_c.string_c_refinement = true; + else + ansi_c.string_c_refinement = false; + if(cmdline.isset("no-library")) ansi_c.lib=configt::ansi_ct::libt::LIB_NONE; diff --git a/src/util/config.h b/src/util/config.h index 608bbbeaf98..925a6a034ce 100644 --- a/src/util/config.h +++ b/src/util/config.h @@ -129,6 +129,7 @@ class configt libt lib; bool string_abstraction; + bool string_c_refinement; static const std::size_t default_object_bits=8; } ansi_c; diff --git a/src/util/pointer_offset_size.cpp b/src/util/pointer_offset_size.cpp index bae960a1021..73fa82a28aa 100644 --- a/src/util/pointer_offset_size.cpp +++ b/src/util/pointer_offset_size.cpp @@ -632,6 +632,17 @@ optionalt get_subexpression_at_offset( return typecast_exprt(expr, target_type_raw); } + if( + offset_bytes == 0 && expr.type().id() == ID_pointer && + target_type_raw.id() == ID_array) + { + // subexpression at offset zero is the whole thing even for arrays + return byte_extract_exprt{byte_extract_id(), + expr, + from_integer(offset_bytes, index_type()), + target_type_raw}; + } + const typet &source_type = ns.follow(expr.type()); const auto target_size_bits = pointer_offset_bits(target_type_raw, ns); if(!target_size_bits.has_value()) diff --git a/src/util/string_constant.h b/src/util/string_constant.h index eab8d29badc..ac8b5ce5297 100644 --- a/src/util/string_constant.h +++ b/src/util/string_constant.h @@ -50,4 +50,10 @@ inline string_constantt &to_string_constant(typet &type) return to_string_constant((exprt &)type); } +template <> +inline bool can_cast_expr(const exprt &base) +{ + return base.id() == ID_string_constant; +} + #endif // CPROVER_ANSI_C_STRING_CONSTANT_H