diff --git a/regression/.gitignore b/regression/.gitignore new file mode 100644 index 00000000000..750ecf9f446 --- /dev/null +++ b/regression/.gitignore @@ -0,0 +1 @@ +tests.log diff --git a/regression/cbmc-java/assume1/Assume1.class b/regression/cbmc-java/assume1/Assume1.class new file mode 100644 index 00000000000..b9a49bc9971 Binary files /dev/null and b/regression/cbmc-java/assume1/Assume1.class differ diff --git a/regression/cbmc-java/assume1/Assume1.java b/regression/cbmc-java/assume1/Assume1.java new file mode 100644 index 00000000000..4de0afd025b --- /dev/null +++ b/regression/cbmc-java/assume1/Assume1.java @@ -0,0 +1,10 @@ +import org.cprover.CProver; + +class Assume1 +{ + static void foo(int x) + { + CProver.assume(x>3); + assert x>0; + } +} diff --git a/regression/cbmc-java/assume1/test.desc b/regression/cbmc-java/assume1/test.desc new file mode 100644 index 00000000000..1f80a1c1e86 --- /dev/null +++ b/regression/cbmc-java/assume1/test.desc @@ -0,0 +1,8 @@ +CORE +Assume1.class +--function Assume1.foo +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc-java/assume2/Assume2.class b/regression/cbmc-java/assume2/Assume2.class new file mode 100644 index 00000000000..36e09875d7d Binary files /dev/null and b/regression/cbmc-java/assume2/Assume2.class differ diff --git a/regression/cbmc-java/assume2/Assume2.java b/regression/cbmc-java/assume2/Assume2.java new file mode 100644 index 00000000000..6991167bfb8 --- /dev/null +++ b/regression/cbmc-java/assume2/Assume2.java @@ -0,0 +1,10 @@ +import org.cprover.CProver; + +class Assume2 +{ + static void foo(int x) + { + CProver.assume(x>3); + assert x>4; + } +} diff --git a/regression/cbmc-java/assume2/test.desc b/regression/cbmc-java/assume2/test.desc new file mode 100644 index 00000000000..35a954116f8 --- /dev/null +++ b/regression/cbmc-java/assume2/test.desc @@ -0,0 +1,8 @@ +CORE +Assume2.class +--function Assume2.foo +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +-- +^warning: ignoring diff --git a/regression/cbmc-java/assume3/Assume3.class b/regression/cbmc-java/assume3/Assume3.class new file mode 100644 index 00000000000..916ad4065fb Binary files /dev/null and b/regression/cbmc-java/assume3/Assume3.class differ diff --git a/regression/cbmc-java/assume3/Assume3.java b/regression/cbmc-java/assume3/Assume3.java new file mode 100644 index 00000000000..895deee9ca0 --- /dev/null +++ b/regression/cbmc-java/assume3/Assume3.java @@ -0,0 +1,10 @@ +import org.cprover.CProver; + +class Assume3 +{ + public static void main(String[] args) + { + CProver.assume(false); + assert false; + } +} diff --git a/regression/cbmc-java/assume3/test.desc b/regression/cbmc-java/assume3/test.desc new file mode 100644 index 00000000000..0ad8c00399f --- /dev/null +++ b/regression/cbmc-java/assume3/test.desc @@ -0,0 +1,8 @@ +CORE +Assume3.class + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc-java/covered1/covered1.class b/regression/cbmc-java/covered1/covered1.class new file mode 100644 index 00000000000..7cb91496b7d Binary files /dev/null and b/regression/cbmc-java/covered1/covered1.class differ diff --git a/regression/cbmc-java/covered1/covered1.java b/regression/cbmc-java/covered1/covered1.java new file mode 100644 index 00000000000..018e65336a6 --- /dev/null +++ b/regression/cbmc-java/covered1/covered1.java @@ -0,0 +1,37 @@ +public class covered1 +{ + // this is a variable + int x=1; + //these are two, one line off the first + int y=2; + int z=3; + //this is part of static init + static int z0=0; + + //another non-static + int a; + int b; + static boolean odd; + + static + { + odd=(z0+1)%2==0; + } + + covered1(int a, int b) + { + this.a=a*b; + this.b=a+b; + if(this.a==a) + z0++; + else + odd=!odd; + } + // at the back + int z1=2; + int z2=3; + int z3=4; + // + static int z4=5; + int z5=5; +} diff --git a/regression/cbmc-java/covered1/test.desc b/regression/cbmc-java/covered1/test.desc new file mode 100644 index 00000000000..32af766bdb7 --- /dev/null +++ b/regression/cbmc-java/covered1/test.desc @@ -0,0 +1,19 @@ +CORE +covered1.class +--cover location --json-ui --show-properties +^EXIT=0$ +^SIGNAL=0$ +.*\"coveredLines\": \"22\",$ +.*\"coveredLines\": \"4,6,7,23-25,31-33,36\",$ +.*\"coveredLines\": \"26\",$ +.*\"coveredLines\": \"28\",$ +.*\"coveredLines\": \"28\",$ +.*\"coveredLines\": \"28\",$ +.*\"coveredLines\": \"28\",$ +.*\"coveredLines\": \"29\",$ +.*\"coveredLines\": \"9,18\",$ +.*\"coveredLines\": \"18\",$ +.*\"coveredLines\": \"18\",$ +.*\"coveredLines\": \"18,35\",$ +-- +^warning: ignoring diff --git a/scripts/bash-autocomplete/.gitignore b/scripts/bash-autocomplete/.gitignore new file mode 100644 index 00000000000..1e0acedc4f2 --- /dev/null +++ b/scripts/bash-autocomplete/.gitignore @@ -0,0 +1 @@ +cbmc.sh diff --git a/scripts/bash-autocomplete/Readme.md b/scripts/bash-autocomplete/Readme.md new file mode 100644 index 00000000000..00101d99159 --- /dev/null +++ b/scripts/bash-autocomplete/Readme.md @@ -0,0 +1,31 @@ +# CBMC Autocomplete Scripts for Bash +This directory contains an autocomplete script for bash. +## Installation +1. Compile cbmc and + +2. `cd scripts/bash-autocomplete` + +3. `./extract-switches.sh` + +4. Put the following at the end of you in your `~/.bashrc`, with the directories adapted to your directory structure: + ```bash + cbmcautocomplete=~/diffblue/cbmc/scripts/bash-autocomplete/cbmc.sh + if [ -f $cbmcautocomplete ]; then + . $cbmcautocomplete + fi + ``` +## Usage +As with the usual autocomplete in bash, start typing a switch to complete it, for example: +``` +cbmc --clas +``` +will complete to +``` +cbmc --classpath +``` + +## Features implemented + +* Completing all switches +* Completing values for `--cover`, `--mm` and `--arch` +* When completing a name of a file to analyze, only files with supported extensions are shown. diff --git a/scripts/bash-autocomplete/cbmc.sh.template b/scripts/bash-autocomplete/cbmc.sh.template new file mode 100644 index 00000000000..cbd64762b2d --- /dev/null +++ b/scripts/bash-autocomplete/cbmc.sh.template @@ -0,0 +1,43 @@ +#!/bin/bash +_cbmc_autocomplete() +{ + #list of all switches cbmc has. IMPORTANT: in the template file, this variable must be defined on line 5. + local switches="" + #word on which the cursor is + local cur=${COMP_WORDS[COMP_CWORD]} + #previous word (in case it is a switch with a parameter) + local prev=${COMP_WORDS[COMP_CWORD-1]} + + #check if the command before cursor is a switch that takes parameters, if yes, + #offer a choice of parameters + case "$prev" in + --cover) #for coverage we list the options explicitly + COMPREPLY=( $( compgen -W "assertion path branch location decision condition mcdc cover" -- $cur ) ) + return 0 + ;; + --mm) #for memory models we list the options explicitly + COMPREPLY=( $( compgen -W "sc tso pso" -- $cur ) ) + return 0 + ;; + --arch) #for architecture we list the options explicitly + COMPREPLY=( $( compgen -W "i386 x86_64" -- $cur ) ) + return 0 + ;; + -I|--classpath|-cp|--outfile|--existing-coverage|--graphml-cex) + #a switch that takes a file parameter of which we don't know an extension + #TODO probably we can do more for -I, --classpath, -cp + _filedir + return 0 + ;; + esac + + #complete a switch from a standard list, if the parameter under cursor starts with a hyphen + if [[ "$cur" == -* ]]; then + COMPREPLY=( $( compgen -W "$switches" -- $cur ) ) + return 0 + fi + + #if none of the above applies, offer directories and files that we can analyze + _filedir "@(class|jar|cpp|cc|c\+\+|ii|cxx|c|i|gb)" +} +complete -F _cbmc_autocomplete cbmc diff --git a/scripts/bash-autocomplete/extract_switches.sh b/scripts/bash-autocomplete/extract_switches.sh new file mode 100755 index 00000000000..e000b469bea --- /dev/null +++ b/scripts/bash-autocomplete/extract_switches.sh @@ -0,0 +1,48 @@ +#!/bin/bash +echo "Compiling the helper file to extract the raw list of parameters from cbmc" +g++ -c -MMD -MP -std=c++11 -Wall -I ../../src/ -ftrack-macro-expansion=0 -fno-diagnostics-show-caret switch_extractor_helper.c -o tmp.o 2> pragma.txt + +retval=$? + +#clean up compiled files, we don't need them. +rm tmp.o 2> /dev/null +rm tmp.d 2> /dev/null + +#check if compilation went fine +if [ $retval -ne 0 ]; then + echo "Problem compiling the helper file, parameter list not extracted." + exit 1; +fi + +echo "Converting the raw parameter list to the format required by autocomplete scripts" +rawstring=`sed "s/^.*pragma message: \(.*\)/\1/" pragma.txt` +#delete pragma file, we won't need it +rm pragma.txt 2> /dev/null + +#now the main bit, convert from raw format to a proper list of switches +cleanstring=`( + #extract 2-hyphen switches, such as --foo + #grep for '(foo)' expressions, and then use sed to remove parantheses and put '--' at the start + (echo $rawstring | grep -o "([^)]*)" | sed "s/^.\(.*\).$/--\1/") ; + #extract 1-hyphen switches, such as -F + #use sed to remove all (foo) expressions, then you're left with switches and ':', so grep the colons out and then use sed to include the '-' + (echo $rawstring | sed "s/([^)]*)//g" | grep -o "[a-zA-Z0-9]" | sed "s/\(.*\)/-\1/") + ) | tr '\n' ' '` + +#sanity check that there is only one line of output +if [ `echo $cleanstring | wc -l | awk '{print $1}'` -ne 1 ]; then + echo "Problem converting the parameter list to the correct format, I was expecting one line but either got 0 or >2. This is likely to be an error in this conversion script." + exit 1; +fi + +#sanity check that there are no dangerous characters +echo $cleanstring | grep -q "[^a-zA-Z0-9 -]" +if [ $? -eq 0 ]; then + echo "Problem converting the parameter list to the correct format, illegal characters detected. This is likely to be an error in this conversion script." + exit 1; +fi + +echo "Injecting the parameter list to the autocomplete file." +sed "5 s/.*/ local switches=\"$cleanstring\"/" cbmc.sh.template > cbmc.sh + +rm pragma.txt 2> /dev/null diff --git a/scripts/bash-autocomplete/switch_extractor_helper.c b/scripts/bash-autocomplete/switch_extractor_helper.c new file mode 100644 index 00000000000..4d9cc2e56fe --- /dev/null +++ b/scripts/bash-autocomplete/switch_extractor_helper.c @@ -0,0 +1,3 @@ +#include "cbmc/cbmc_parse_options.h" + +#pragma message CBMC_OPTIONS diff --git a/src/Makefile b/src/Makefile index 704a6754a45..7562863742e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -84,4 +84,12 @@ glucose-download: @(cd ../glucose-syrup; patch -p1 < ../scripts/glucose-syrup-patch) @rm glucose-syrup.tgz -.PHONY: minisat2-download glucose-download +cprover-jar-build: + @echo "Building org.cprover.jar" + @(cd java_bytecode/library/; \ + mkdir -p target; \ + javac -d target/ `find src/ -name "*.java"`; \ + cd target; jar cf org.cprover.jar `find . -name "*.class"`; \ + mv org.cprover.jar ../../../) + +.PHONY: minisat2-download glucose-download cprover-jar-build diff --git a/src/ansi-c/ansi_c_language.cpp b/src/ansi-c/ansi_c_language.cpp index dc69515ede2..159b0d6df40 100644 --- a/src/ansi-c/ansi_c_language.cpp +++ b/src/ansi-c/ansi_c_language.cpp @@ -196,6 +196,8 @@ Function: ansi_c_languaget::final bool ansi_c_languaget::final(symbol_tablet &symbol_table) { + generate_opaque_method_stubs(symbol_table); + if(ansi_c_entry_point(symbol_table, "main", get_message_handler())) return true; diff --git a/src/ansi-c/expr2c.cpp b/src/ansi-c/expr2c.cpp index a6975bdab19..5de8c7688b0 100644 --- a/src/ansi-c/expr2c.cpp +++ b/src/ansi-c/expr2c.cpp @@ -518,28 +518,40 @@ std::string expr2ct::convert_rec( } else if(src.id()==ID_symbol) { - const typet &followed=ns.follow(src); + symbol_typet symbolic_type=to_symbol_type(src); + const irep_idt &typedef_identifer=symbolic_type.get(ID_typedef); - if(followed.id()==ID_struct) + // Providing we have a valid identifer, we can just use that rather than + // trying to find the concrete type + if(typedef_identifer!="") { - std::string dest=q+"struct"; - const irep_idt &tag=to_struct_type(followed).get_tag(); - if(tag!="") - dest+=" "+id2string(tag); - dest+=d; - return dest; + return q+id2string(typedef_identifer)+d; } - else if(followed.id()==ID_union) + else { - std::string dest=q+"union"; - const irep_idt &tag=to_union_type(followed).get_tag(); - if(tag!="") - dest+=" "+id2string(tag); - dest+=d; - return dest; + const typet &followed=ns.follow(src); + + if(followed.id()==ID_struct) + { + std::string dest=q+"struct"; + const irep_idt &tag=to_struct_type(followed).get_tag(); + if(tag!="") + dest+=" "+id2string(tag); + dest+=d; + return dest; + } + else if(followed.id()==ID_union) + { + std::string dest=q+"union"; + const irep_idt &tag=to_union_type(followed).get_tag(); + if(tag!="") + dest+=" "+id2string(tag); + dest+=d; + return dest; + } + else + return convert_rec(followed, new_qualifiers, declarator); } - else - return convert_rec(followed, new_qualifiers, declarator); } else if(src.id()==ID_struct_tag) { diff --git a/src/goto-instrument/cover.cpp b/src/goto-instrument/cover.cpp index fa92bc27cc7..2385c4204f3 100644 --- a/src/goto-instrument/cover.cpp +++ b/src/goto-instrument/cover.cpp @@ -10,8 +10,14 @@ Date: May 2016 #include #include +#include +#include #include +#include +#include + +#include #include "cover.h" @@ -28,6 +34,11 @@ class basic_blockst if(next_is_target || it->is_target()) block_count++; + const irep_idt &line=it->source_location.get_line(); + if(!line.empty()) + block_line_cover_map[block_count] + .insert(unsafe_string2unsigned(id2string(line))); + block_map[it]=block_count; if(!it->source_location.is_nil() && @@ -35,7 +46,26 @@ class basic_blockst source_location_map[block_count]=it->source_location; next_is_target= +#if 0 + // Disabled for being too messy it->is_goto() || it->is_function_call() || it->is_assume(); +#else + it->is_goto() || it->is_function_call(); +#endif + } + + // create list of covered lines as CSV string and set as property of source + // location of basic block, compress to ranges if applicable + format_number_ranget format_lines; + for(const auto &cover_set : block_line_cover_map) + { + assert(!cover_set.second.empty()); + std::vector + line_list{cover_set.second.begin(), cover_set.second.end()}; + + std::string covered_lines=format_lines(line_list); + source_location_map[cover_set.first] + .set_basic_block_covered_lines(covered_lines); } } @@ -47,6 +77,11 @@ class basic_blockst typedef std::map source_location_mapt; source_location_mapt source_location_map; + // map block numbers to set of line numbers + typedef std::map > + block_line_cover_mapt; + block_line_cover_mapt block_line_cover_map; + inline unsigned operator[](goto_programt::const_targett t) { return block_map[t]; @@ -66,6 +101,171 @@ class basic_blockst /*******************************************************************\ +Function: coverage_goalst::coverage_goalst + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +coverage_goalst::coverage_goalst() +{ + no_trivial_tests=false; +} + +/*******************************************************************\ + +Function: coverage_goalst::get_coverage + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +coverage_goalst coverage_goalst::get_coverage_goals(const std::string &coverage, + message_handlert &message_handler) +{ + jsont json; + coverage_goalst goals; + source_locationt source_location; + goals.set_no_trivial_tests(false); + + // check coverage file + if(parse_json(coverage, message_handler, json)) + { + messaget message(message_handler); + message.error() << coverage << " file is not a valid json file" + << messaget::eom; + exit(6); + } + + // make sure that we have an array of elements + if(!json.is_array()) + { + messaget message(message_handler); + message.error() << "expecting an array in the " << coverage + << " file, but got " + << json << messaget::eom; + exit(6); + } + + irep_idt file, function, line; + for(jsont::arrayt::const_iterator + it=json.array.begin(); + it!=json.array.end(); + it++) + { + // get the file of each existing goal + file=(*it)["file"].value; + source_location.set_file(file); + + // get the function of each existing goal + function=(*it)["function"].value; + source_location.set_function(function); + + // get the lines array + if((*it)["lines"].is_array()) + { + for(const jsont & entry : (*it)["lines"].array) + { + // get the line of each existing goal + line=entry["number"].value; + source_location.set_line(line); + goals.set_goals(source_location); + } + } + } + return goals; +} + +/*******************************************************************\ + +Function: coverage_goalst::set_goals + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +void coverage_goalst::set_goals(source_locationt goal) +{ + existing_goals.push_back(goal); +} + +/*******************************************************************\ + +Function: coverage_goalst::is_existing_goal + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +bool coverage_goalst::is_existing_goal(source_locationt source_location) +{ + std::vector::iterator it = existing_goals.begin(); + while(it!=existing_goals.end()) + { + if(!source_location.get_file().compare(it->get_file()) && + !source_location.get_function().compare(it->get_function()) && + !source_location.get_line().compare(it->get_line())) + break; + ++it; + } + if(it == existing_goals.end()) + return true; + else + return false; +} + +/*******************************************************************\ + +Function: coverage_goalst::set_no_trivial_tests + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +void coverage_goalst::set_no_trivial_tests(const bool trivial) +{ + no_trivial_tests=trivial; +} + +/*******************************************************************\ + +Function: coverage_goalst::get_no_trivial_tests + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +const bool coverage_goalst::get_no_trivial_tests() +{ + return no_trivial_tests; +} + +/*******************************************************************\ + Function: as_string Inputs: @@ -414,7 +614,7 @@ std::set collect_mcdc_controlling_nested( const std::set &decisions) { // To obtain the 1st-level controlling conditions - std::set controlling = collect_mcdc_controlling(decisions); + std::set controlling=collect_mcdc_controlling(decisions); std::set result; // For each controlling condition, to check if it contains @@ -627,7 +827,7 @@ void remove_repetition(std::set &exprs) **/ for(auto &y : new_exprs) { - bool iden = true; + bool iden=true; for(auto &c : conditions) { std::set signs1=sign_of_expr(c, x); @@ -669,7 +869,7 @@ void remove_repetition(std::set &exprs) } // update the original ''exprs'' - exprs = new_exprs; + exprs=new_exprs; } /*******************************************************************\ @@ -838,7 +1038,8 @@ bool is_mcdc_pair( if(diff_count==1) return true; - else return false; + else + return false; } /*******************************************************************\ @@ -963,7 +1164,8 @@ void minimize_mcdc_controlling( { controlling=new_controlling; } - else break; + else + break; } } @@ -1074,7 +1276,8 @@ Function: instrument_cover_goals void instrument_cover_goals( const symbol_tablet &symbol_table, goto_programt &goto_program, - coverage_criteriont criterion) + coverage_criteriont criterion, + coverage_goalst &goals) { const namespacet ns(symbol_table); basic_blockst basic_blocks(goto_program); @@ -1142,7 +1345,9 @@ void instrument_cover_goals( source_locationt source_location= basic_blocks.source_location_map[block_nr]; - if(!source_location.get_file().empty() && + // check whether the current goal already exists + if(goals.is_existing_goal(source_location) && + !source_location.get_file().empty() && source_location.get_file()[0]!='<') { std::string comment="block "+b; @@ -1297,9 +1502,12 @@ void instrument_cover_goals( const std::set decisions=collect_decisions(i_it); std::set both; - std::set_union(conditions.begin(), conditions.end(), - decisions.begin(), decisions.end(), - inserter(both, both.end())); + std::set_union( + conditions.begin(), + conditions.end(), + decisions.begin(), + decisions.end(), + inserter(both, both.end())); const source_locationt source_location=i_it->source_location; @@ -1386,6 +1594,38 @@ Function: instrument_cover_goals \*******************************************************************/ +void instrument_cover_goals( + const symbol_tablet &symbol_table, + goto_functionst &goto_functions, + coverage_criteriont criterion, + coverage_goalst &goals) +{ + Forall_goto_functions(f_it, goto_functions) + { + if(f_it->first==ID__start || + f_it->first==CPROVER_PREFIX "initialize") + continue; + + instrument_cover_goals( + symbol_table, + f_it->second.body, + criterion, + goals); + } +} + +/*******************************************************************\ + +Function: instrument_cover_goals + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + void instrument_cover_goals( const symbol_tablet &symbol_table, goto_functionst &goto_functions, @@ -1397,6 +1637,53 @@ void instrument_cover_goals( f_it->first=="__CPROVER_initialize") continue; - instrument_cover_goals(symbol_table, f_it->second.body, criterion); + // empty set of existing goals + coverage_goalst goals; + instrument_cover_goals(symbol_table, f_it->second.body, + criterion, goals); } } + +/*******************************************************************\ + +Function: consider_goals + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +bool consider_goals( + const goto_programt &goto_program, + coverage_goalst &goals) +{ + // check whether we should eliminate trivial goals + if(!goals.get_no_trivial_tests()) + return true; + + bool result; + unsigned long count_assignments=0, count_goto=0, count_decl=0; + + forall_goto_program_instructions(i_it, goto_program) + { + if(i_it->is_goto()) + ++count_goto; + else if(i_it->is_assign()) + ++count_assignments; + else if(i_it->is_decl()) + ++count_decl; + } + + // check whether this is a constructor/destructor or a get/set (pattern) + if(!count_goto && !count_assignments && !count_decl) + result=false; + else + result = !((count_decl==0) && (count_goto<=1) && + (count_assignments>0 && count_assignments<5)); + + return result; +} + diff --git a/src/goto-instrument/cover.h b/src/goto-instrument/cover.h index e897c88b753..07ced79178c 100644 --- a/src/goto-instrument/cover.h +++ b/src/goto-instrument/cover.h @@ -13,15 +13,42 @@ Date: May 2016 #include +class coverage_goalst +{ +public: + coverage_goalst(); + static coverage_goalst get_coverage_goals(const std::string &coverage, + message_handlert &message_handler); + void set_goals(source_locationt goal); + bool is_existing_goal(source_locationt source_location); + void set_no_trivial_tests(const bool trivial); + const bool get_no_trivial_tests(); + +private: + std::vector existing_goals; + bool no_trivial_tests; +}; + enum class coverage_criteriont { LOCATION, BRANCH, DECISION, CONDITION, PATH, MCDC, ASSERTION, COVER }; +bool consider_goals( + const goto_programt &goto_program, + coverage_goalst &goals); + void instrument_cover_goals( const symbol_tablet &symbol_table, goto_programt &goto_program, - coverage_criteriont); + coverage_criteriont, + coverage_goalst &goals); + +void instrument_cover_goals( + const symbol_tablet &symbol_table, + goto_functionst &goto_functions, + coverage_criteriont, + coverage_goalst &goals); void instrument_cover_goals( const symbol_tablet &symbol_table, diff --git a/src/goto-instrument/dump_c.cpp b/src/goto-instrument/dump_c.cpp index d06a79e01d8..24005138109 100644 --- a/src/goto-instrument/dump_c.cpp +++ b/src/goto-instrument/dump_c.cpp @@ -163,7 +163,8 @@ void dump_ct::operator()(std::ostream &os) } // we don't want to dump in full all definitions - if(!tag_added && ignore(symbol)) + if(!tag_added && + system_symbols.is_symbol_internal_symbol(symbol, system_headers)) continue; if(!symbols_sorted.insert(name_str).second) @@ -344,7 +345,7 @@ void dump_ct::convert_compound( ns.lookup(to_symbol_type(type).get_identifier()); assert(symbol.is_type); - if(!ignore(symbol)) + if(!system_symbols.is_symbol_internal_symbol(symbol, system_headers)) convert_compound(symbol.type, unresolved, recursive, os); } else if(type.id()==ID_c_enum_tag) @@ -353,7 +354,7 @@ void dump_ct::convert_compound( ns.lookup(to_c_enum_tag_type(type).get_identifier()); assert(symbol.is_type); - if(!ignore(symbol)) + if(!system_symbols.is_symbol_internal_symbol(symbol, system_headers)) convert_compound(symbol.type, unresolved, recursive, os); } else if(type.id()==ID_array || type.id()==ID_pointer) @@ -610,259 +611,6 @@ void dump_ct::convert_compound_enum( /*******************************************************************\ -Function: dump_ct::init_system_library_map - -Inputs: - -Outputs: - -Purpose: - -\*******************************************************************/ - -#define ADD_TO_SYSTEM_LIBRARY(v, header) \ - for(size_t i=0; isecond); - return true; - } - - return false; -} - -/*******************************************************************\ - Function: dump_ct::cleanup_decl Inputs: diff --git a/src/goto-instrument/dump_c_class.h b/src/goto-instrument/dump_c_class.h index 6bb7b79638e..c8c77da5102 100644 --- a/src/goto-instrument/dump_c_class.h +++ b/src/goto-instrument/dump_c_class.h @@ -15,6 +15,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include class dump_ct { @@ -30,7 +31,7 @@ class dump_ct language(factory()) { if(use_system_headers) - init_system_library_map(); + system_symbols=system_library_symbolst(); } virtual ~dump_ct() @@ -51,21 +52,15 @@ class dump_ct std::set system_headers; - typedef std::unordered_map - system_library_mapt; - system_library_mapt system_library_map; + system_library_symbolst system_symbols; typedef std::unordered_map declared_enum_constants_mapt; declared_enum_constants_mapt declared_enum_constants; - void init_system_library_map(); - std::string type_to_string(const typet &type); std::string expr_to_string(const exprt &expr); - bool ignore(const symbolt &symbol); - static std::string indent(const unsigned n) { return std::string(2*n, ' '); diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index b5486355277..f747f87f3dc 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -562,7 +562,7 @@ int goto_instrument_parse_optionst::doit() if(cmdline.isset("interpreter")) { status() << "Starting interpreter" << eom; - interpreter(symbol_table, goto_functions); + interpreter(symbol_table, goto_functions, get_message_handler()); return 0; } diff --git a/src/goto-programs/Makefile b/src/goto-programs/Makefile index d5cc5ea25eb..1b303c759b9 100644 --- a/src/goto-programs/Makefile +++ b/src/goto-programs/Makefile @@ -20,7 +20,11 @@ SRC = goto_convert.cpp goto_convert_function_call.cpp \ slice_global_inits.cpp goto_inline_class.cpp class_identifier.cpp \ show_goto_functions_json.cpp \ show_goto_functions_xml.cpp \ - remove_static_init_loops.cpp remove_instanceof.cpp + remove_static_init_loops.cpp \ + remove_instanceof.cpp \ + class_identifier.cpp \ + system_library_symbols.cpp \ + INCLUDES= -I .. diff --git a/src/goto-programs/goto_inline.cpp b/src/goto-programs/goto_inline.cpp index efe049ec146..3c7359edebd 100644 --- a/src/goto-programs/goto_inline.cpp +++ b/src/goto-programs/goto_inline.cpp @@ -213,7 +213,12 @@ void goto_partial_inline( // called function const goto_functiont &goto_function=f_it->second; - if(!goto_function.body_available()) + // We can't take functions without bodies to find functions + // inside them to be inlined. + // We also don't allow for the _start function to have any of its + // function calls to be inlined + if(!goto_function.body_available() || + f_it->first==ID__start) continue; const goto_programt &goto_program=goto_function.body; diff --git a/src/goto-programs/goto_trace.cpp b/src/goto-programs/goto_trace.cpp index ad8bdcdfdd9..4179afb5395 100644 --- a/src/goto-programs/goto_trace.cpp +++ b/src/goto-programs/goto_trace.cpp @@ -65,6 +65,7 @@ void goto_trace_stept::output( case goto_trace_stept::ASSIGNMENT: out << "ASSIGNMENT"; break; case goto_trace_stept::GOTO: out << "GOTO"; break; case goto_trace_stept::DECL: out << "DECL"; break; + case goto_trace_stept::DEAD: out << "DEAD"; break; case goto_trace_stept::OUTPUT: out << "OUTPUT"; break; case goto_trace_stept::INPUT: out << "INPUT"; break; case goto_trace_stept::ATOMIC_BEGIN: out << "ATOMC_BEGIN"; break; @@ -73,7 +74,9 @@ void goto_trace_stept::output( case goto_trace_stept::SHARED_WRITE: out << "SHARED WRITE"; break; case goto_trace_stept::FUNCTION_CALL: out << "FUNCTION CALL"; break; case goto_trace_stept::FUNCTION_RETURN: out << "FUNCTION RETURN"; break; - default: assert(false); + default: + out << "unknown type: " << type << std::endl; + assert(false); } if(type==ASSERT || type==ASSUME || type==GOTO) diff --git a/src/goto-programs/goto_trace.h b/src/goto-programs/goto_trace.h index 507d538ada0..a669fac5e6b 100644 --- a/src/goto-programs/goto_trace.h +++ b/src/goto-programs/goto_trace.h @@ -154,6 +154,13 @@ class goto_tracet steps.push_back(step); } + // retrieves the final step in the trace for manipulation + // (used to fill a trace from code, hence non-const) + inline goto_trace_stept &get_last_step() + { + return steps.back(); + } + // delete all steps after (not including) s void trim_after(stepst::iterator s) { diff --git a/src/goto-programs/interpreter.cpp b/src/goto-programs/interpreter.cpp index 2b4841a7c22..8d1565db914 100644 --- a/src/goto-programs/interpreter.cpp +++ b/src/goto-programs/interpreter.cpp @@ -9,13 +9,23 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include +#include #include #include +#include +#include +#include +#include +#include #include "interpreter.h" #include "interpreter_class.h" +#include "remove_returns.h" + +const size_t interpretert::npos=std::numeric_limits::max(); /*******************************************************************\ @@ -30,9 +40,46 @@ Function: interpretert::operator() \*******************************************************************/ void interpretert::operator()() +{ + status() << "0- Initialize:" << eom; + initialize(true); + try + { + status() << "Type h for help\n" << eom; + + while(!done) + command(); + + status() << total_steps << "- Program End.\n" << eom; + } + catch (const char *e) + { + error() << e << "\n" << eom; + } + + while(!done) + command(); +} + +/*******************************************************************\ + +Function: interpretert::initialize + + Inputs: + + Outputs: + + Purpose: Initializes the memory map of the interpreter and + [optionally] runs up to the entry point (thus doing + the cprover initialization) + +\*******************************************************************/ + +void interpretert::initialize(bool init) { build_memory_map(); + total_steps=0; const goto_functionst::function_mapt::const_iterator main_it=goto_functions.function_map.find(goto_functionst::entry_point()); @@ -44,17 +91,27 @@ void interpretert::operator()() if(!goto_function.body_available()) throw "main has no body"; - PC=goto_function.body.instructions.begin(); + pc=goto_function.body.instructions.begin(); function=main_it; done=false; - - while(!done) + if(init) { + stack_depth=call_stack.size()+1; show_state(); - command(); - if(!done) + step(); + while(!done && (stack_depth<=call_stack.size()) && (stack_depth!=npos)) + { + show_state(); + step(); + } + while(!done && (call_stack.size()==0)) + { + show_state(); step(); + } + clear_input_flags(); + input_vars.clear(); } } @@ -66,26 +123,31 @@ Function: interpretert::show_state Outputs: - Purpose: + Purpose: displays the current position of the pc and corresponding + code \*******************************************************************/ void interpretert::show_state() { - std::cout << std::endl; - std::cout << "----------------------------------------------------" - << std::endl; + if(!show) + return; + status() << "\n" + << total_steps+1 + << " ----------------------------------------------------\n"; - if(PC==function->second.body.instructions.end()) + if(pc==function->second.body.instructions.end()) { - std::cout << "End of function `" - << function->first << "'" << std::endl; + status() << "End of function `" << function->first << "'\n"; } else function->second.body.output_instruction( - ns, function->first, std::cout, PC); + ns, + function->first, + status(), + pc); - std::cout << std::endl; + status() << eom; } /*******************************************************************\ @@ -96,7 +158,7 @@ Function: interpretert::command Outputs: - Purpose: + Purpose: reads a user command and executes it. \*******************************************************************/ @@ -111,9 +173,97 @@ void interpretert::command() } char ch=tolower(command[0]); - if(ch=='q') done=true; + else if(ch=='h') + { + status() + << "Interpreter help\n" + << "h: display this menu\n" + << "j: output json trace\n" + << "m: output memory dump\n" + << "o: output goto trace\n" + << "q: quit\n" + << "r: run until completion\n" + << "s#: step a number of instructions\n" + << "sa: step across a function\n" + << "so: step out of a function\n" + << eom; + } + else if(ch=='j') + { + jsont json_steps; + convert(ns, steps, json_steps); + ch=tolower(command[1]); + if(ch==' ') + { + std::ofstream file; + file.open(command+2); + if(file.is_open()) + { + json_steps.output(file); + file.close(); + return; + } + } + json_steps.output(result()); + } + else if(ch=='m') + { + ch=tolower(command[1]); + print_memory(ch=='i'); + } + else if(ch=='o') + { + ch=tolower(command[1]); + if(ch==' ') + { + std::ofstream file; + file.open(command+2); + if(file.is_open()) + { + steps.output(ns, file); + file.close(); + return; + } + } + steps.output(ns, result()); + } + else if(ch=='r') + { + ch=tolower(command[1]); + initialize(ch!='0'); + } + else if((ch=='s') || (ch==0)) + { + num_steps=1; + stack_depth=npos; + ch=tolower(command[1]); + if(ch=='e') + num_steps=npos; + else if(ch=='o') + stack_depth=call_stack.size(); + else if(ch=='a') + stack_depth=call_stack.size()+1; + else + { + num_steps=atoi(command+1); + if(num_steps==0) + num_steps=1; + } + while(!done && ((num_steps==npos) || ((num_steps--)>0))) + { + step(); + show_state(); + } + while(!done && (stack_depth<=call_stack.size()) && (stack_depth!=npos)) + { + step(); + show_state(); + } + return; + } + show_state(); } /*******************************************************************\ @@ -124,41 +274,50 @@ Function: interpretert::step Outputs: - Purpose: + Purpose: executes a single step and updates the program counter \*******************************************************************/ void interpretert::step() { - if(PC==function->second.body.instructions.end()) + total_steps++; + if(pc==function->second.body.instructions.end()) { if(call_stack.empty()) done=true; else { - PC=call_stack.top().return_PC; + pc=call_stack.top().return_pc; function=call_stack.top().return_function; - stack_pointer=call_stack.top().old_stack_pointer; + // TODO: this increases memory size quite quickly. + // Should check alternatives call_stack.pop(); } return; } - next_PC=PC; - next_PC++; + next_pc=pc; + next_pc++; - switch(PC->type) + steps.add_step(goto_trace_stept()); + goto_trace_stept &trace_step=steps.get_last_step(); + trace_step.thread_nr=thread_id; + trace_step.pc=pc; + switch(pc->type) { case GOTO: + trace_step.type=goto_trace_stept::GOTO; execute_goto(); break; case ASSUME: + trace_step.type=goto_trace_stept::ASSUME; execute_assume(); break; case ASSERT: + trace_step.type=goto_trace_stept::ASSERT; execute_assert(); break; @@ -167,38 +326,46 @@ void interpretert::step() break; case DECL: + trace_step.type=goto_trace_stept::DECL; execute_decl(); break; case SKIP: case LOCATION: + trace_step.type=goto_trace_stept::LOCATION; + break; case END_FUNCTION: + trace_step.type=goto_trace_stept::FUNCTION_RETURN; break; case RETURN: + trace_step.type=goto_trace_stept::FUNCTION_RETURN; if(call_stack.empty()) throw "RETURN without call"; // NOLINT(readability/throw) - if(PC->code.operands().size()==1 && + if(pc->code.operands().size()==1 && call_stack.top().return_value_address!=0) { - std::vector rhs; - evaluate(PC->code.op0(), rhs); + mp_vectort rhs; + evaluate(pc->code.op0(), rhs); assign(call_stack.top().return_value_address, rhs); } - next_PC=function->second.body.instructions.end(); + next_pc=function->second.body.instructions.end(); break; case ASSIGN: + trace_step.type=goto_trace_stept::ASSIGNMENT; execute_assign(); break; case FUNCTION_CALL: + trace_step.type=goto_trace_stept::FUNCTION_CALL; execute_function_call(); break; case START_THREAD: + trace_step.type=goto_trace_stept::SPAWN; throw "START_THREAD not yet implemented"; // NOLINT(readability/throw) case END_THREAD: @@ -206,19 +373,44 @@ void interpretert::step() break; case ATOMIC_BEGIN: + trace_step.type=goto_trace_stept::ATOMIC_BEGIN; throw "ATOMIC_BEGIN not yet implemented"; // NOLINT(readability/throw) case ATOMIC_END: + trace_step.type=goto_trace_stept::ATOMIC_END; throw "ATOMIC_END not yet implemented"; // NOLINT(readability/throw) case DEAD: - throw "DEAD not yet implemented"; // NOLINT(readability/throw) - + trace_step.type=goto_trace_stept::DEAD; + break; + case THROW: + trace_step.type=goto_trace_stept::GOTO; + while(!done && (pc->type!=CATCH)) + { + if(pc==function->second.body.instructions.end()) + { + if(call_stack.empty()) + done=true; + else + { + pc=call_stack.top().return_pc; + function=call_stack.top().return_function; + call_stack.pop(); + } + } + else + { + next_pc=pc; + next_pc++; + } + } + break; + case CATCH: + break; default: throw "encountered instruction with undefined instruction type"; } - - PC=next_PC; + pc=next_pc; } /*******************************************************************\ @@ -229,21 +421,21 @@ Function: interpretert::execute_goto Outputs: - Purpose: + Purpose: executes a goto instruction \*******************************************************************/ void interpretert::execute_goto() { - if(evaluate_boolean(PC->guard)) + if(evaluate_boolean(pc->guard)) { - if(PC->targets.empty()) + if(pc->targets.empty()) throw "taken goto without target"; - if(PC->targets.size()>=2) + if(pc->targets.size()>=2) throw "non-deterministic goto encountered"; - next_PC=PC->targets.front(); + next_pc=pc->targets.front(); } } @@ -255,19 +447,37 @@ Function: interpretert::execute_other Outputs: - Purpose: + Purpose: executes side effects of 'other' instructions \*******************************************************************/ void interpretert::execute_other() { - const irep_idt &statement=PC->code.get_statement(); - + const irep_idt &statement=pc->code.get_statement(); if(statement==ID_expression) { - assert(PC->code.operands().size()==1); - std::vector rhs; - evaluate(PC->code.op0(), rhs); + assert(pc->code.operands().size()==1); + mp_vectort rhs; + evaluate(pc->code.op0(), rhs); + } + else if(statement==ID_array_set) + { + mp_vectort tmp, rhs; + evaluate(pc->code.op1(), tmp); + mp_integer address=evaluate_address(pc->code.op0()); + size_t size=get_size(pc->code.op0().type()); + while(rhs.size()code.get_statement()==ID_decl); + assert(pc->code.get_statement()==ID_decl); +} + +/*******************************************************************\ + +Function: interpretert::get_component_id + + Inputs: an object and a memory offset + + Outputs: + + Purpose: retrieves the member at offset + +\*******************************************************************/ + +irep_idt interpretert::get_component_id( + const irep_idt &object, + unsigned offset) +{ + const symbolt &symbol=ns.lookup(object); + const typet real_type=ns.follow(symbol.type); + if(real_type.id()!=ID_struct) + throw "request for member of non-struct"; + + const struct_typet &struct_type=to_struct_type(real_type); + const struct_typet::componentst &components=struct_type.components(); + for(struct_typet::componentst::const_iterator it=components.begin(); + it!=components.end(); it++) + { + if(offset<=0) + return it->id(); + size_t size=get_size(it->type()); + offset-=size; + } + return object; +} + +/*******************************************************************\ + +Function: interpretert::get_type + + Inputs: + + Outputs: + + Purpose: returns the type object corresponding to id + +\*******************************************************************/ + +typet interpretert::get_type(const irep_idt &id) const +{ + dynamic_typest::const_iterator it=dynamic_types.find(id); + if(it==dynamic_types.end()) + return symbol_table.lookup(id).type; + return it->second; +} + +/*******************************************************************\ + +Function: interpretert::get_value + + Inputs: + + Outputs: + + Purpose: retrives the constant value at memory location offset + as an object of type type + +\*******************************************************************/ + +exprt interpretert::get_value( + const typet &type, + std::size_t offset, + bool use_non_det) +{ + const typet real_type=ns.follow(type); + if(real_type.id()==ID_struct) + { + exprt result=struct_exprt(real_type); + const struct_typet &struct_type=to_struct_type(real_type); + const struct_typet::componentst &components=struct_type.components(); + + // Retrieve the values for the individual members + result.reserve_operands(components.size()); + for(struct_typet::componentst::const_iterator it=components.begin(); + it!=components.end(); it++) + { + size_t size=get_size(it->type()); + const exprt operand=get_value(it->type(), offset); + offset+=size; + result.copy_to_operands(operand); + } + return result; + } + else if(real_type.id()==ID_array) + { + // Get size of array + exprt result=array_exprt(to_array_type(real_type)); + const exprt &size_expr=static_cast(type.find(ID_size)); + size_t subtype_size=get_size(type.subtype()); + mp_integer mp_count; + to_integer(size_expr, mp_count); + unsigned count=integer2unsigned(mp_count); + + // Retrieve the value for each member in the array + result.reserve_operands(count); + for(unsigned i=0; i=0)) + return side_effect_expr_nondett(type); + mp_vectort rhs; + rhs.push_back(memory[offset].value); + return get_value(type, rhs); +} + +/*******************************************************************\ + + Function: interpretert::get_value + + Inputs: + + Outputs: + + Purpose: returns the value at offset in the form of type given a + memory buffer rhs which is typically a structured type + +\*******************************************************************/ + +exprt interpretert::get_value( + const typet &type, + mp_vectort &rhs, + std::size_t offset) +{ + const typet real_type=ns.follow(type); + assert(!rhs.empty()); + + if(real_type.id()==ID_struct) + { + exprt result=struct_exprt(real_type); + const struct_typet &struct_type=to_struct_type(real_type); + const struct_typet::componentst &components=struct_type.components(); + + // Retrieve the values for the individual members + result.reserve_operands(components.size()); + for(const struct_union_typet::componentt &expr : components) + { + size_t size=get_size(expr.type()); + const exprt operand=get_value(expr.type(), rhs, offset); + offset+=size; + result.copy_to_operands(operand); + } + return result; + } + else if(real_type.id()==ID_array) + { + exprt result(ID_constant, type); + const exprt &size_expr=static_cast(type.find(ID_size)); + + // Get size of array + size_t subtype_size=get_size(type.subtype()); + mp_integer mp_count; + to_integer(size_expr, mp_count); + unsigned count=integer2unsigned(mp_count); + + // Retrieve the value for each member in the array + result.reserve_operands(count); + for(unsigned i=0; i object count " << memory.size() << eom; + throw "interpreter: reading from invalid pointer"; + } + else if(real_type.id()==ID_string) + { + // Strings are currently encoded by their irep_idt ID. + return constant_exprt( + irep_idt(rhs[offset].to_long(), 0), + type); + } + // Retrieve value of basic data type + return from_integer(rhs[offset], type); } /*******************************************************************\ @@ -298,29 +754,56 @@ Function: interpretert::execute_assign Outputs: - Purpose: + Purpose: executes the assign statement at the current pc value \*******************************************************************/ void interpretert::execute_assign() { const code_assignt &code_assign= - to_code_assign(PC->code); + to_code_assign(pc->code); - std::vector rhs; + mp_vectort rhs; evaluate(code_assign.rhs(), rhs); if(!rhs.empty()) { mp_integer address=evaluate_address(code_assign.lhs()); - unsigned size=get_size(code_assign.lhs().type()); + size_t size=get_size(code_assign.lhs().type()); if(size!=rhs.size()) - std::cout << "!! failed to obtain rhs (" - << rhs.size() << " vs. " - << size << ")" << std::endl; + error() << "!! failed to obtain rhs (" + << rhs.size() << " vs. " + << size << ")\n" + << eom; else + { + goto_trace_stept &trace_step=steps.get_last_step(); assign(address, rhs); + trace_step.full_lhs=code_assign.lhs(); + + // TODO: need to look at other cases on ssa_exprt + // (dereference should be handled on ssa) + if(ssa_exprt::can_build_identifier(trace_step.full_lhs)) + { + trace_step.lhs_object=ssa_exprt(trace_step.full_lhs); + } + trace_step.full_lhs_value=get_value(trace_step.full_lhs.type(), rhs); + trace_step.lhs_object_value=trace_step.full_lhs_value; + } + } + else if(code_assign.rhs().id()==ID_side_effect) + { + side_effect_exprt side_effect=to_side_effect_expr(code_assign.rhs()); + if(side_effect.get_statement()==ID_nondet) + { + unsigned address=integer2unsigned(evaluate_address(code_assign.lhs())); + size_t size=get_size(code_assign.lhs().type()); + for(size_t i=0; i &rhs) + const mp_vectort &rhs) { - for(unsigned i=0; iguard)) + if(!evaluate_boolean(pc->guard)) throw "assumption failed"; } @@ -384,8 +875,14 @@ Function: interpretert::execute_assert void interpretert::execute_assert() { - if(!evaluate_boolean(PC->guard)) - throw "assertion failed"; + if(!evaluate_boolean(pc->guard)) + { + if((target_assert==pc) || stop_on_assertion) + throw "program assertion reached"; + else if(show) + error() << "assertion failed at " << pc->location_number + << "\n" << eom; + } } /*******************************************************************\ @@ -403,7 +900,7 @@ Function: interpretert::execute_function_call void interpretert::execute_function_call() { const code_function_callt &function_call= - to_code_function_call(PC->code); + to_code_function_call(pc->code); // function to be called mp_integer a=evaluate_address(function_call.function()); @@ -413,8 +910,12 @@ void interpretert::execute_function_call() else if(a>=memory.size()) throw "out-of-range function call"; + // Retrieve the empty last trace step struct we pushed for this step + // of the interpreter run to fill it with the corresponding data + goto_trace_stept &trace_step=steps.get_last_step(); const memory_cellt &cell=memory[integer2size_t(a)]; const irep_idt &identifier=cell.identifier; + trace_step.identifier=identifier; const goto_functionst::function_mapt::const_iterator f_it= goto_functions.function_map.find(identifier); @@ -432,7 +933,7 @@ void interpretert::execute_function_call() return_value_address=0; // values of the arguments - std::vector > argument_values; + std::vector argument_values; argument_values.resize(function_call.arguments().size()); @@ -446,7 +947,7 @@ void interpretert::execute_function_call() call_stack.push(stack_framet()); stack_framet &frame=call_stack.top(); - frame.return_PC=next_PC; + frame.return_pc=next_pc; frame.return_function=function; frame.old_stack_pointer=stack_pointer; frame.return_value_address=return_value_address; @@ -458,24 +959,7 @@ void interpretert::execute_function_call() for(const auto &id : locals) { const symbolt &symbol=ns.lookup(id); - unsigned size=get_size(symbol.type); - - if(size!=0) - { - frame.local_map[id]=stack_pointer; - - for(unsigned i=0; i=memory.size()) - memory.resize(address+1); - memory[address].value=0; - memory[address].identifier=id; - memory[address].offset=i; - } - - stack_pointer+=size; - } + frame.local_map[id]=integer2unsigned(build_memory_map(id, symbol.type)); } // assign the arguments @@ -485,7 +969,7 @@ void interpretert::execute_function_call() if(argument_values.size()second.body.instructions.begin(); + next_pc=f_it->second.body.instructions.begin(); } else - throw "no body for "+id2string(identifier); + { + list_input_varst::iterator it= + function_input_vars.find(function_call.function().get(ID_identifier)); + if(it!=function_input_vars.end()) + { + mp_vectort value; + assert(!it->second.empty()); + assert(!it->second.front().return_assignments.empty()); + evaluate(it->second.front().return_assignments.back().value, value); + if(return_value_address>0) + { + assign(return_value_address, value); + } + it->second.pop_front(); + return; + } + if(show) + error() << "no body for "+id2string(identifier) << eom; + } } /*******************************************************************\ @@ -510,7 +1012,7 @@ Function: interpretert::build_memory_map Outputs: - Purpose: + Purpose: Creates a memory map of all static symbols in the program \*******************************************************************/ @@ -520,6 +1022,10 @@ void interpretert::build_memory_map() memory.resize(1); memory[0].offset=0; memory[0].identifier="NULL-OBJECT"; + memory[0].initialized=0; + + num_dynamic_objects=0; + dynamic_types.clear(); // now do regular static symbols for(const auto &s : symbol_table.symbols) @@ -543,7 +1049,7 @@ Function: interpretert::build_memory_map void interpretert::build_memory_map(const symbolt &symbol) { - unsigned size=0; + size_t size=0; if(symbol.type.id()==ID_code) { @@ -560,29 +1066,120 @@ void interpretert::build_memory_map(const symbolt &symbol) memory.resize(address+size); memory_map[symbol.name]=address; - for(unsigned i=0; i(type.find(ID_size)); + mp_vectort computed_size; + evaluate(size_expr, computed_size); + if(computed_size.size()==1 && + computed_size[0]>=0) + { + result() << "Concretized array with size " << computed_size[0] + << eom; + return array_typet(type.subtype(), + constant_exprt::integer_constant(computed_size[0].to_ulong())); + } + else + { + error() << "Failed to concretize variable array" + << eom; + } + } + return type; +} + +/*******************************************************************\ + +Function: interpretert::build_memory_map + + Inputs: + + Outputs: Updtaes the memory map to include variable id if it does + not exist + + Purpose: Populates dynamic entries of the memory map + +\*******************************************************************/ + +mp_integer interpretert::build_memory_map( + const irep_idt &id, + const typet &type) +{ + typet alloc_type=concretize_type(type); + size_t size=get_size(alloc_type); + auto it=dynamic_types.find(id); + + if(it!=dynamic_types.end()) + { + unsigned offset=1; + unsigned address=memory_map[id]; + while(memory[address+offset].offset>0) offset++; + + // current size <= size already recorded + if(size<=offset) + return memory_map[id]; + } + + // The current size is bigger then the one previously recorded + // in the memory map + + if(size==0) + size=1; // This is a hack to create existence + + unsigned address=memory.size(); + memory.resize(address+size); + memory_map[id]=address; + dynamic_types.insert(std::pair(id, alloc_type)); + + for(size_t i=0; i(type.find(ID_size)); - unsigned subtype_size=get_size(type.subtype()); + size_t subtype_size=get_size(type.subtype()); - mp_integer i; - if(!to_integer(size_expr, i)) - return subtype_size*integer2unsigned(i); - else - return subtype_size; + mp_vectort i; + evaluate(size_expr, i); + if(i.size()==1) + { + // Go via the binary representation to reproduce any + // overflow behaviour. + exprt size_const=from_integer(i[0], size_expr.type()); + mp_integer size_mp; + assert(!to_integer(size_const, size_mp)); + return subtype_size*integer2unsigned(size_mp); + } + return subtype_size; } else if(type.id()==ID_symbol) { return get_size(ns.follow(type)); } + return 1; +} + +/******************************************************************* \ + +Function: interpretert::get_value + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +exprt interpretert::get_value(const irep_idt &id) +{ + // The dynamic type and the static symbol type may differ for VLAs, + // where the symbol carries a size expression and the dynamic type + // registry contains its actual length. + auto findit=dynamic_types.find(id); + typet get_type; + if(findit!=dynamic_types.end()) + get_type=findit->second; else - return 1; + get_type=symbol_table.lookup(id).type; + + symbol_exprt symbol_expr(id, get_type); + mp_integer whole_lhs_object_address=evaluate_address(symbol_expr); + + return get_value(get_type, integer2unsigned(whole_lhs_object_address)); } /*******************************************************************\ @@ -652,8 +1285,39 @@ Function: interpreter void interpreter( const symbol_tablet &symbol_table, - const goto_functionst &goto_functions) + const goto_functionst &goto_functions, + message_handlert &message_handler) { - interpretert interpreter(symbol_table, goto_functions); + interpretert interpreter( + symbol_table, + goto_functions, + message_handler); interpreter(); } + +/*******************************************************************\ + +Function: interpretert::print_memory + + Inputs: + + Outputs: + + Purpose: Prints the current state of the memory map + Since messaget mdofifies class members, print functions are nonconst + +\*******************************************************************/ + +void interpretert::print_memory(bool input_flags) +{ + for(size_t i=0; i(cell.initialized) << ")" + << eom; + debug() << eom; + } +} diff --git a/src/goto-programs/interpreter.h b/src/goto-programs/interpreter.h index bf909502793..1888a009969 100644 --- a/src/goto-programs/interpreter.h +++ b/src/goto-programs/interpreter.h @@ -9,10 +9,13 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_GOTO_PROGRAMS_INTERPRETER_H #define CPROVER_GOTO_PROGRAMS_INTERPRETER_H +#include + #include "goto_functions.h" void interpreter( const symbol_tablet &symbol_table, - const goto_functionst &goto_functions); + const goto_functionst &goto_functions, + message_handlert &message_handler); #endif // CPROVER_GOTO_PROGRAMS_INTERPRETER_H diff --git a/src/goto-programs/interpreter_class.h b/src/goto-programs/interpreter_class.h index 14c429a1a86..83836eea072 100644 --- a/src/goto-programs/interpreter_class.h +++ b/src/goto-programs/interpreter_class.h @@ -14,24 +14,93 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "goto_functions.h" +#include "goto_trace.h" +#include "json_goto_trace.h" +#include "util/message.h" -class interpretert +class interpretert:public messaget { public: interpretert( const symbol_tablet &_symbol_table, - const goto_functionst &_goto_functions): + const goto_functionst &_goto_functions, + message_handlert &_message_handler): + messaget(_message_handler), symbol_table(_symbol_table), ns(_symbol_table), - goto_functions(_goto_functions) + goto_functions(_goto_functions), + stop_on_assertion(false) { + show=true; } void operator()(); + void print_memory(bool input_flags); + + // An assertion that identifier 'id' carries value in some particular context. + // Used to record parameter (id) assignment (value) lists for function calls. + struct function_assignmentt + { + irep_idt id; + exprt value; + }; + + // A list of such assignments. + typedef std::vector function_assignmentst; + + typedef std::vector mp_vectort; + + // Maps an assignment id to the name of the parameter being assigned + typedef std::pair assignment_idt; + + // Identifies an expression before and after some change; + typedef std::pair diff_pairt; + + // Records a diff between an assignment pre and post conditions + typedef std::map side_effects_differencet; + + // A entry in the input_valuest map + typedef std::pair input_entryt; + + // List of the input variables with their corresponding initial values + typedef std::map input_valuest; + + // List of dynamically allocated symbols that are not in the symbol table + typedef std::map dynamic_typest; + + // An assignment list annotated with the calling context. + struct function_assignments_contextt + { + irep_idt calling_function; + function_assignmentst return_assignments; + function_assignmentst param_assignments; + }; + + // list_input_varst maps function identifiers onto a vector of [name = value] + // assignments per call to that function. + typedef std::list + function_assignments_contextst; + typedef std::map > + list_input_varst; + + const dynamic_typest &get_dynamic_types() { return dynamic_types; } protected: + struct trace_stack_entryt + { + irep_idt func_name; + mp_integer this_address; + irep_idt capture_symbol; + bool is_super_call; + std::vector param_values; + }; + typedef std::vector trace_stackt; + const symbol_tablet &symbol_table; + + // This is a cache so that we don't have to create it when a call needs it const namespacet ns; + const goto_functionst &goto_functions; typedef std::unordered_map memory_mapt; @@ -43,16 +112,40 @@ class interpretert irep_idt identifier; unsigned offset; mp_integer value; + // Initialized is annotated even during reads + // Set to 1 when written-before-read, -1 when read-before-written + mutable char initialized; }; typedef std::vector memoryt; - memoryt memory; + typedef std::map parameter_sett; + // mapping -> value + typedef std::pair struct_member_idt; + typedef std::map struct_valuest; + + + // The memory is being annotated/reshaped even during reads + // (ie to find a read-before-write location) thus memory + // properties need to be mutable to avoid making all calls nonconst + mutable memoryt memory; std::size_t stack_pointer; void build_memory_map(); void build_memory_map(const symbolt &symbol); - unsigned get_size(const typet &type) const; + mp_integer build_memory_map(const irep_idt &id, const typet &type); + typet concretize_type(const typet &type); + size_t get_size(const typet &type); + + irep_idt get_component_id(const irep_idt &object, unsigned offset); + typet get_type(const irep_idt &id) const; + exprt get_value( + const typet &type, + std::size_t offset=0, + bool use_non_det=false); + exprt get_value(const typet &type, mp_vectort &rhs, std::size_t offset=0); + exprt get_value(const irep_idt &id); + void step(); void execute_assert(); @@ -62,21 +155,26 @@ class interpretert void execute_function_call(); void execute_other(); void execute_decl(); + void clear_input_flags(); + + void allocate( + mp_integer address, + size_t size); void assign( mp_integer address, - const std::vector &rhs); + const mp_vectort &rhs); void read( mp_integer address, - std::vector &dest) const; + mp_vectort &dest) const; - void command(); + virtual void command(); class stack_framet { public: - goto_programt::const_targett return_PC; + goto_programt::const_targett return_pc; goto_functionst::function_mapt::const_iterator return_function; mp_integer return_value_address; memory_mapt local_map; @@ -84,27 +182,56 @@ class interpretert }; typedef std::stack call_stackt; + call_stackt call_stack; + input_valuest input_vars; + list_input_varst function_input_vars; goto_functionst::function_mapt::const_iterator function; - goto_programt::const_targett PC, next_PC; + goto_programt::const_targett pc, next_pc, target_assert; + goto_tracet steps; bool done; - - bool evaluate_boolean(const exprt &expr) const + bool show; + bool stop_on_assertion; + static const size_t npos; + size_t num_steps; + size_t total_steps; + + dynamic_typest dynamic_types; + int num_dynamic_objects; + size_t stack_depth; + int thread_id; + + bool evaluate_boolean(const exprt &expr) { - std::vector v; + mp_vectort v; evaluate(expr, v); if(v.size()!=1) throw "invalid boolean value"; return v.front()!=0; } + bool count_type_leaves( + const typet &source_type, + mp_integer &result); + + bool byte_offset_to_memory_offset( + const typet &source_type, + mp_integer byte_offset, + mp_integer &result); + + bool memory_offset_to_byte_offset( + const typet &source_type, + mp_integer cell_offset, + mp_integer &result); + void evaluate( const exprt &expr, - std::vector &dest) const; + mp_vectort &dest); - mp_integer evaluate_address(const exprt &expr) const; + mp_integer evaluate_address(const exprt &expr, bool fail_quietly=false); + void initialize(bool init); void show_state(); }; diff --git a/src/goto-programs/interpreter_evaluate.cpp b/src/goto-programs/interpreter_evaluate.cpp index a9d62740cc3..712d4b9d807 100644 --- a/src/goto-programs/interpreter_evaluate.cpp +++ b/src/goto-programs/interpreter_evaluate.cpp @@ -8,42 +8,323 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include +#include +#include #include "interpreter_class.h" /*******************************************************************\ -Function: interpretert::evaluate +Function: interpretert::read Inputs: Outputs: - Purpose: + Purpose: reads a memory address and loads it into the dest variable + marks cell as read before written if cell has never been written \*******************************************************************/ void interpretert::read( mp_integer address, - std::vector &dest) const + mp_vectort &dest) const { // copy memory region - for(auto &d : dest) + for(size_t i=0; i0) + cell.initialized=0; + } +} + +/*******************************************************************\ + +Function: interpretert::count_type_leaves + + Inputs: Type + + Outputs: Number of leaf primitive types; returns true on error + + Purpose: + +\*******************************************************************/ + +bool interpretert::count_type_leaves(const typet &ty, mp_integer &result) +{ + if(ty.id()==ID_struct) + { + result=0; + mp_integer subtype_count; + for(const auto &component : to_struct_type(ty).components()) + { + if(count_type_leaves(component.type(), subtype_count)) + return true; + result+=subtype_count; + } + return false; + } + else if(ty.id()==ID_array) + { + const auto &at=to_array_type(ty); + mp_vectort array_size_vec; + evaluate(at.size(), array_size_vec); + if(array_size_vec.size()!=1) + return true; + mp_integer subtype_count; + if(count_type_leaves(at.subtype(), subtype_count)) + return true; + result=array_size_vec[0]*subtype_count; + return false; + } + else + { + result=1; + return false; + } +} + +/*******************************************************************\ + +Function: interpretert::byte_offset_to_memory_offset + + Inputs: 'source_type', 'offset' (unit: bytes), + + Outputs: Offset into a vector of interpreter values; returns true on error + + Purpose: Supposing the caller has an mp_vector representing + a value with type 'source_type', this yields the offset into that + vector at which to find a value at *byte* address 'offset'. + We need this because the interpreter's memory map uses unlabelled + variable-width values -- for example, a C value { { 1, 2 }, 3, 4 } + of type struct { int x[2]; char y; unsigned long z; } + would be represented [1,2,3,4], with the source type needed alongside + to figure out which member is targeted by a byte-extract operation. + +\*******************************************************************/ + +bool interpretert::byte_offset_to_memory_offset( + const typet &source_type, + mp_integer offset, + mp_integer &result) +{ + if(source_type.id()==ID_struct) + { + const auto &st=to_struct_type(source_type); + const struct_typet::componentst &components=st.components(); + member_offset_iterator component_offsets(st, ns); + mp_integer previous_member_offsets=0; + for(; component_offsets->firstsecond!=-1 && + component_offsets->second<=offset; + ++component_offsets) + { + const auto &component_type=components[component_offsets->first].type(); + mp_integer component_byte_size=pointer_offset_size(component_type, ns); + if(component_byte_size<0) + return true; + if((component_offsets->second+component_byte_size)>offset) + { + mp_integer subtype_result; + bool ret=byte_offset_to_memory_offset( + component_type, + offset-(component_offsets->second), + subtype_result); + result=previous_member_offsets+subtype_result; + return ret; + } + else + { + mp_integer component_count; + if(count_type_leaves(component_type, component_count)) + return true; + previous_member_offsets+=component_count; + } + } + // Ran out of struct members, or struct contained a variable-length field. + return true; + } + else if(source_type.id()==ID_array) + { + const auto &at=to_array_type(source_type); + mp_vectort array_size_vec; + evaluate(at.size(), array_size_vec); + if(array_size_vec.size()!=1) + return true; + mp_integer array_size=array_size_vec[0]; + mp_integer elem_size_bytes=pointer_offset_size(at.subtype(), ns); + if(elem_size_bytes<=0) + return true; + mp_integer elem_size_leaves; + if(count_type_leaves(at.subtype(), elem_size_leaves)) + return true; + mp_integer this_idx=offset/elem_size_bytes; + if(this_idx>=array_size_vec[0]) + return true; + mp_integer subtype_result; + bool ret=byte_offset_to_memory_offset( + at.subtype(), + offset%elem_size_bytes, + subtype_result); + result=subtype_result+(elem_size_leaves*this_idx); + return ret; + } + else + { + result=0; + // Can't currently subdivide a primitive. + return offset!=0; + } +} + +/*******************************************************************\ + +Function: interpretert::memory_offset_to_byte_offset - ++address; + Inputs: An interpreter memory offset and the type to interpret that memory + + Outputs: The corresponding byte offset. Returns true on error + + Purpose: Similarly to the above, the interpreter's memory objects contain + mp_integers that represent variable-sized struct members. This + counts the size of type leaves to determine the byte offset + corresponding to a memory offset. + +\*******************************************************************/ + +bool interpretert::memory_offset_to_byte_offset( + const typet &source_type, + mp_integer cell_offset, + mp_integer &result) +{ + if(source_type.id()==ID_struct) + { + const auto &st=to_struct_type(source_type); + const struct_typet::componentst &components=st.components(); + member_offset_iterator offsets(st, ns); + mp_integer previous_member_sizes; + for(; offsets->firstsecond!=-1; ++offsets) + { + const auto &component_type=components[offsets->first].type(); + mp_integer component_count; + if(count_type_leaves(component_type, component_count)) + return true; + if(component_count>cell_offset) + { + mp_integer subtype_result; + bool ret=memory_offset_to_byte_offset( + component_type, + cell_offset, + subtype_result); + result=previous_member_sizes+subtype_result; + return ret; + } + else + { + cell_offset-=component_count; + mp_integer component_size=pointer_offset_size(component_type, ns); + if(component_size<0) + return true; + previous_member_sizes+=component_size; + } + } + // Ran out of members, or member of indefinite size + return true; + } + else if(source_type.id()==ID_array) + { + const auto &at=to_array_type(source_type); + mp_vectort array_size_vec; + evaluate(at.size(), array_size_vec); + if(array_size_vec.size()!=1) + return true; + mp_integer elem_size=pointer_offset_size(at.subtype(), ns); + if(elem_size==-1) + return true; + mp_integer elem_count; + if(count_type_leaves(at.subtype(), elem_count)) + return true; + mp_integer this_idx=cell_offset/elem_count; + if(this_idx>=array_size_vec[0]) + return true; + mp_integer subtype_result; + bool ret= + memory_offset_to_byte_offset(at.subtype(), + cell_offset%elem_count, + subtype_result); + result=subtype_result+(elem_size*this_idx); + return ret; + } + else + { + // Primitive type. + result=0; + return cell_offset!=0; } } @@ -61,12 +342,55 @@ Function: interpretert::evaluate void interpretert::evaluate( const exprt &expr, - std::vector &dest) const + mp_vectort &dest) { if(expr.id()==ID_constant) { if(expr.type().id()==ID_struct) { + dest.reserve(get_size(expr.type())); + bool error=false; + + forall_operands(it, expr) + { + if(it->type().id()==ID_code) + continue; + + size_t sub_size=get_size(it->type()); + if(sub_size==0) + continue; + + mp_vectort tmp; + evaluate(*it, tmp); + + if(tmp.size()==sub_size) + { + for(size_t i=0; itype().id()==ID_code) continue; - unsigned sub_size=get_size(it->type()); + size_t sub_size=get_size(it->type()); if(sub_size==0) continue; - std::vector tmp; + mp_vectort tmp; evaluate(*it, tmp); if(tmp.size()==sub_size) { - for(unsigned i=0; i tmp0, tmp1; + mp_vectort tmp0, tmp1; evaluate(expr.op0(), tmp0); evaluate(expr.op1(), tmp1); @@ -172,7 +691,7 @@ void interpretert::evaluate( forall_operands(it, expr) { - std::vector tmp; + mp_vectort tmp; evaluate(*it, tmp); if(tmp.size()==1 && tmp.front()!=0) @@ -191,20 +710,20 @@ void interpretert::evaluate( if(expr.operands().size()!=3) throw "if expects three operands"; - std::vector tmp0, tmp1, tmp2; + mp_vectort tmp0, tmp1; evaluate(expr.op0(), tmp0); - evaluate(expr.op1(), tmp1); - evaluate(expr.op2(), tmp2); - if(tmp0.size()==1 && tmp1.size()==1 && tmp2.size()==1) + if(tmp0.size()==1) { - const mp_integer &op0=tmp0.front(); - const mp_integer &op1=tmp1.front(); - const mp_integer &op2=tmp2.front(); - - dest.push_back(op0!=0?op1:op2); + if(tmp0.front()!=0) + evaluate(expr.op1(), tmp1); + else + evaluate(expr.op2(), tmp1); } + if(tmp1.size()==1) + dest.push_back(tmp1.front()); + return; } else if(expr.id()==ID_and) @@ -216,7 +735,7 @@ void interpretert::evaluate( forall_operands(it, expr) { - std::vector tmp; + mp_vectort tmp; evaluate(*it, tmp); if(tmp.size()==1 && tmp.front()==0) @@ -235,7 +754,7 @@ void interpretert::evaluate( if(expr.operands().size()!=1) throw id2string(expr.id())+" expects one operand"; - std::vector tmp; + mp_vectort tmp; evaluate(expr.op0(), tmp); if(tmp.size()==1) @@ -249,7 +768,7 @@ void interpretert::evaluate( forall_operands(it, expr) { - std::vector tmp; + mp_vectort tmp; evaluate(*it, tmp); if(tmp.size()==1) result+=tmp.front(); @@ -281,7 +800,7 @@ void interpretert::evaluate( forall_operands(it, expr) { - std::vector tmp; + mp_vectort tmp; evaluate(*it, tmp); if(tmp.size()==1) { @@ -317,7 +836,7 @@ void interpretert::evaluate( if(expr.operands().size()!=2) throw "- expects two operands"; - std::vector tmp0, tmp1; + mp_vectort tmp0, tmp1; evaluate(expr.op0(), tmp0); evaluate(expr.op1(), tmp1); @@ -330,7 +849,7 @@ void interpretert::evaluate( if(expr.operands().size()!=2) throw "/ expects two operands"; - std::vector tmp0, tmp1; + mp_vectort tmp0, tmp1; evaluate(expr.op0(), tmp0); evaluate(expr.op1(), tmp1); @@ -343,7 +862,7 @@ void interpretert::evaluate( if(expr.operands().size()!=1) throw "unary- expects one operand"; - std::vector tmp0; + mp_vectort tmp0; evaluate(expr.op0(), tmp0); if(tmp0.size()==1) @@ -358,22 +877,130 @@ void interpretert::evaluate( dest.push_back(evaluate_address(expr.op0())); return; } + else if(expr.id()==ID_pointer_offset) + { + if(expr.operands().size()!=1) + throw "pointer_offset expects one operand"; + if(expr.op0().type().id()!=ID_pointer) + throw "pointer_offset expects a pointer operand"; + mp_vectort result; + evaluate(expr.op0(), result); + if(result.size()==1) + { + // Return the distance, in bytes, between the address returned + // and the beginning of the underlying object. + mp_integer address=result[0]; + if(address>0 && address0) + { + dest.insert(dest.end(), + extract_from.begin()+memory_offset.to_long(), + extract_from.begin()+(memory_offset+target_type_leaves).to_long()); + return; + } + } + } + } else if(expr.id()==ID_dereference || expr.id()==ID_index || expr.id()==ID_symbol || expr.id()==ID_member) { - mp_integer a=evaluate_address(expr); - dest.resize(get_size(expr.type())); - read(a, dest); - return; + mp_integer address=evaluate_address( + expr, + true); // fail quietly + if(address.is_zero() && expr.id()==ID_index) + { + // Try reading from a constant array: + mp_vectort idx; + evaluate(expr.op1(), idx); + if(idx.size()==1) + { + mp_integer read_from_index=idx[0]; + if(expr.op0().id()==ID_array) + { + const auto &ops=expr.op0().operands(); + assert(read_from_index.is_long()); + if(read_from_index>=0 && read_from_index tmp; + mp_vectort tmp; evaluate(expr.op0(), tmp); if(tmp.size()==1) @@ -399,31 +1026,38 @@ void interpretert::evaluate( dest.push_back(binary2integer(s, false)); return; } - else if(expr.type().id()==ID_bool) + else if((expr.type().id()==ID_bool) || (expr.type().id()==ID_c_bool)) { dest.push_back(value!=0); return; } } } - else if(expr.id()==ID_ashr) + else if((expr.id()==ID_array) || (expr.id()==ID_array_of)) { - if(expr.operands().size()!=2) - throw "ashr expects two operands"; - - std::vector tmp0, tmp1; - evaluate(expr.op0(), tmp0); - evaluate(expr.op1(), tmp1); - - if(tmp0.size()==1 && tmp1.size()==1) - dest.push_back(tmp0.front()/power(2, tmp1.front())); - + forall_operands(it, expr) + { + evaluate(*it, dest); + } return; } - - std::cout << "!! failed to evaluate expression: " - << from_expr(ns, function->first, expr) - << std::endl; + else if(expr.id()==ID_nil) + { + dest.push_back(0); + return; + } + else if(expr.id()==ID_infinity) + { + if(expr.type().id()==ID_signedbv) + { + error() << "Infinite size arrays not supported" << eom; + return; + } + } + error() << "!! failed to evaluate expression: " + << from_expr(ns, function->first, expr) << "\n" + << expr.id() << "[" << expr.type().id() << "]" + << eom; } /*******************************************************************\ @@ -438,11 +1072,16 @@ Function: interpretert::evaluate_address \*******************************************************************/ -mp_integer interpretert::evaluate_address(const exprt &expr) const +mp_integer interpretert::evaluate_address( + const exprt &expr, + bool fail_quietly) { if(expr.id()==ID_symbol) { - const irep_idt &identifier=expr.get(ID_identifier); + const irep_idt &identifier= + is_ssa_expr(expr) ? + to_ssa_expr(expr).get_original_name() : + expr.get(ID_identifier); interpretert::memory_mapt::const_iterator m_it1= memory_map.find(identifier); @@ -458,13 +1097,16 @@ mp_integer interpretert::evaluate_address(const exprt &expr) const if(m_it2!=call_stack.top().local_map.end()) return m_it2->second; } + mp_integer address=memory.size(); + build_memory_map(to_symbol_expr(expr).get_identifier(), expr.type()); + return address; } else if(expr.id()==ID_dereference) { if(expr.operands().size()!=1) throw "dereference expects one operand"; - std::vector tmp0; + mp_vectort tmp0; evaluate(expr.op0(), tmp0); if(tmp0.size()==1) @@ -475,11 +1117,15 @@ mp_integer interpretert::evaluate_address(const exprt &expr) const if(expr.operands().size()!=2) throw "index expects two operands"; - std::vector tmp1; + mp_vectort tmp1; evaluate(expr.op1(), tmp1); if(tmp1.size()==1) - return evaluate_address(expr.op0())+tmp1.front(); + { + auto base=evaluate_address(expr.op0(), fail_quietly); + if(!base.is_zero()) + return base+tmp1.front(); + } } else if(expr.id()==ID_member) { @@ -502,12 +1148,44 @@ mp_integer interpretert::evaluate_address(const exprt &expr) const offset+=get_size(comp.type()); } - return evaluate_address(expr.op0())+offset; + auto base=evaluate_address(expr.op0(), fail_quietly); + if(!base.is_zero()) + return base+offset; } - - std::cout << "!! failed to evaluate address: " + else if(expr.id()==ID_byte_extract_little_endian || + expr.id()==ID_byte_extract_big_endian) + { + if(expr.operands().size()!=2) + throw "byte_extract should have two operands"; + mp_vectort extract_offset; + evaluate(expr.op1(), extract_offset); + mp_vectort extract_from; + evaluate(expr.op0(), extract_from); + if(extract_offset.size()==1 && !extract_from.empty()) + { + mp_integer memory_offset; + if(!byte_offset_to_memory_offset(expr.op0().type(), + extract_offset[0], memory_offset)) + return evaluate_address(expr.op0(), fail_quietly)+memory_offset; + } + } + else if(expr.id()==ID_if) + { + mp_vectort result; + if_exprt address_cond( + expr.op0(), + address_of_exprt(expr.op1()), + address_of_exprt(expr.op2())); + evaluate(address_cond, result); + if(result.size()==1) + return result[0]; + } + if(!fail_quietly) + { + error() << "!! failed to evaluate address: " << from_expr(ns, function->first, expr) - << std::endl; + << eom; + } return 0; } diff --git a/src/goto-programs/show_properties.cpp b/src/goto-programs/show_properties.cpp index c7a6bdedf3f..cbbb00225c7 100644 --- a/src/goto-programs/show_properties.cpp +++ b/src/goto-programs/show_properties.cpp @@ -130,6 +130,10 @@ void show_properties_json( json_properties.push_back(jsont()).make_object(); json_property["name"]=json_stringt(id2string(property_id)); json_property["class"]=json_stringt(id2string(property_class)); + if(!source_location.get_basic_block_covered_lines().empty()) + json_property["coveredLines"]= + json_stringt( + id2string(source_location.get_basic_block_covered_lines())); json_property["sourceLocation"]=json(source_location); json_property["description"]=json_stringt(id2string(description)); json_property["expression"]= diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp new file mode 100644 index 00000000000..fe19c6fb0d9 --- /dev/null +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -0,0 +1,1405 @@ +/*******************************************************************\ + +Module: Preprocess a goto-programs so that calls to the java String + library are recognized by the string solver + +Author: Romain Brenguier + +Date: September 2016 + +\*******************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "string_refine_preprocess.h" + +/*******************************************************************\ + +Function: string_refine_preprocesst::check_java_type + + Inputs: a type and a string + + Outputs: Boolean telling whether the type is a struct with the given + tag or a symbolic type with the tag prefixed by "java::" + +\*******************************************************************/ + +bool string_refine_preprocesst::check_java_type( + const typet &type, const std::string &tag) +{ + if(type.id()==ID_symbol) + { + irep_idt tag_id=to_symbol_type(type).get_identifier(); + return tag_id=="java::"+tag; + } + else if(type.id()==ID_struct) + { + irep_idt tag_id=to_struct_type(type).get_tag(); + return tag_id==tag; + } + return false; +} +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_string_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string pointers + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_pointer_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_string_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_string_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_type(const typet &type) +{ + return check_java_type(type, "java.lang.String"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_string_builder_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string builder + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_builder_type(const typet &type) +{ + return check_java_type(type, "java.lang.StringBuilder"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_string_builder_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java StringBuilder + pointers + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_builder_pointer_type( + const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_string_builder_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_sequence_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java char sequence + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_sequence_type(const typet &type) +{ + return check_java_type(type, "java.lang.CharSequence"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_sequence_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of a pointer + to a java char sequence + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_sequence_pointer_type( + const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_char_sequence_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_array_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java char array + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_array_type(const typet &type) +{ + return check_java_type(type, "array[char]"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_array_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of a pointer + to a java char array + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_array_pointer_type( + const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_char_array_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::fresh_array + + Inputs: + type - an array type + location - a location in the program + + Outputs: a new symbol + + Purpose: add a symbol with static lifetime and name containing + `cprover_string_array` and given type + +\*******************************************************************/ + +symbol_exprt string_refine_preprocesst::fresh_array( + const typet &type, const source_locationt &location) +{ + symbolt array_symbol=get_fresh_aux_symbol( + type, + "cprover_string_array", + "cprover_string_array", + location, + ID_java, + symbol_table); + array_symbol.is_static_lifetime=true; + return array_symbol.symbol_expr(); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::fresh_string + + Inputs: + type - a type for refined strings + location - a location in the program + + Outputs: a new symbol + + Purpose: add a symbol with static lifetime and name containing + `cprover_string` and given type + +\*******************************************************************/ + +symbol_exprt string_refine_preprocesst::fresh_string( + const typet &type, const source_locationt &location) +{ + symbolt array_symbol=get_fresh_aux_symbol( + type, "cprover_string", "cprover_string", location, ID_java, symbol_table); + array_symbol.is_static_lifetime=true; + return array_symbol.symbol_expr(); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::declare_function + + Inputs: a name and a type + + Purpose: declare a function with the given name and type + +\*******************************************************************/ + +void string_refine_preprocesst::declare_function( + irep_idt function_name, const typet &type) +{ + auxiliary_symbolt func_symbol; + func_symbol.base_name=function_name; + func_symbol.is_static_lifetime=false; + func_symbol.mode=ID_java; + func_symbol.name=function_name; + func_symbol.type=type; + symbol_table.add(func_symbol); + goto_functions.function_map[function_name]; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::get_data_and_length_type_of_char_array + + Inputs: an expression, a reference to a data type and a reference to a + length type + + Purpose: assuming the expression is a char array, figure out what + the types for length and data are and put them into the references + given as argument + +\*******************************************************************/ + +void string_refine_preprocesst::get_data_and_length_type_of_char_array( + const exprt &expr, typet &data_type, typet &length_type) +{ + typet object_type=ns.follow(expr.type()); + assert(object_type.id()==ID_struct); + const struct_typet &struct_type=to_struct_type(object_type); + for(auto component : struct_type.components()) + if(component.get_name()=="length") + length_type=component.type(); + else if(component.get_name()=="data") + data_type=component.type(); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::get_data_and_length_type_of_string + + Inputs: an expression, a reference to a data type and a reference to a + length type + + Purpose: assuming the expression is a java string, figure out what + the types for length and data are and put them into the references + given as argument + +\*******************************************************************/ + +void string_refine_preprocesst::get_data_and_length_type_of_string( + const exprt &expr, typet &data_type, typet &length_type) +{ + assert(is_java_string_type(expr.type()) || + is_java_string_builder_type(expr.type()) || + is_java_char_sequence_type(expr.type())); + typet object_type=ns.follow(expr.type()); + const struct_typet &struct_type=to_struct_type(object_type); + for(const auto &component : struct_type.components()) + if(component.get_name()=="length") + length_type=component.type(); + else if(component.get_name()=="data") + data_type=component.type(); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_cprover_string_assign + + Inputs: a goto_program, a position in this program, an expression and a + location + + Outputs: an expression + + Purpose: Introduce a temporary variable for cprover strings; + returns the cprover_string corresponding to rhs if it is a string + pointer and the original rhs otherwise. + +\*******************************************************************/ + +exprt string_refine_preprocesst::make_cprover_string_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &rhs, + const source_locationt &location) +{ + if(implements_java_char_sequence(rhs.type())) + { + // We do the following assignments: + // 1 length= *(rhs->length) + // 2 cprover_string_array = *(rhs->data) + // 3 cprover_string = { length; cprover_string_array } + + dereference_exprt deref(rhs, rhs.type().subtype()); + typet data_type, length_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + std::list assignments; + + // 1) cprover_string_length= *(rhs->length) + symbolt sym_length=get_fresh_aux_symbol( + length_type, + "cprover_string_length", + "cprover_string_length", + location, + ID_java, + symbol_table); + symbol_exprt cprover_length=sym_length.symbol_expr(); + member_exprt length(deref, "length", length_type); + assignments.emplace_back(cprover_length, length); + + // 2) cprover_string_array = *(rhs->data) + symbol_exprt array_lhs=fresh_array(data_type.subtype(), location); + member_exprt data(deref, "data", data_type); + dereference_exprt deref_data(data, data_type.subtype()); + assignments.emplace_back(array_lhs, deref_data); + + // 3) cprover_string = { cprover_string_length; cprover_string_array } + // This assignment is useful for finding witnessing strings for counter + // examples + refined_string_typet ref_type(length_type, java_char_type()); + string_exprt new_rhs(cprover_length, array_lhs, ref_type); + + symbol_exprt lhs=fresh_string(new_rhs.type(), location); + assignments.emplace_back(lhs, new_rhs); + + insert_assignments( + goto_program, target, target->function, location, assignments); + target=goto_program.insert_after(target); + return new_rhs; + } + else if(rhs.id()==ID_typecast && + implements_java_char_sequence(rhs.op0().type())) + { + exprt new_rhs=make_cprover_string_assign( + goto_program, target, rhs.op0(), location); + return typecast_exprt(new_rhs, rhs.type()); + } + else + return rhs; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_cprover_char_array_assign + + Inputs: a goto_program, a position in this program, an expression of + type char array pointer and a location + + Outputs: a string expression + + Purpose: Introduce a temporary variable for cprover strings; + returns the cprover_string corresponding to rhs + +\*******************************************************************/ + +string_exprt string_refine_preprocesst::make_cprover_char_array_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &rhs, + const source_locationt &location) +{ + assert(is_java_char_array_pointer_type(rhs.type())); + + // We do the following assignments: + // deref=*(rhs->data) + // array= typecast(&deref); + // string={ rhs->length; array } + + dereference_exprt deref(rhs, rhs.type().subtype()); + typet length_type, data_type; + get_data_and_length_type_of_char_array(deref, data_type, length_type); + assert(data_type.id()==ID_pointer); + typet char_type=to_pointer_type(data_type).subtype(); + + refined_string_typet ref_type(length_type, java_char_type()); + typet content_type=ref_type.get_content_type(); + std::list assignments; + + // deref=*(rhs->data) + member_exprt array_rhs(deref, "data", data_type); + dereference_exprt deref_array(array_rhs, data_type.subtype()); + symbolt sym_lhs_deref=get_fresh_aux_symbol( + data_type.subtype(), + "char_array_assign$deref", + "char_array_assign$deref", + location, + ID_java, + symbol_table); + symbol_exprt lhs_deref=sym_lhs_deref.symbol_expr(); + assignments.emplace_back(lhs_deref, deref_array); + + // array=convert_pointer_to_char_array(*rhs->data) + declare_function(ID_cprover_string_array_of_char_pointer_func, content_type); + function_application_exprt fun_app(symbol_exprt( + ID_cprover_string_array_of_char_pointer_func), content_type); + fun_app.arguments().push_back(deref_array); + symbol_exprt array=fresh_array(content_type, location); + assignments.emplace_back(array, fun_app); + + // string={ rhs->length; string_array } + string_exprt new_rhs(get_length(deref, length_type), array, ref_type); + symbol_exprt lhs=fresh_string(ref_type, location); + assignments.emplace_back(lhs, new_rhs); + + insert_assignments( + goto_program, target, target->function, location, assignments); + target=goto_program.insert_after(target); + return new_rhs; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_normal_assign + + Inputs: a goto_program, a position in this program, an expression lhs, + a function type, a function name, a vector of arguments, a location + and a signature + + Purpose: replace the current instruction by: + > lhs=function_name(arguments) : return_type @ location + If given, signature can force String conversion of given arguments. + The convention for signature is one character by argument + and 'S' denotes string. + +\*******************************************************************/ + +void string_refine_preprocesst::make_normal_assign( + goto_programt &goto_program, + goto_programt::targett target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + function_application_exprt rhs( + symbol_exprt(function_name), function_type.return_type()); + rhs.add_source_location()=location; + declare_function(function_name, function_type); + + exprt::operandst processed_arguments=process_arguments( + goto_program, target, arguments, location, signature); + rhs.arguments()=processed_arguments; + + code_assignt assignment(lhs, rhs); + assignment.add_source_location()=location; + target->make_assignment(); + target->code=assignment; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::insert_assignments + + Inputs: a goto_program, a position in this program, a list of assignments + + Purpose: add the assignments to the program in the order they are given + +\*******************************************************************/ + +void string_refine_preprocesst::insert_assignments( + goto_programt &goto_program, + goto_programt::targett &target, + irep_idt function, + source_locationt location, + const std::list &va) +{ + if(va.empty()) + return; + + auto i=va.begin(); + target->make_assignment(); + target->code=*i; + target->function=function; + target->source_location=location; + for(i++; i!=va.end(); i++) + { + target=goto_program.insert_after(target); + target->make_assignment(); + target->code=*i; + target->function=function; + target->source_location=location; + } +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_string_assign + + Inputs: a goto_program, a position in this program, an expression lhs, + a function type, a function name, a vector of arguments, a location + and a signature + + Purpose: replace the current instruction by: + > lhs=malloc(String *) + > lhs->length=function_name_length(arguments) + > tmp_data=function_name_data(arguments) + > lhs->data=&tmp_data + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + assert(implements_java_char_sequence(function_type.return_type())); + dereference_exprt deref(lhs, lhs.type().subtype()); + typet object_type=ns.follow(deref.type()); + exprt object_size=size_of_expr(object_type, ns); + typet length_type, data_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + + std::string fnamel=id2string(function_name)+"_length"; + std::string fnamed=id2string(function_name)+"_data"; + declare_function(fnamel, length_type); + declare_function(fnamed, data_type); + function_application_exprt rhs_length(symbol_exprt(fnamel), length_type); + function_application_exprt rhs_data( + symbol_exprt(fnamed), data_type.subtype()); + + exprt::operandst processed_arguments=process_arguments( + goto_program, target, arguments, location, signature); + rhs_length.arguments()=processed_arguments; + rhs_data.arguments()=processed_arguments; + + symbolt sym_length=get_fresh_aux_symbol( + length_type, "length", "length", location, ID_java, symbol_table); + symbol_exprt tmp_length=sym_length.symbol_expr(); + symbol_exprt tmp_array=fresh_array(data_type.subtype(), location); + member_exprt lhs_length(deref, "length", length_type); + member_exprt lhs_data(deref, "data", tmp_array.type()); + + // lhs=malloc(String *) + assert(object_size.is_not_nil()); // got nil object_size + side_effect_exprt malloc_expr(ID_malloc); + malloc_expr.copy_to_operands(object_size); + malloc_expr.type()=pointer_typet(object_type); + malloc_expr.add_source_location()=location; + + refined_string_typet ref_type(length_type, data_type.subtype().subtype()); + string_exprt str(tmp_length, tmp_array, ref_type); + symbol_exprt cprover_string_sym=fresh_string(ref_type, location); + + std::list assigns; + assigns.emplace_back(lhs, malloc_expr); + assigns.emplace_back(tmp_length, rhs_length); + assigns.emplace_back(lhs_length, tmp_length); + assigns.emplace_back(tmp_array, rhs_data); + assigns.emplace_back(cprover_string_sym, str); + assigns.emplace_back(lhs_data, address_of_exprt(tmp_array)); + insert_assignments(goto_program, target, target->function, location, assigns); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_string_function + + Inputs: a position in a goto program, a function name, an expression lhs, + a function type, name, arguments, a location and a signature string + + Purpose: at the current position replace `lhs=s.some_function(x,...)` + by `lhs=function_name(s,x,...)`; + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_function( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + if(signature.length()>0) + { + if(signature.back()=='S') + { + code_typet ft=function_type; + ft.return_type()=jls_ptr; + typecast_exprt lhs2(lhs, jls_ptr); + + make_string_assign( + goto_program, + target, + lhs2, + ft, + function_name, + arguments, + location, + signature); + } + else + make_normal_assign( + goto_program, + target, + lhs, + function_type, + function_name, + arguments, + location, + signature); + } + else if(implements_java_char_sequence(function_type.return_type())) + make_string_assign( + goto_program, + target, + lhs, + function_type, + function_name, + arguments, + location, + signature); + else + make_normal_assign( + goto_program, + target, + lhs, + function_type, + function_name, + arguments, + location, + signature); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_string_function + + Inputs: a position in a goto program, a function name and two Boolean options + + Purpose: at the current position replace `lhs=s.some_function(x,...)` + by `lhs=function_name(s,x,...)`; + option `assign_first_arg` uses `s` instead of `lhs` in the resulting + expression, Warning : it assumes that `s` is string-like + option `skip_first_arg`, removes `s` from the arguments, ie `x` is + the first one; + arguments that are string (TODO: and char array) are replaced + by string_exprt + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_function( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature, + bool assign_first_arg, + bool skip_first_arg) +{ + code_function_callt &function_call=to_code_function_call(target->code); + code_typet function_type=to_code_type(function_call.function().type()); + code_typet new_type; + const source_locationt &loc=function_call.source_location(); + declare_function(function_name, function_type); + function_application_exprt rhs; + std::vector args; + if(assign_first_arg) + { + assert(!function_call.arguments().empty()); + rhs.type()=function_call.arguments()[0].type(); + } + else + rhs.type()=function_type.return_type(); + rhs.add_source_location()=function_call.source_location(); + rhs.function()=symbol_exprt(function_name); + + std::size_t start_index=skip_first_arg?1:0; + for(std::size_t i=start_index; i tmp=function_name(x,...) + > s->data=tmp.data + > s->length=tmp.length + > r=s + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_function_side_effect( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature) +{ + // TODO: Here we create a copy of the code_function_cally. We would prefer + // accessing it by reference, but this creates problems. + code_function_callt function_call=to_code_function_call(target->code); + source_locationt loc=function_call.source_location(); + std::list assignments; + const exprt &lhs=function_call.lhs(); + assert(!function_call.arguments().empty()); + const exprt &s=function_call.arguments()[0]; + code_typet function_type=to_code_type(function_call.type()); + + function_type.return_type()=s.type(); + + if(lhs.is_not_nil()) + { + symbol_exprt tmp_string=fresh_string(lhs.type(), loc); + + make_string_assign( + goto_program, + target, + tmp_string, + function_type, + function_name, + function_call.arguments(), + loc, + signature); + dereference_exprt deref_lhs(s, s.type().subtype()); + typet data_type, length_type; + get_data_and_length_type_of_string(deref_lhs, data_type, length_type); + member_exprt lhs_data(deref_lhs, "data", data_type); + member_exprt lhs_length(deref_lhs, "length", length_type); + dereference_exprt deref_rhs(tmp_string, s.type().subtype()); + member_exprt rhs_data(deref_rhs, "data", data_type); + member_exprt rhs_length(deref_rhs, "length", length_type); + assignments.emplace_back(lhs_length, rhs_length); + assignments.emplace_back(lhs_data, rhs_data); + assignments.emplace_back(lhs, s); + target=goto_program.insert_after(target); + insert_assignments( + goto_program, target, target->function, loc, assignments); + } + else + { + make_string_function( + goto_program, target, function_name, signature, true, false); + } +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::build_function_application + + Inputs: a function name, a type, a location and a vector of arguments + + Outputs: a function application expression + + Purpose: declare a function and construct an function application expression + with the given function name, type, location and arguments + +\*******************************************************************/ + +function_application_exprt + string_refine_preprocesst::build_function_application( + const irep_idt &function_name, + const typet &type, + const source_locationt &location, + const exprt::operandst &arguments) +{ + declare_function(function_name, type); + function_application_exprt function_app(symbol_exprt(function_name), type); + function_app.add_source_location()=location; + for(const auto &arg : arguments) + function_app.arguments().push_back(arg); + + return function_app; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_to_char_array_function + + Inputs: a goto program and a position in that goto program + + Purpose: at the given position replace `return_tmp0=s.toCharArray()` with: + > return_tmp0 = malloc(array[char]); + > return_tmp0->data=&((s->data)[0]) + > return_tmp0->length=s->length + +\*******************************************************************/ + +void string_refine_preprocesst::make_to_char_array_function( + goto_programt &goto_program, goto_programt::targett &target) +{ + const code_function_callt &function_call=to_code_function_call(target->code); + source_locationt location=function_call.source_location(); + + assert(!function_call.arguments().empty()); + const exprt &string_argument=function_call.arguments()[0]; + assert(is_java_string_pointer_type(string_argument.type())); + + typet deref_type=function_call.lhs().type().subtype(); + const exprt &lhs=function_call.lhs(); + dereference_exprt deref_lhs(lhs, deref_type); + + dereference_exprt deref(string_argument, string_argument.type().subtype()); + typet length_type, data_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + std::list assignments; + + // lhs=malloc(array[char]) + typet object_type=ns.follow(deref_type); + exprt object_size=size_of_expr(object_type, ns); + + if(object_size.is_nil()) + debug() << "string_refine_preprocesst::make_to_char_array_function " + << "got nil object_size" << eom; + + side_effect_exprt malloc_expr(ID_malloc); + malloc_expr.copy_to_operands(object_size); + malloc_expr.type()=pointer_typet(object_type); + malloc_expr.add_source_location()=location; + assignments.emplace_back(lhs, malloc_expr); + + + // &((s->data)[0]) + exprt rhs_data=get_data(deref, data_type); + dereference_exprt rhs_array(rhs_data, data_type.subtype()); + exprt first_index=from_integer(0, java_int_type()); + index_exprt first_element(rhs_array, first_index, java_char_type()); + address_of_exprt rhs_pointer(first_element); + + // return_tmp0->data=&((s->data)[0]) + exprt lhs_data=get_data(deref_lhs, data_type); + assignments.emplace_back(lhs_data, rhs_pointer); + + // return_tmp0->length=s->length + exprt rhs_length=get_length(deref, length_type); + exprt lhs_length=get_length(deref_lhs, length_type); + assignments.emplace_back(lhs_length, rhs_length); + insert_assignments( + goto_program, target, target->function, location, assignments); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::process_arguments + + Inputs: a goto program, a position, a list of expressions, a location and a + signature + + Outputs: a list of expressions + + Purpose: for each expression that is a string or that is at a position with + an 'S' character in the signature, we declare a new `cprover_string` + whose contents is deduced from the expression and replace the + expression by this cprover_string in the output list; + in the other case the expression is kept as is for the output list. + +\*******************************************************************/ + +exprt::operandst string_refine_preprocesst::process_arguments( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + exprt::operandst new_arguments; + + for(std::size_t i=0; isecond; + else + return ""; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::replace_string_calls + + Inputs: a function in a goto_program + + Purpose: goes through the instructions, replace function calls to string + function by equivalent instructions using functions defined + for the string solver, replace string literals by string + expressions of the type used by the string solver + TODO: the current implementation is only for java functions, + we should add support for other languages + +\*******************************************************************/ + +void string_refine_preprocesst::replace_string_calls( + goto_functionst::function_mapt::iterator f_it) +{ + goto_programt &goto_program=f_it->second.body; + + Forall_goto_program_instructions(target, goto_program) + { + if(target->is_function_call()) + { + code_function_callt &function_call=to_code_function_call(target->code); + + if(function_call.function().id()==ID_symbol) + { + const irep_idt &function_id= + to_symbol_expr(function_call.function()).get_identifier(); + std::string signature=function_signature(function_id); + + auto it=string_functions.find(function_id); + if(it!=string_functions.end()) + make_string_function( + goto_program, target, it->second, signature, false, false); + + it=side_effect_functions.find(function_id); + if(it!=side_effect_functions.end()) + make_string_function_side_effect( + goto_program, target, it->second, signature); + + it=string_function_calls.find(function_id); + if(it!=string_function_calls.end()) + make_string_function_call( + goto_program, target, it->second, signature); + + if(function_id==irep_idt("java::java.lang.String.toCharArray:()[C")) + make_to_char_array_function(goto_program, target); + } + } + else + { + if(target->is_assign()) + { + // In assignments we replace string literals and C string functions + code_assignt assignment=to_code_assign(target->code); + + exprt new_rhs=assignment.rhs(); + code_assignt new_assignment(assignment.lhs(), new_rhs); + + if(new_rhs.id()==ID_function_application) + { + function_application_exprt f=to_function_application_expr(new_rhs); + const exprt &name=f.function(); + assert(name.id()==ID_symbol); + const irep_idt &id=to_symbol_expr(name).get_identifier(); + auto it=c_string_functions.find(id); + if(it!=c_string_functions.end()) + { + declare_function(it->second, f.type()); + f.function()=symbol_exprt(it->second); + new_assignment=code_assignt(assignment.lhs(), f); + } + } + + new_assignment.add_source_location()=assignment.source_location(); + target->make_assignment(); + target->code=new_assignment; + } + } + } + return; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::initialize_string_function_table + + Purpose: fill maps with correspondance from java method names to cprover + functions + +\*******************************************************************/ + +void string_refine_preprocesst::initialize_string_function_table() +{ + string_functions["java::java.lang.String.codePointAt:(I)I"]= + ID_cprover_string_code_point_at_func; + string_functions["java::java.lang.String.codePointBefore:(I)I"]= + ID_cprover_string_code_point_before_func; + string_functions["java::java.lang.String.codePointCount:(II)I"]= + ID_cprover_string_code_point_count_func; + string_functions["java::java.lang.String.offsetByCodePoints:(II)I"]= + ID_cprover_string_offset_by_code_point_func; + string_functions["java::java.lang.String.hashCode:()I"]= + ID_cprover_string_hash_code_func; + string_functions["java::java.lang.String.indexOf:(I)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.indexOf:(II)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.indexOf:(Ljava/lang/String;)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.indexOf:(Ljava/lang/String;I)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.lastIndexOf:(I)I"]= + ID_cprover_string_last_index_of_func; + string_functions["java::java.lang.String.lastIndexOf:(II)I"]= + ID_cprover_string_last_index_of_func; + string_functions + ["java::java.lang.String.lastIndexOf:(Ljava/lang/String;)I"]= + ID_cprover_string_last_index_of_func; + string_functions + ["java::java.lang.String.lastIndexOf:(Ljava/lang/String;I)I"]= + ID_cprover_string_last_index_of_func; + string_functions + ["java::java.lang.String.concat:(Ljava/lang/String;)Ljava/lang/String;"]= + ID_cprover_string_concat_func; + string_functions["java::java.lang.String.length:()I"]= + ID_cprover_string_length_func; + string_functions["java::java.lang.StringBuilder.length:()I"]= + ID_cprover_string_length_func; + string_functions["java::java.lang.String.equals:(Ljava/lang/Object;)Z"]= + ID_cprover_string_equal_func; + string_functions + ["java::java.lang.String.equalsIgnoreCase:(Ljava/lang/String;)Z"]= + ID_cprover_string_equals_ignore_case_func; + string_functions["java::java.lang.String.startsWith:(Ljava/lang/String;)Z"]= + ID_cprover_string_startswith_func; + string_functions + ["java::java.lang.String.startsWith:(Ljava/lang/String;I)Z"]= + ID_cprover_string_startswith_func; + string_functions["java::java.lang.String.endsWith:(Ljava/lang/String;)Z"]= + ID_cprover_string_endswith_func; + string_functions["java::java.lang.String.substring:(II)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions["java::java.lang.String.substring:(II)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions["java::java.lang.String.substring:(I)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions + ["java::java.lang.StringBuilder.substring:(II)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions + ["java::java.lang.StringBuilder.substring:(I)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions + ["java::java.lang.String.subSequence:(II)Ljava/lang/CharSequence;"]= + ID_cprover_string_substring_func; + string_functions["java::java.lang.String.trim:()Ljava/lang/String;"]= + ID_cprover_string_trim_func; + string_functions["java::java.lang.String.toLowerCase:()Ljava/lang/String;"]= + ID_cprover_string_to_lower_case_func; + string_functions["java::java.lang.String.toUpperCase:()Ljava/lang/String;"]= + ID_cprover_string_to_upper_case_func; + string_functions["java::java.lang.String.replace:(CC)Ljava/lang/String;"]= + ID_cprover_string_replace_func; + string_functions + ["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= + ID_cprover_string_contains_func; + string_functions["java::java.lang.String.compareTo:(Ljava/lang/String;)I"]= + ID_cprover_string_compare_to_func; + string_functions["java::java.lang.String.intern:()Ljava/lang/String;"]= + ID_cprover_string_intern_func; + string_functions["java::java.lang.String.isEmpty:()Z"]= + ID_cprover_string_is_empty_func; + string_functions["java::java.lang.String.charAt:(I)C"]= + ID_cprover_string_char_at_func; + string_functions["java::java.lang.StringBuilder.charAt:(I)C"]= + ID_cprover_string_char_at_func; + string_functions["java::java.lang.CharSequence.charAt:(I)C"]= + ID_cprover_string_char_at_func; + string_functions + ["java::java.lang.StringBuilder.toString:()Ljava/lang/String;"]= + ID_cprover_string_copy_func; + + string_functions["java::java.lang.String.valueOf:(F)Ljava/lang/String;"]= + ID_cprover_string_of_float_func; + string_functions["java::java.lang.Float.toString:(F)Ljava/lang/String;"]= + ID_cprover_string_of_float_func; + string_functions["java::java.lang.Integer.toString:(I)Ljava/lang/String;"]= + ID_cprover_string_of_int_func; + string_functions["java::java.lang.String.valueOf:(I)Ljava/lang/String;"]= + ID_cprover_string_of_int_func; + string_functions["java::java.lang.Integer.toHexString:(I)Ljava/lang/String;"]= + ID_cprover_string_of_int_hex_func; + string_functions["java::java.lang.String.valueOf:(L)Ljava/lang/String;"]= + ID_cprover_string_of_long_func; + string_functions["java::java.lang.String.valueOf:(D)Ljava/lang/String;"]= + ID_cprover_string_of_double_func; + string_functions["java::java.lang.String.valueOf:(Z)Ljava/lang/String;"]= + ID_cprover_string_of_bool_func; + string_functions["java::java.lang.String.valueOf:(C)Ljava/lang/String;"]= + ID_cprover_string_of_char_func; + string_functions["java::java.lang.Integer.parseInt:(Ljava/lang/String;)I"]= + ID_cprover_string_parse_int_func; + + side_effect_functions + ["java::java.lang.StringBuilder.append:(Ljava/lang/String;)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_func; + side_effect_functions["java::java.lang.StringBuilder.setCharAt:(IC)V"]= + ID_cprover_string_char_set_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(I)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_int_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(J)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_long_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(Z)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_bool_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(C)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_char_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(D)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_double_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(F)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_float_func; + side_effect_functions + ["java::java.lang.StringBuilder.appendCodePoint:(I)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_code_point_func; + side_effect_functions + ["java::java.lang.StringBuilder.delete:(II)Ljava/lang/StringBuilder;"]= + ID_cprover_string_delete_func; + side_effect_functions + ["java::java.lang.StringBuilder.deleteCharAt:(I)Ljava/lang/StringBuilder;"]= + ID_cprover_string_delete_char_at_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(II)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_int_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(IJ)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_long_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(IC)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_char_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(IZ)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_bool_func; + side_effect_functions + ["java::java.lang.StringBuilder.setLength:(I)V"]= + ID_cprover_string_set_length_func; + + + + side_effect_functions + ["java::java.lang.StringBuilder.append:([C)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(I[CII)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(I[C)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_func; + // TODO clean irep ids from insert_char_array etc... + + string_function_calls + ["java::java.lang.String.:(Ljava/lang/String;)V"]= + ID_cprover_string_copy_func; + string_function_calls + ["java::java.lang.String.:(Ljava/lang/StringBuilder;)V"]= + ID_cprover_string_copy_func; + string_function_calls + ["java::java.lang.StringBuilder.:(Ljava/lang/String;)V"]= + ID_cprover_string_copy_func; + string_function_calls["java::java.lang.String.:()V"]= + ID_cprover_string_empty_string_func; + string_function_calls["java::java.lang.StringBuilder.:()V"]= + ID_cprover_string_empty_string_func; + + string_function_calls["java::java.lang.String.:([C)V"]= + ID_cprover_string_copy_func; + string_function_calls["java::java.lang.String.:([CII)V"]= + ID_cprover_string_copy_func; + + string_functions + ["java::java.lang.String.valueOf:([CII)Ljava/lang/String;"]= + ID_cprover_string_copy_func; + string_functions + ["java::java.lang.String.valueOf:([C)Ljava/lang/String;"]= + ID_cprover_string_copy_func; + string_functions + ["java::java.lang.String.copyValueOf:([CII)Ljava/lang/String;"]= + ID_cprover_string_copy_func; + string_functions + ["java::java.lang.String.copyValueOf:([C)Ljava/lang/String;"]= + ID_cprover_string_copy_func; + + c_string_functions["__CPROVER_uninterpreted_string_literal_func"]= + ID_cprover_string_literal_func; + c_string_functions["__CPROVER_uninterpreted_string_char_at_func"]= + ID_cprover_string_char_at_func; + c_string_functions["__CPROVER_uninterpreted_string_equal_func"]= + ID_cprover_string_equal_func; + c_string_functions["__CPROVER_uninterpreted_string_concat_func"]= + ID_cprover_string_concat_func; + c_string_functions["__CPROVER_uninterpreted_string_length_func"]= + ID_cprover_string_length_func; + c_string_functions["__CPROVER_uninterpreted_string_substring_func"]= + ID_cprover_string_substring_func; + c_string_functions["__CPROVER_uninterpreted_string_is_prefix_func"]= + ID_cprover_string_is_prefix_func; + c_string_functions["__CPROVER_uninterpreted_string_is_suffix_func"]= + ID_cprover_string_is_suffix_func; + c_string_functions["__CPROVER_uninterpreted_string_contains_func"]= + ID_cprover_string_contains_func; + c_string_functions["__CPROVER_uninterpreted_string_index_of_func"]= + ID_cprover_string_index_of_func; + c_string_functions["__CPROVER_uninterpreted_string_last_index_of_func"]= + ID_cprover_string_last_index_of_func; + c_string_functions["__CPROVER_uninterpreted_string_char_set_func"]= + ID_cprover_string_char_set_func; + c_string_functions["__CPROVER_uninterpreted_string_copy_func"]= + ID_cprover_string_copy_func; + c_string_functions["__CPROVER_uninterpreted_string_parse_int_func"]= + ID_cprover_string_parse_int_func; + c_string_functions["__CPROVER_uninterpreted_string_of_int_func"]= + ID_cprover_string_of_int_func; + + signatures["java::java.lang.String.equals:(Ljava/lang/Object;)Z"]="SSZ"; + signatures["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= + "SSZ"; + signatures["java::java.lang.StringBuilder.insert:(IZ)" + "Ljava/lang/StringBuilder;"]="SIZS"; + signatures["java::java.lang.StringBuilder.insert:(IJ)" + "Ljava/lang/StringBuilder;"]="SIJS"; + signatures["java::java.lang.StringBuilder.insert:(II)" + "Ljava/lang/StringBuilder;"]="SIIS"; + signatures["java::java.lang.StringBuilder.insert:(IC)" + "Ljava/lang/StringBuilder;"]="SICS"; + signatures["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]="SISS"; + signatures["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]="SISS"; + signatures["java::java.lang.StringBuilder.insert:(I[C)" + "Ljava/lang/StringBuilder;"]="SI[S"; + signatures["java::java.lang.String.intern:()Ljava/lang/String;"]="SV"; +} + +/*******************************************************************\ + +Constructor: string_refine_preprocesst::string_refine_preprocesst + + Inputs: a symbol table, goto functions, a message handler + + Purpose: process the goto function by replacing calls to string functions + +\*******************************************************************/ + +string_refine_preprocesst::string_refine_preprocesst( + symbol_tablet &_symbol_table, + goto_functionst &_goto_functions, + message_handlert &_message_handler): + messaget(_message_handler), + ns(_symbol_table), + symbol_table(_symbol_table), + goto_functions(_goto_functions), + next_symbol_id(0), + jls_ptr(symbol_typet("java::java.lang.String")) +{ + initialize_string_function_table(); + Forall_goto_functions(it, goto_functions) + replace_string_calls(it); +} diff --git a/src/goto-programs/string_refine_preprocess.h b/src/goto-programs/string_refine_preprocess.h new file mode 100644 index 00000000000..5ae398b757a --- /dev/null +++ b/src/goto-programs/string_refine_preprocess.h @@ -0,0 +1,198 @@ +/*******************************************************************\ + +Module: Preprocess a goto-programs so that calls to the java String + library are recognized by the PASS algorithm + +Author: Romain Brenguier + +Date: September 2016 + +\*******************************************************************/ + +#ifndef CPROVER_GOTO_PROGRAMS_STRING_REFINE_PREPROCESS_H +#define CPROVER_GOTO_PROGRAMS_STRING_REFINE_PREPROCESS_H + +#include +#include +#include + +class string_refine_preprocesst:public messaget +{ + public: + string_refine_preprocesst( + symbol_tablet &, goto_functionst &, message_handlert &); + + private: + namespacet ns; + symbol_tablet & symbol_table; + goto_functionst & goto_functions; + + typedef std::unordered_map id_mapt; + typedef std::unordered_map expr_mapt; + + // Map name of Java string functions to there equivalent in the solver + id_mapt side_effect_functions; + id_mapt string_functions; + id_mapt c_string_functions; + id_mapt string_function_calls; + + std::unordered_map signatures; + + // unique id for each newly created symbols + int next_symbol_id; + + void initialize_string_function_table(); + + static bool check_java_type(const typet &type, const std::string &tag); + + static bool is_java_string_pointer_type(const typet &type); + + static bool is_java_string_type(const typet &type); + + static bool is_java_string_builder_type(const typet &type); + + static bool is_java_string_builder_pointer_type(const typet &type); + + static bool is_java_char_sequence_type(const typet &type); + + static bool is_java_char_sequence_pointer_type(const typet &type); + + static bool is_java_char_array_type(const typet &type); + + static bool is_java_char_array_pointer_type(const typet &type); + + static bool implements_java_char_sequence(const typet &type) + { + return + is_java_char_sequence_pointer_type(type) || + is_java_string_builder_pointer_type(type) || + is_java_string_pointer_type(type); + } + + symbol_exprt fresh_array( + const typet &type, const source_locationt &location); + symbol_exprt fresh_string( + const typet &type, const source_locationt &location); + + // get the data member of a java string + static exprt get_data(const exprt &string, const typet &data_type) + { + return member_exprt(string, "data", data_type); + } + + // get the length member of a java string + exprt get_length(const exprt &string, const typet &length_type) + { + return member_exprt(string, "length", length_type); + } + + // type of pointers to string + pointer_typet jls_ptr; + exprt replace_string(const exprt &in); + exprt replace_string_in_assign(const exprt &in); + + void insert_assignments( + goto_programt &goto_program, + goto_programt::targett &target, + irep_idt function, + source_locationt location, + const std::list &va); + + exprt replace_string_pointer(const exprt &in); + + // Replace string builders by expression of the mapping and make + // assignments for strings as string_exprt + exprt::operandst process_arguments( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature=""); + + // Signature of the named function if it is defined in the signature map, + // empty string otherwise + std::string function_signature(const irep_idt &function_id); + + void declare_function(irep_idt function_name, const typet &type); + + void get_data_and_length_type_of_string( + const exprt &expr, typet &data_type, typet &length_type); + + void get_data_and_length_type_of_char_array( + const exprt &expr, typet &data_type, typet &length_type); + + function_application_exprt build_function_application( + const irep_idt &function_name, + const typet &type, + const source_locationt &location, + const exprt::operandst &arguments); + + void make_normal_assign( + goto_programt &goto_program, + goto_programt::targett target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature=""); + + void make_string_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature); + + exprt make_cprover_string_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &rhs, + const source_locationt &location); + + string_exprt make_cprover_char_array_assign( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &rhs, + const source_locationt &location); + + void make_string_function( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature, + bool assign_first_arg=false, + bool skip_first_arg=false); + + void make_string_function( + goto_programt &goto_program, + goto_programt::targett &target, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature); + + void make_string_function_call( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature); + + void make_string_function_side_effect( + goto_programt &goto_program, + goto_programt::targett &target, + const irep_idt &function_name, + const std::string &signature); + + void make_to_char_array_function( + goto_programt &goto_program, goto_programt::targett &); + + void replace_string_calls(goto_functionst::function_mapt::iterator f_it); +}; + +#endif diff --git a/src/goto-programs/system_library_symbols.cpp b/src/goto-programs/system_library_symbols.cpp new file mode 100644 index 00000000000..250eb3b1cdb --- /dev/null +++ b/src/goto-programs/system_library_symbols.cpp @@ -0,0 +1,353 @@ +/*******************************************************************\ + +Module: Goto Programs + +Author: Thomas Kiley + +\*******************************************************************/ + +#include "system_library_symbols.h" +#include +#include +#include +#include + +system_library_symbolst::system_library_symbolst() +{ + init_system_library_map(); +} + +/*******************************************************************\ + +Function: system_library_symbolst::init_system_library_map + +Inputs: + +Outputs: + +Purpose: To generate a map of header file names -> list of symbols + The symbol names are reserved as the header and source files + will be compiled in to the goto program. + +\*******************************************************************/ + +void system_library_symbolst::init_system_library_map() +{ + // ctype.h + std::list ctype_syms= + { + "isalnum", "isalpha", "isblank", "iscntrl", "isdigit", "isgraph", + "islower", "isprint", "ispunct", "isspace", "isupper", "isxdigit", + "tolower", "toupper" + }; + add_to_system_library("ctype.h", ctype_syms); + + // fcntl.h + std::list fcntl_syms= + { + "creat", "fcntl", "open" + }; + add_to_system_library("fcntl.h", fcntl_syms); + + // locale.h + std::list locale_syms= + { + "setlocale" + }; + add_to_system_library("locale.h", locale_syms); + + // math.h + std::list math_syms= + { + "acos", "acosh", "asin", "asinh", "atan", "atan2", "atanh", + "cbrt", "ceil", "copysign", "cos", "cosh", "erf", "erfc", "exp", + "exp2", "expm1", "fabs", "fdim", "floor", "fma", "fmax", "fmin", + "fmod", "fpclassify", "fpclassifyl", "fpclassifyf", "frexp", + "hypot", "ilogb", "isfinite", "isinf", "isnan", "isnormal", + "j0", "j1", "jn", "ldexp", "lgamma", "llrint", "llround", "log", + "log10", "log1p", "log2", "logb", "lrint", "lround", "modf", "nan", + "nearbyint", "nextafter", "pow", "remainder", "remquo", "rint", + "round", "scalbln", "scalbn", "signbit", "sin", "sinh", "sqrt", + "tan", "tanh", "tgamma", "trunc", "y0", "y1", "yn", "isinff", + "isinfl", "isnanf", "isnanl" + }; + add_to_system_library("math.h", math_syms); + + // for some reason the math functions can sometimes be prefixed with + // a double underscore + std::list underscore_math_syms; + for(const irep_idt &math_sym : math_syms) + { + std::ostringstream underscore_id; + underscore_id << "__" << math_sym; + underscore_math_syms.push_back(irep_idt(underscore_id.str())); + } + add_to_system_library("math.h", underscore_math_syms); + + // pthread.h + std::list pthread_syms= + { + "pthread_cleanup_pop", "pthread_cleanup_push", + "pthread_cond_broadcast", "pthread_cond_destroy", + "pthread_cond_init", "pthread_cond_signal", + "pthread_cond_timedwait", "pthread_cond_wait", "pthread_create", + "pthread_detach", "pthread_equal", "pthread_exit", + "pthread_getspecific", "pthread_join", "pthread_key_delete", + "pthread_mutex_destroy", "pthread_mutex_init", + "pthread_mutex_lock", "pthread_mutex_trylock", + "pthread_mutex_unlock", "pthread_once", "pthread_rwlock_destroy", + "pthread_rwlock_init", "pthread_rwlock_rdlock", + "pthread_rwlock_unlock", "pthread_rwlock_wrlock", + "pthread_rwlockattr_destroy", "pthread_rwlockattr_getpshared", + "pthread_rwlockattr_init", "pthread_rwlockattr_setpshared", + "pthread_self", "pthread_setspecific" + }; + add_to_system_library("pthread.h", pthread_syms); + + // setjmp.h + std::list setjmp_syms= + { + "_longjmp", "_setjmp", "longjmp", "longjmperror", "setjmp", + "siglongjmp", "sigsetjmp" + }; + add_to_system_library("setjmp.h", setjmp_syms); + + // stdio.h + std::list stdio_syms= + { + "asprintf", "clearerr", "fclose", "fdopen", "feof", "ferror", + "fflush", "fgetc", "fgetln", "fgetpos", "fgets", "fgetwc", + "fgetws", "fileno", "fopen", "fprintf", "fpurge", "fputc", + "fputs", "fputwc", "fputws", "fread", "freopen", "fropen", + "fscanf", "fseek", "fsetpos", "ftell", "funopen", "fwide", + "fwopen", "fwprintf", "fwrite", "getc", "getchar", "getdelim", + "getline", "gets", "getw", "getwc", "getwchar", "mkdtemp", + "mkstemp", "mktemp", "perror", "printf", "putc", "putchar", + "puts", "putw", "putwc", "putwchar", "remove", "rewind", "scanf", + "setbuf", "setbuffer", "setlinebuf", "setvbuf", "snprintf", + "sprintf", "sscanf", "strerror", "swprintf", "sys_errlist", + "sys_nerr", "tempnam", "tmpfile", "tmpnam", "ungetc", "ungetwc", + "vasprintf", "vfprintf", "vfscanf", "vfwprintf", "vprintf", + "vscanf", "vsnprintf", "vsprintf", "vsscanf", "vswprintf", + "vwprintf", "wprintf", + /* non-public struct types */ + "tag-__sFILE", "tag-__sbuf", // OS X + "tag-_IO_FILE", "tag-_IO_marker", // Linux + }; + add_to_system_library("stdio.h", stdio_syms); + + // stdlib.h + std::list stdlib_syms= + { + "abort", "abs", "atexit", "atof", "atoi", "atol", "atoll", + "bsearch", "calloc", "div", "exit", "free", "getenv", "labs", + "ldiv", "llabs", "lldiv", "malloc", "mblen", "mbstowcs", "mbtowc", + "qsort", "rand", "realloc", "srand", "strtod", "strtof", "strtol", + "strtold", "strtoll", "strtoul", "strtoull", "system", "wcstombs", + "wctomb" + }; + add_to_system_library("stdlib.h", stdlib_syms); + + // string.h + std::list string_syms= + { + "strcat", "strncat", "strchr", "strrchr", "strcmp", "strncmp", + "strcpy", "strncpy", "strerror", "strlen", "strpbrk", "strspn", + "strcspn", "strstr", "strtok", "strcasecmp", "strncasecmp", "strdup", + "memset" + }; + add_to_system_library("string.h", string_syms); + + // time.h + std::list time_syms= + { + "asctime", "asctime_r", "ctime", "ctime_r", "difftime", "gmtime", + "gmtime_r", "localtime", "localtime_r", "mktime", + /* non-public struct types */ + "tag-timespec", "tag-timeval" + }; + add_to_system_library("time.h", time_syms); + + // unistd.h + std::list unistd_syms= + { + "_exit", "access", "alarm", "chdir", "chown", "close", "dup", + "dup2", "execl", "execle", "execlp", "execv", "execve", "execvp", + "fork", "fpathconf", "getcwd", "getegid", "geteuid", "getgid", + "getgroups", "getlogin", "getpgrp", "getpid", "getppid", "getuid", + "isatty", "link", "lseek", "pathconf", "pause", "pipe", "read", + "rmdir", "setgid", "setpgid", "setsid", "setuid", "sleep", + "sysconf", "tcgetpgrp", "tcsetpgrp", "ttyname", "ttyname_r", + "unlink", "write" + }; + add_to_system_library("unistd.h", unistd_syms); + + // sys/select.h + std::list sys_select_syms= + { + "select" + }; + add_to_system_library("sys/select.h", sys_select_syms); + + // sys/socket.h + std::list sys_socket_syms= + { + "accept", "bind", "connect" + }; + add_to_system_library("sys/socket.h", sys_socket_syms); + + // sys/stat.h + std::list sys_stat_syms= + { + "fstat", "lstat", "stat" + }; + add_to_system_library("sys/stat.h", sys_stat_syms); + + std::list fenv_syms= + { + "fenv_t", "fexcept_t", "feclearexcept", "fegetexceptflag", + "feraiseexcept", "fesetexceptflag", "fetestexcept", + "fegetround", "fesetround", "fegetenv", "feholdexcept", + "fesetenv", "feupdateenv" + }; + add_to_system_library("fenv.h", fenv_syms); + + std::list errno_syms= + { + "__error", "__errno_location", "__errno" + }; + add_to_system_library("errno.c", errno_syms); + + std::list noop_syms= + { + "__noop" + }; + add_to_system_library("noop.c", noop_syms); + +#if 0 + // sys/types.h + std::list sys_types_syms= + { + }; + add_to_system_library("sys/types.h", sys_types_syms); +#endif + + // sys/wait.h + std::list sys_wait_syms= + { + "wait", "waitpid" + }; + add_to_system_library("sys/wait.h", sys_wait_syms); +} + +/*******************************************************************\ + +Function: system_library_symbolst::add_to_system_library + +Inputs: + header_file - the name of the header file the symbol came from + symbols - a list of the names of the symbols in the header file + +Outputs: + +Purpose: To add the symbols from a specific header file to the + system library map. The symbol is used as the key so that + we can easily look up symbols. + +\*******************************************************************/ + +void system_library_symbolst::add_to_system_library( + irep_idt header_file, + std::list symbols) +{ + for(const irep_idt &symbol : symbols) + { + system_library_map[symbol]=header_file; + } +} + + +/*******************************************************************\ + +Function: system_library_symbolst::is_symbol_internal_symbol + +Inputs: + symbol - the symbol to check + +Outputs: True if the symbol is an internal symbol. If specific system + headers need to be included, the out_system_headers will contain + the headers. + +Purpose: To find out if a symbol is an internal symbol. + +\*******************************************************************/ + +bool system_library_symbolst::is_symbol_internal_symbol( + const symbolt &symbol, + std::set &out_system_headers) const +{ + const std::string &name_str=id2string(symbol.name); + + if(has_prefix(name_str, CPROVER_PREFIX) || + name_str=="__func__" || + name_str=="__FUNCTION__" || + name_str=="__PRETTY_FUNCTION__" || + name_str=="argc'" || + name_str=="argv'" || + name_str=="envp'" || + name_str=="envp_size'") + return true; + + // exclude nondet instructions + if(has_prefix(name_str, "nondet_")) + { + return true; + } + + if(has_prefix(name_str, "__VERIFIER")) + { + return true; + } + + const std::string &file_str=id2string(symbol.location.get_file()); + + // don't dump internal GCC builtins + if((file_str=="gcc_builtin_headers_alpha.h" || + file_str=="gcc_builtin_headers_arm.h" || + file_str=="gcc_builtin_headers_ia32.h" || + file_str=="gcc_builtin_headers_mips.h" || + file_str=="gcc_builtin_headers_power.h" || + file_str=="gcc_builtin_headers_generic.h") && + has_prefix(name_str, "__builtin_")) + return true; + + if(name_str=="__builtin_va_start" || + name_str=="__builtin_va_end" || + symbol.name==ID_gcc_builtin_va_arg) + { + out_system_headers.insert("stdarg.h"); + return true; + } + + // don't dump asserts + else if(name_str=="__assert_fail" || + name_str=="_assert" || + name_str=="__assert_c99" || + name_str=="_wassert") + { + return true; + } + + if(name_str.find("$link")!=std::string::npos) + return false; + + const auto &it=system_library_map.find(symbol.name); + + if(it!=system_library_map.end()) + { + out_system_headers.insert(id2string(it->second)); + return true; + } + + return false; +} diff --git a/src/goto-programs/system_library_symbols.h b/src/goto-programs/system_library_symbols.h new file mode 100644 index 00000000000..9438e052a76 --- /dev/null +++ b/src/goto-programs/system_library_symbols.h @@ -0,0 +1,38 @@ +/*******************************************************************\ + +Module: Goto Programs + +Author: Thomas Kiley + +\*******************************************************************/ + +#ifndef CPROVER_GOTO_PROGRAMS_SYSTEM_LIBRARY_SYMBOLS_H +#define CPROVER_GOTO_PROGRAMS_SYSTEM_LIBRARY_SYMBOLS_H + +#include +#include +#include +#include + +class symbolt; + +class system_library_symbolst +{ +public: + system_library_symbolst(); + + bool is_symbol_internal_symbol( + const symbolt &symbol, + std::set &out_system_headers) const; + +private: + void init_system_library_map(); + + void add_to_system_library( + irep_idt header_file, + std::list symbols); + + std::map system_library_map; +}; + +#endif // CPROVER_GOTO_PROGRAMS_SYSTEM_LIBRARY_SYMBOLS_H diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index f21b0bb1ce2..7f52c6ef77b 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -50,7 +50,16 @@ class java_bytecode_convert_classt:public messaget convert(parse_tree.parsed_class); else if(string_refinement_enabled && parse_tree.parsed_class.name=="java.lang.String") - add_string_type(); + add_string_type("java.lang.String"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.StringBuilder") + add_string_type("java.lang.StringBuilder"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.CharSequence") + add_string_type("java.lang.CharSequence"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.StringBuffer") + add_string_type("java.lang.StringBuffer"); else generate_class_stub(parse_tree.parsed_class.name); } @@ -72,7 +81,7 @@ class java_bytecode_convert_classt:public messaget void generate_class_stub(const irep_idt &class_name); void add_array_types(); - void add_string_type(); + void add_string_type(const irep_idt &class_name); }; /*******************************************************************\ @@ -406,15 +415,17 @@ bool java_bytecode_convert_class( Function: java_bytecode_convert_classt::add_string_type + Inputs: a name for the class such as "java.lang.String" + Purpose: Implements the java.lang.String type in the case that we provide an internal implementation. \*******************************************************************/ -void java_bytecode_convert_classt::add_string_type() +void java_bytecode_convert_classt::add_string_type(const irep_idt &class_name) { class_typet string_type; - string_type.set_tag("java.lang.String"); + string_type.set_tag(class_name); string_type.components().resize(3); string_type.components()[0].set_name("@java.lang.Object"); string_type.components()[0].set_pretty_name("@java.lang.Object"); @@ -432,8 +443,8 @@ void java_bytecode_convert_classt::add_string_type() string_type.add_base(symbol_typet("java::java.lang.Object")); symbolt string_symbol; - string_symbol.name="java::java.lang.String"; - string_symbol.base_name="java.lang.String"; + string_symbol.name="java::"+id2string(class_name); + string_symbol.base_name=id2string(class_name); string_symbol.type=string_type; string_symbol.is_type=true; @@ -445,8 +456,8 @@ void java_bytecode_convert_classt::add_string_type() symbolt string_equals_symbol; string_equals_symbol.name= "java::java.lang.String.equals:(Ljava/lang/Object;)Z"; - string_equals_symbol.base_name="java.lang.String.equals"; - string_equals_symbol.pretty_name="java.lang.String.equals"; + string_equals_symbol.base_name=id2string(class_name)+".equals"; + string_equals_symbol.pretty_name=id2string(class_name)+".equals"; string_equals_symbol.mode=ID_java; code_typet string_equals_type; diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 0912ec8b22a..8e14d5731dc 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -55,6 +55,56 @@ class patternt const char *p; }; +/*******************************************************************\ + +Function: assign_parameter_names + + Inputs: `ftype`: Function type whose parameters should be named + `name_prefix`: Prefix for parameter names, typically the + parent function's name. + `symbol_table`: Global symbol table + + Outputs: Assigns parameter names (side-effects on `ftype`) to + function stub parameters, which are initially nameless + as method conversion hasn't happened. + Also creates symbols in `symbol_table`. + + Purpose: See above + +\*******************************************************************/ + +void assign_parameter_names( + code_typet &ftype, + const irep_idt &name_prefix, + symbol_tablet &symbol_table) +{ + code_typet::parameterst ¶meters=ftype.parameters(); + + // Mostly borrowed from java_bytecode_convert.cpp; maybe factor this out. + // assign names to parameters + for(std::size_t i=0; isource_location; + loc.set_function(method_id); + c.add_source_location()=loc; + } else if(statement=="invokeinterface" || statement=="invokespecial" || statement=="invokevirtual" || @@ -1246,9 +1352,18 @@ codet java_bytecode_convert_methodt::convert_instructions( symbolt symbol; symbol.name=id; symbol.base_name=arg0.get(ID_C_base_name); + symbol.pretty_name= + id2string(arg0.get(ID_C_class)).substr(6)+"."+ + id2string(symbol.base_name)+"()"; symbol.type=arg0.type(); symbol.value.make_nil(); symbol.mode=ID_java; + + assign_parameter_names( + to_code_type(symbol.type), + symbol.name, + symbol_table); + symbol_table.add(symbol); } diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 4fb24c47b84..263d0c68810 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -220,6 +220,10 @@ class java_bytecode_convert_methodt:public messaget const code_typet &); const bytecode_infot &get_bytecode_info(const irep_idt &statement); + + void check_static_field_stub( + const symbol_exprt &se, + const irep_idt &basename); }; #endif diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 0e2eb25d0ec..9faa16f676a 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -777,7 +777,6 @@ bool java_bytecode_languaget::final(symbol_tablet &symbol_table) */ java_internal_additions(symbol_table); - main_function_resultt res= get_main_symbol(symbol_table, main_class, get_message_handler()); if(res.stop_convert) diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 77689d6e12a..b3bdc8a5aa1 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -48,7 +48,7 @@ class java_bytecode_languaget:public languaget symbol_tablet &context, const std::string &module) override; - bool final( + virtual bool final( symbol_tablet &context) override; void show_parse(std::ostream &out) override; diff --git a/src/java_bytecode/java_entry_point.cpp b/src/java_bytecode/java_entry_point.cpp index 60678dadfa1..5234a53200f 100644 --- a/src/java_bytecode/java_entry_point.cpp +++ b/src/java_bytecode/java_entry_point.cpp @@ -290,7 +290,8 @@ void java_record_outputs( codet output(ID_output); output.operands().resize(2); - const symbolt &return_symbol=symbol_table.lookup("return'"); + const symbolt &return_symbol= + symbol_table.lookup(JAVA_ENTRY_POINT_RETURN_SYMBOL); output.op0()= address_of_exprt( @@ -602,7 +603,7 @@ bool java_entry_point( auxiliary_symbolt return_symbol; return_symbol.mode=ID_C; return_symbol.is_static_lifetime=false; - return_symbol.name="return'"; + return_symbol.name=JAVA_ENTRY_POINT_RETURN_SYMBOL; return_symbol.base_name="return"; return_symbol.type=to_code_type(symbol.type).return_type(); diff --git a/src/java_bytecode/java_entry_point.h b/src/java_bytecode/java_entry_point.h index e6575734d80..69f5023609f 100644 --- a/src/java_bytecode/java_entry_point.h +++ b/src/java_bytecode/java_entry_point.h @@ -11,6 +11,8 @@ Author: Daniel Kroening, kroening@kroening.com #include +#define JAVA_ENTRY_POINT_RETURN_SYMBOL "return'" + bool java_entry_point( class symbol_tablet &symbol_table, const irep_idt &main_class, @@ -31,4 +33,4 @@ main_function_resultt get_main_symbol( message_handlert &, bool allow_no_body=false); -#endif // CPROVER_JAVA_BYTECODE_JAVA_ENTRY_POINT_H +#endif diff --git a/src/java_bytecode/java_object_factory.cpp b/src/java_bytecode/java_object_factory.cpp index 088a25f7038..c06387469cf 100644 --- a/src/java_bytecode/java_object_factory.cpp +++ b/src/java_bytecode/java_object_factory.cpp @@ -16,6 +16,8 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include +#include #include #include @@ -25,18 +27,6 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_types.h" #include "java_utils.h" -/*******************************************************************\ - -Function: gen_nondet_init - - Inputs: - - Outputs: - - Purpose: - -\*******************************************************************/ - class java_object_factoryt:public messaget { code_blockt &init_code; @@ -44,8 +34,18 @@ class java_object_factoryt:public messaget bool assume_non_null; size_t max_nondet_array_length; symbol_tablet &symbol_table; - message_handlert &message_handler; namespacet ns; + const source_locationt &loc; + + void set_null( + const exprt &expr, + const pointer_typet &ptr_type); + + void gen_pointer_target_init( + const exprt &expr, + const typet &target_type, + bool create_dynamic_objects, + update_in_placet update_in_place); public: java_object_factoryt( @@ -53,38 +53,40 @@ class java_object_factoryt:public messaget bool _assume_non_null, size_t _max_nondet_array_length, symbol_tablet &_symbol_table, - message_handlert &_message_handler): + message_handlert &_message_handler, + const source_locationt &_loc): + messaget(_message_handler), init_code(_init_code), assume_non_null(_assume_non_null), max_nondet_array_length(_max_nondet_array_length), symbol_table(_symbol_table), - message_handler(_message_handler), - ns(_symbol_table) + ns(_symbol_table), + loc(_loc) {} exprt allocate_object( const exprt &, const typet &, - const source_locationt &, bool create_dynamic_objects); - void gen_nondet_array_init(const exprt &expr, const source_locationt &); + void gen_nondet_array_init( + const exprt &expr, + update_in_placet); void gen_nondet_init( const exprt &expr, bool is_sub, irep_idt class_identifier, - const source_locationt &loc, bool skip_classid, bool create_dynamic_objects, - bool override=false, - const typet &override_type=empty_typet()); + bool override, + const typet &override_type, + update_in_placet); }; - /*******************************************************************\ -Function: gen_nondet_array_init +Function: java_object_factoryt::allocate_object Inputs: the target expression, desired object type, source location and Boolean whether to create a dynamic object @@ -100,7 +102,6 @@ Function: gen_nondet_array_init exprt java_object_factoryt::allocate_object( const exprt &target_expr, const typet &allocate_type, - const source_locationt &loc, bool create_dynamic_objects) { const typet &allocate_type_resolved=ns.follow(allocate_type); @@ -165,19 +166,142 @@ exprt java_object_factoryt::allocate_object( } } -// Override type says to ignore the LHS' real type, and init it with the given -// type regardless. Used at the moment for reference arrays, which are -// implemented as void* arrays but should be init'd as their true type with -// appropriate casts. +/*******************************************************************\ + +Function: java_object_factoryt::set_null + + Inputs: `expr`: pointer-typed lvalue expression to initialise + `ptr_type`: pointer type to write + + Outputs: + + Purpose: Adds an instruction to `init_code` null-initialising + `expr`. + +\*******************************************************************/ + +void java_object_factoryt::set_null( + const exprt &expr, + const pointer_typet &ptr_type) +{ + null_pointer_exprt null_pointer_expr(ptr_type); + code_assignt code(expr, null_pointer_expr); + code.add_source_location()=loc; + init_code.move_to_operands(code); +} + +/*******************************************************************\ + +Function: java_object_factoryt::gen_pointer_target_init + + Inputs: `expr`: pointer-typed lvalue expression to initialise + `target_type`: structure type to initialise, which may + not match *expr (for example, expr might be void*) + `create_dynamic_objects`: if true, use malloc to allocate + objects; otherwise generate fresh static symbols. + `update_in_place`: + NO_UPDATE_IN_PLACE: initialise `expr` from scratch + MUST_UPDATE_IN_PLACE: reinitialise an existing object + MAY_UPDATE_IN_PLACE: invalid input + + Outputs: + + Purpose: Initialises an object tree rooted at `expr`, allocating + child objects as necessary and nondet-initialising their + members, or if MUST_UPDATE_IN_PLACE is set, re-initialising + already-allocated objects. + +\*******************************************************************/ + +void java_object_factoryt::gen_pointer_target_init( + const exprt &expr, + const typet &target_type, + bool create_dynamic_objects, + update_in_placet update_in_place) +{ + assert(update_in_place!=MAY_UPDATE_IN_PLACE); + + if(target_type.id()==ID_struct && + has_prefix( + id2string(to_struct_type(target_type).get_tag()), + "java::array[")) + { + gen_nondet_array_init( + expr, + update_in_place); + } + else + { + exprt target; + if(update_in_place==NO_UPDATE_IN_PLACE) + { + target=allocate_object( + expr, + target_type, + create_dynamic_objects); + } + else + { + target=expr; + } + exprt init_expr; + if(target.id()==ID_address_of) + init_expr=target.op0(); + else + init_expr= + dereference_exprt(target, target.type().subtype()); + gen_nondet_init( + init_expr, + false, + "", + false, + create_dynamic_objects, + false, + typet(), + update_in_place); + } +} + +/*******************************************************************\ + +Function: java_object_factoryt::gen_nondet_init + + Inputs: `expr`: lvalue expression to initialise + `is_sub`: If true, `expr` is a substructure of a larger + object, which in Java necessarily means a base class. + not match *expr (for example, expr might be void*) + `class_identifier`: clsid to initialise @java.lang.Object. + @class_identifier + `skip_classid`: if true, skip initialising @class_identifier + `create_dynamic_objects`: if true, use malloc to allocate + objects; otherwise generate fresh static symbols. + `override`: If true, initialise with `override_type` instead + of `expr.type()` + `override_type`: Override type per above + `update_in_place`: + NO_UPDATE_IN_PLACE: initialise `expr` from scratch + MUST_UPDATE_IN_PLACE: reinitialise an existing object + MAY_UPDATE_IN_PLACE: generate a runtime nondet branch + between the NO_ and MUST_ cases. + + Outputs: + + Purpose: Initialises a primitive or object tree rooted at `expr`, + allocating child objects as necessary and nondet-initialising + their members, or if MUST_UPDATE_IN_PLACE is set, + re-initialising already-allocated objects. + +\*******************************************************************/ + void java_object_factoryt::gen_nondet_init( const exprt &expr, bool is_sub, irep_idt class_identifier, - const source_locationt &loc, bool skip_classid, bool create_dynamic_objects, bool override, - const typet &override_type) + const typet &override_type, + update_in_placet update_in_place) { const typet &type= override ? ns.follow(override_type) : ns.follow(expr.type()); @@ -196,82 +320,76 @@ void java_object_factoryt::gen_nondet_init( if(recursion_set.find(struct_tag)!=recursion_set.end() && struct_tag==class_identifier) { - // make null - null_pointer_exprt null_pointer_expr(pointer_type); - code_assignt code(expr, null_pointer_expr); - code.add_source_location()=loc; - init_code.copy_to_operands(code); - + if(update_in_place==NO_UPDATE_IN_PLACE) + set_null(expr, pointer_type); + // Otherwise leave it as it is. return; } } - code_labelt set_null_label; - code_labelt init_done_label; - - static size_t synthetic_constructor_count=0; + nondet_ifthenelset update_in_place_diamond( + init_code, + symbol_table, + loc, + ID_java, + "nondet_update_in_place"); - if(!assume_non_null) + if(update_in_place==MAY_UPDATE_IN_PLACE || + update_in_place==MUST_UPDATE_IN_PLACE) { - auto returns_null_sym= - new_tmp_symbol( - symbol_table, - loc, - c_bool_typet(1), - "opaque_returns_null"); - auto returns_null=returns_null_sym.symbol_expr(); - auto assign_returns_null= - code_assignt(returns_null, get_nondet_bool(returns_null_sym.type)); - assign_returns_null.add_source_location()=loc; - init_code.move_to_operands(assign_returns_null); - - auto set_null_inst= - code_assignt(expr, null_pointer_exprt(pointer_type)); - set_null_inst.add_source_location()=loc; - - std::string fresh_label= - "post_synthetic_malloc_"+std::to_string(++synthetic_constructor_count); - set_null_label=code_labelt(fresh_label, set_null_inst); - - init_done_label=code_labelt(fresh_label+"_init_done", code_skipt()); - + if(update_in_place==MAY_UPDATE_IN_PLACE) + update_in_place_diamond.begin_if(); + + // If we're trying to update an object in place + // but it is null, just leave it alone. + static unsigned long null_check_count=0; + std::ostringstream oss; + oss << "null_check_failed_" << (++null_check_count); + code_labelt post_null_check(oss.str(), code_skipt()); code_ifthenelset null_check; - exprt null_return= - zero_initializer(returns_null_sym.type, loc, ns, message_handler); - null_check.cond()= - notequal_exprt(returns_null, null_return); - null_check.then_case()=code_gotot(fresh_label); + null_check.cond()=equal_exprt(expr, null_pointer_exprt(pointer_type)); + null_check.then_case()=code_gotot(post_null_check.get_label()); init_code.move_to_operands(null_check); - } - if(java_is_array_type(subtype)) - gen_nondet_array_init(expr, loc); - else - { - exprt allocated= - allocate_object(expr, subtype, loc, create_dynamic_objects); - { - exprt init_expr; - if(allocated.id()==ID_address_of) - init_expr=allocated.op0(); - else - init_expr=dereference_exprt(allocated, allocated.type().subtype()); - gen_nondet_init( - init_expr, - false, - "", - loc, - false, - create_dynamic_objects); - } + gen_pointer_target_init( + expr, + subtype, + create_dynamic_objects, + MUST_UPDATE_IN_PLACE); + + init_code.move_to_operands(post_null_check); + + if(update_in_place==MUST_UPDATE_IN_PLACE) + return; + + update_in_place_diamond.begin_else(); } + nondet_ifthenelset init_null_diamond( + init_code, + symbol_table, + loc, + ID_java, + "nondet_ptr_is_null"); + if(!assume_non_null) { - init_code.copy_to_operands(code_gotot(init_done_label.get_label())); - init_code.move_to_operands(set_null_label); - init_code.move_to_operands(init_done_label); + init_null_diamond.begin_if(); + set_null(expr, pointer_type); + init_null_diamond.begin_else(); } + + gen_pointer_target_init( + expr, + subtype, + create_dynamic_objects, + NO_UPDATE_IN_PLACE); + + if(!assume_non_null) + init_null_diamond.finish(); + + if(update_in_place==MAY_UPDATE_IN_PLACE) + update_in_place_diamond.finish(); } else if(type.id()==ID_struct) { @@ -297,7 +415,7 @@ void java_object_factoryt::gen_nondet_init( if(name=="@class_identifier") { - if(skip_classid) + if(skip_classid || update_in_place==MUST_UPDATE_IN_PLACE) continue; irep_idt qualified_clsid="java::"+as_string(class_identifier); @@ -308,6 +426,8 @@ void java_object_factoryt::gen_nondet_init( } else if(name=="@lock") { + if(update_in_place==MUST_UPDATE_IN_PLACE) + continue; code_assignt code(me, from_integer(0, me.type())); code.add_source_location()=loc; init_code.copy_to_operands(code); @@ -322,13 +442,22 @@ void java_object_factoryt::gen_nondet_init( _is_sub?(class_identifier.empty()?struct_tag:class_identifier):""; #endif + // MUST_UPDATE_IN_PLACE only applies to this object. + // If this is a pointer to another object, offer the chance + // to leave it alone by setting MAY_UPDATE_IN_PLACE instead. + update_in_placet substruct_in_place= + update_in_place==MUST_UPDATE_IN_PLACE && !_is_sub ? + MAY_UPDATE_IN_PLACE : + update_in_place; gen_nondet_init( me, _is_sub, class_identifier, - loc, false, - create_dynamic_objects); + create_dynamic_objects, + false, + typet(), + substruct_in_place); } } recursion_set.erase(struct_tag); @@ -345,23 +474,28 @@ void java_object_factoryt::gen_nondet_init( /*******************************************************************\ -Function: gen_nondet_array_init - - Inputs: +Function: java_object_factoryt::gen_nondet_array_init + Inputs: `expr`: Array-typed expression to initialise + `update_in_place`: + NO_UPDATE_IN_PLACE: initialise `expr` from scratch + MUST_UPDATE_IN_PLACE: reinitialise an existing array + MAY_UPDATE_IN_PLACE: invalid input Outputs: Purpose: create code to initialize a Java array with size `max_nondet_array_length`, loop over elements and initialize - them + or reinitialize them \*******************************************************************/ void java_object_factoryt::gen_nondet_array_init( const exprt &expr, - const source_locationt &loc) + update_in_placet update_in_place) { + assert(update_in_place!=MAY_UPDATE_IN_PLACE); assert(expr.type().id()==ID_pointer); + const typet &type=ns.follow(expr.type().subtype()); const struct_typet &struct_type=to_struct_type(type); assert(expr.type().subtype().id()==ID_symbol); @@ -371,38 +505,61 @@ void java_object_factoryt::gen_nondet_array_init( auto max_length_expr=from_integer(max_nondet_array_length, java_int_type()); typet allocate_type; - symbolt &length_sym=new_tmp_symbol( - symbol_table, - loc, - java_int_type(), - "nondet_array_length"); - const auto &length_sym_expr=length_sym.symbol_expr(); - - // Initialize array with some undetermined length: - gen_nondet_init(length_sym_expr, false, irep_idt(), loc, false, false); - - // Insert assumptions to bound its length: - binary_relation_exprt - assume1(length_sym_expr, ID_ge, from_integer(0, java_int_type())); - binary_relation_exprt - assume2(length_sym_expr, ID_le, max_length_expr); - code_assumet assume_inst1(assume1); - code_assumet assume_inst2(assume2); - init_code.move_to_operands(assume_inst1); - init_code.move_to_operands(assume_inst2); - - side_effect_exprt java_new_array(ID_java_new_array, expr.type()); - java_new_array.copy_to_operands(length_sym_expr); - java_new_array.type().subtype().set(ID_C_element_type, element_type); - codet assign=code_assignt(expr, java_new_array); - assign.add_source_location()=loc; - init_code.copy_to_operands(assign); - - exprt init_array_expr= - member_exprt( - dereference_exprt(expr, expr.type().subtype()), - "data", - struct_type.components()[2].type()); + exprt length_sym_expr; + + if(update_in_place==NO_UPDATE_IN_PLACE) + { + const symbolt &length_sym=new_tmp_symbol( + symbol_table, + loc, + java_int_type(), + "nondet_array_length"); + length_sym_expr=length_sym.symbol_expr(); + + // Initialise array with some undetermined length: + gen_nondet_init( + length_sym_expr, + false, + irep_idt(), + false, + false, + false, + typet(), + NO_UPDATE_IN_PLACE); + + // Insert assumptions to bound its length: + binary_relation_exprt assume1(length_sym_expr, ID_ge, + from_integer(0, java_int_type())); + binary_relation_exprt assume2(length_sym_expr, ID_le, + max_length_expr); + + code_assumet assume_inst1(assume1); + code_assumet assume_inst2(assume2); + init_code.move_to_operands(assume_inst1); + init_code.move_to_operands(assume_inst2); + + side_effect_exprt java_new_array(ID_java_new_array, expr.type()); + java_new_array.copy_to_operands(length_sym_expr); + java_new_array.set(ID_skip_initialize, true); + java_new_array.type().subtype().set(ID_C_element_type, element_type); + codet assign=code_assignt(expr, java_new_array); + assign.add_source_location()=loc; + init_code.copy_to_operands(assign); + } + else + { + // Update in place. Get existing array length: + length_sym_expr= + member_exprt( + dereference_exprt(expr, expr.type().subtype()), + "length", + struct_type.component_type("length")); + } + + exprt init_array_expr=member_exprt( + dereference_exprt(expr, expr.type().subtype()), + "data", + struct_type.component_type("data")); if(init_array_expr.type()!=pointer_typet(element_type)) init_array_expr= typecast_exprt(init_array_expr, pointer_typet(element_type)); @@ -447,27 +604,36 @@ void java_object_factoryt::gen_nondet_array_init( init_code.move_to_operands(done_test); - // Add a redundant if(counter == max_length) break that is easier for the - // unwinder to understand. - code_ifthenelset max_test; - max_test.cond()=equal_exprt(counter_expr, max_length_expr); - max_test.then_case()=goto_done; - - init_code.move_to_operands(max_test); + if(update_in_place!=MUST_UPDATE_IN_PLACE) + { + // Add a redundant if(counter == max_length) break + // that is easier for the unwinder to understand. + code_ifthenelset max_test; + max_test.cond()=equal_exprt(counter_expr, max_length_expr); + max_test.then_case()=goto_done; + init_code.move_to_operands(max_test); + } exprt arraycellref=dereference_exprt( plus_exprt(array_init_symexpr, counter_expr, array_init_symexpr.type()), array_init_symexpr.type().subtype()); + // MUST_UPDATE_IN_PLACE only applies to this object. + // If this is a pointer to another object, offer the chance + // to leave it alone by setting MAY_UPDATE_IN_PLACE instead. + update_in_placet child_update_in_place= + update_in_place==MUST_UPDATE_IN_PLACE ? + MAY_UPDATE_IN_PLACE : + update_in_place; gen_nondet_init( arraycellref, false, irep_idt(), - loc, false, true, true, - element_type); + element_type, + child_update_in_place); exprt java_one=from_integer(1, java_int_type()); code_assignt incr(counter_expr, plus_exprt(counter_expr, java_one)); @@ -481,11 +647,32 @@ void java_object_factoryt::gen_nondet_array_init( Function: gen_nondet_init - Inputs: - - Outputs: - - Purpose: + Inputs: `expr`: lvalue expression to initialise + `loc`: source location for all generated code + `skip_classid`: if true, skip initialising @class_identifier + `create_dyn_objs`: if true, use malloc to allocate + objects; otherwise generate fresh static symbols. + `assume_non_null`: never initialise pointer members with + null, unless forced to by recursive datatypes + `message_handler`: logging + `max_nondet_array_length`: upper bound on size of initialised + arrays. + `update_in_place`: + NO_UPDATE_IN_PLACE: initialise `expr` from scratch + MUST_UPDATE_IN_PLACE: reinitialise an existing object + MAY_UPDATE_IN_PLACE: generate a runtime nondet branch + between the NO_ and MUST_ cases. + + Outputs: `init_code` gets an instruction sequence to initialise or + reinitialise `expr` and child objects it refers to. + `symbol_table` is modified with any new symbols created. This + includes any necessary temporaries, and if `create_dyn_objs` + is false, any allocated objects. + + Purpose: Initialises a primitive or object tree rooted at `expr`, + allocating child objects as necessary and nondet-initialising + their members, or if MAY_ or MUST_UPDATE_IN_PLACE is set, + re-initialising already-allocated objects. \*******************************************************************/ @@ -498,30 +685,35 @@ void gen_nondet_init( bool create_dyn_objs, bool assume_non_null, message_handlert &message_handler, - size_t max_nondet_array_length) + size_t max_nondet_array_length, + update_in_placet update_in_place) { java_object_factoryt state( init_code, assume_non_null, max_nondet_array_length, symbol_table, - message_handler); + message_handler, + loc); state.gen_nondet_init( expr, false, "", - loc, skip_classid, - create_dyn_objs); + create_dyn_objs, + false, + typet(), + update_in_place); } /*******************************************************************\ Function: new_tmp_symbol - Inputs: + Inputs: `prefix`: new symbol name prefix - Outputs: + Outputs: A fresh-named symbol is added to `symbol_table` and also + returned. Purpose: @@ -544,32 +736,23 @@ symbolt &new_tmp_symbol( /*******************************************************************\ -Function: get_nondet_bool - - Inputs: Desired type (C_bool or plain bool) - - Outputs: nondet expr of that type - - Purpose: - -\*******************************************************************/ - -exprt get_nondet_bool(const typet &type) -{ - // We force this to 0 and 1 and won't consider - // other values. - return typecast_exprt(side_effect_expr_nondett(bool_typet()), type); -} - -/*******************************************************************\ - Function: object_factory - Inputs: + Inputs: `type`: type of new object to create + `allow_null`: if true, may return null; otherwise always + allocates an object + `max_nondet_array_length`: upper bound on size of initialised + arrays. + `loc`: source location for all generated code + `message_handler`: logging - Outputs: + Outputs: `symbol_table` gains any new symbols created, as per + gen_nondet_init above. + `init_code` gains any instructions requried to initialise + either the returned value or its child objects - Purpose: + Purpose: Similar to `gen_nondet_init` above, but always creates a + fresh static global object or primitive nondet expression. \*******************************************************************/ @@ -592,7 +775,6 @@ exprt object_factory( exprt object=aux_symbol.symbol_expr(); - const namespacet ns(symbol_table); gen_nondet_init( object, init_code, diff --git a/src/java_bytecode/java_object_factory.h b/src/java_bytecode/java_object_factory.h index 6122d3e2780..9820efcb6cc 100644 --- a/src/java_bytecode/java_object_factory.h +++ b/src/java_bytecode/java_object_factory.h @@ -22,6 +22,13 @@ exprt object_factory( const source_locationt &, message_handlert &message_handler); +enum update_in_placet +{ + NO_UPDATE_IN_PLACE, + MAY_UPDATE_IN_PLACE, + MUST_UPDATE_IN_PLACE +}; + void gen_nondet_init( const exprt &expr, code_blockt &init_code, @@ -31,7 +38,8 @@ void gen_nondet_init( bool create_dynamic_objects, bool assume_non_null, message_handlert &message_handler, - size_t max_nondet_array_length=5); + size_t max_nondet_array_length, + update_in_placet update_in_place=NO_UPDATE_IN_PLACE); exprt get_nondet_bool(const typet &); diff --git a/src/java_bytecode/java_types.cpp b/src/java_bytecode/java_types.cpp index 735a2a0dbfb..9d78c7a36a7 100644 --- a/src/java_bytecode/java_types.cpp +++ b/src/java_bytecode/java_types.cpp @@ -9,6 +9,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -231,6 +232,23 @@ pointer_typet java_array_type(const char subtype) /*******************************************************************\ +Function: is_java_array_tag + + Inputs: Struct tag 'tag' + + Outputs: True if the given struct is a Java array + + Purpose: See above + +\*******************************************************************/ + +bool is_java_array_tag(const irep_idt& tag) +{ + return has_prefix(id2string(tag), "java::array["); +} + +/*******************************************************************\ + Function: is_reference_type Inputs: diff --git a/src/java_bytecode/java_types.h b/src/java_bytecode/java_types.h index e283a5acf23..2b32d5fec43 100644 --- a/src/java_bytecode/java_types.h +++ b/src/java_bytecode/java_types.h @@ -44,4 +44,6 @@ char java_char_from_type(const typet &type); typet java_bytecode_promotion(const typet &); exprt java_bytecode_promotion(const exprt &); +bool is_java_array_tag(const irep_idt& tag); + #endif // CPROVER_JAVA_BYTECODE_JAVA_TYPES_H diff --git a/src/java_bytecode/library/src/org/cprover/CProver.java b/src/java_bytecode/library/src/org/cprover/CProver.java new file mode 100644 index 00000000000..72c3eeb1d70 --- /dev/null +++ b/src/java_bytecode/library/src/org/cprover/CProver.java @@ -0,0 +1,11 @@ +package org.cprover; + +public final class CProver +{ + public static boolean enableAssume=true; + public static void assume(boolean condition) + { + if(enableAssume) + throw new RuntimeException("Cannot execute program with CProver.assume()"); + } +} diff --git a/src/langapi/language_ui.cpp b/src/langapi/language_ui.cpp index 95b5d104a4f..4b0f3fa0d75 100644 --- a/src/langapi/language_ui.cpp +++ b/src/langapi/language_ui.cpp @@ -183,6 +183,10 @@ bool language_uit::final() { language_files.set_message_handler(*message_handler); + // Enable/disable stub generation for opaque methods + bool stubs_enabled=_cmdline.isset("generate-opaque-stubs"); + language_files.set_should_generate_opaque_method_stubs(stubs_enabled); + if(language_files.final(symbol_table)) { if(get_ui()==ui_message_handlert::PLAIN) diff --git a/src/solvers/refinement/refined_string_type.cpp b/src/solvers/refinement/refined_string_type.cpp deleted file mode 100644 index 012d30eb8de..00000000000 --- a/src/solvers/refinement/refined_string_type.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/********************************************************************\ - -Module: Type for string expressions used by the string solver. - These string expressions contain a field `length`, of type - `index_type`, a field `content` of type `content_type`. - This module also defines functions to recognise the C and java - string types. - -Author: Romain Brenguier, romain.brenguier@diffblue.com - -\*******************************************************************/ - -#include - -#include "refined_string_type.h" - -/*******************************************************************\ - -Constructor: refined_string_typet::refined_string_typet - - Inputs: type of characters - -\*******************************************************************/ - -refined_string_typet::refined_string_typet( - const typet &index_type, const typet &char_type) -{ - infinity_exprt infinite_index(index_type); - array_typet char_array(char_type, infinite_index); - components().emplace_back("length", index_type); - components().emplace_back("content", char_array); -} - -/*******************************************************************\ - -Function: refined_string_typet::is_c_string_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of C strings - -\*******************************************************************/ - -bool refined_string_typet::is_c_string_type(const typet &type) -{ - return - type.id()==ID_struct && - to_struct_type(type).get_tag()==CPROVER_PREFIX"string"; -} - -/*******************************************************************\ - -Function: refined_string_typet::is_java_string_pointer_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java string pointers - -\*******************************************************************/ - -bool refined_string_typet::is_java_string_pointer_type(const typet &type) -{ - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - return is_java_string_type(subtype); - } - return false; -} - -/*******************************************************************\ - -Function: refined_string_typet::is_java_string_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java string - -\*******************************************************************/ - -bool refined_string_typet::is_java_string_type(const typet &type) -{ - if(type.id()==ID_symbol) - { - irep_idt tag=to_symbol_type(type).get_identifier(); - return tag=="java::java.lang.String"; - } - else if(type.id()==ID_struct) - { - irep_idt tag=to_struct_type(type).get_tag(); - return tag=="java.lang.String"; - } - return false; -} - -/*******************************************************************\ - -Function: refined_string_typet::is_java_string_builder_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java string builder - -\*******************************************************************/ - -bool refined_string_typet::is_java_string_builder_type(const typet &type) -{ - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - irep_idt tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.StringBuilder"; - } - } - return false; -} - -/*******************************************************************\ - -Function: refined_string_typet::is_java_char_sequence_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java char sequence - -\*******************************************************************/ - -bool refined_string_typet::is_java_char_sequence_type(const typet &type) -{ - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.CharSequence"; - } - } - return false; -} diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h index 61a6f3ebdd6..1bb460349b7 100644 --- a/src/solvers/refinement/string_constraint.h +++ b/src/solvers/refinement/string_constraint.h @@ -15,7 +15,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include -#include +#include class string_constraintt: public exprt { @@ -48,7 +48,6 @@ class string_constraintt: public exprt return operands()[4]; } - private: string_constraintt(); diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 2a56f09fddd..12447e0cf04 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -14,7 +14,8 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H #include -#include +#include +#include #include class string_constraint_generatort @@ -65,21 +66,20 @@ class string_constraint_generatort symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type); symbol_exprt fresh_boolean(const irep_idt &prefix); string_exprt fresh_string(const refined_string_typet &type); + string_exprt get_string_expr(const exprt &expr); + string_exprt convert_java_string_to_string_exprt( + const exprt &underlying); + plus_exprt plus_exprt_with_overflow_check(const exprt &op1, const exprt &op2); - // We maintain a map from symbols to strings. - std::map symbol_to_string; + // Maps unresolved symbols to the string_exprt that was created for them + std::map unresolved_symbols; - string_exprt find_or_add_string_of_symbol(const symbol_exprt &sym); - void assign_to_symbol( - const symbol_exprt &sym, const string_exprt &expr) - { - symbol_to_string[sym.get_identifier()]=expr; - } + string_exprt find_or_add_string_of_symbol( + const symbol_exprt &sym, + const refined_string_typet &ref_type); - string_exprt add_axioms_for_string_expr(const exprt &expr); - void set_string_symbol_equal_to_expr( - const symbol_exprt &sym, const exprt &str); + string_exprt add_axioms_for_refined_string(const exprt &expr); exprt add_axioms_for_function_application( const function_application_exprt &expr); @@ -96,6 +96,8 @@ class string_constraint_generatort const std::size_t MAX_FLOAT_LENGTH=15; const std::size_t MAX_DOUBLE_LENGTH=30; + std::map function_application_cache; + static irep_idt extract_java_string(const symbol_exprt &s); exprt axiom_for_is_positive_index(const exprt &x); @@ -117,6 +119,9 @@ class string_constraint_generatort // The specification is partial: the actual value is not actually computed // but we ensure that hash codes of equal strings are equal. exprt add_axioms_for_hash_code(const function_application_exprt &f); + // To each string on which hash_code was called we associate a symbol + // representing the return value of the hash_code function. + std::map hash_code_of_string; exprt add_axioms_for_is_empty(const function_application_exprt &f); exprt add_axioms_for_is_prefix( @@ -224,7 +229,9 @@ class string_constraint_generatort // the start for negative number string_exprt add_axioms_from_float(const function_application_exprt &f); string_exprt add_axioms_from_float( - const exprt &f, bool double_precision=false); + const exprt &f, + const refined_string_typet &ref_type, + bool double_precision); // Add axioms corresponding to the String.valueOf(D) java function // TODO: the specifications is only partial @@ -253,6 +260,7 @@ class string_constraint_generatort string_exprt add_axioms_for_code_point( const exprt &code_point, const refined_string_typet &ref_type); string_exprt add_axioms_for_java_char_array(const exprt &char_array); + exprt add_axioms_for_char_pointer(const function_application_exprt &fun); string_exprt add_axioms_for_if(const if_exprt &expr); exprt add_axioms_for_char_literal(const function_application_exprt &f); @@ -270,6 +278,8 @@ class string_constraint_generatort const function_application_exprt &f); exprt add_axioms_for_parse_int(const function_application_exprt &f); + exprt add_axioms_for_correct_number_format( + const string_exprt &str, std::size_t max_size=10); exprt add_axioms_for_to_char_array(const function_application_exprt &f); exprt add_axioms_for_compare_to(const function_application_exprt &f); @@ -278,6 +288,9 @@ class string_constraint_generatort // string pointers symbol_exprt add_axioms_for_intern(const function_application_exprt &f); + // Pool used for the intern method + std::map intern_of_string; + // Tells which language is used. C and Java are supported irep_idt mode; @@ -293,14 +306,8 @@ class string_constraint_generatort exprt int_of_hex_char(const exprt &chr) const; exprt is_high_surrogate(const exprt &chr) const; exprt is_low_surrogate(const exprt &chr) const; - static exprt character_equals_ignore_case( + exprt character_equals_ignore_case( exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z); - - // Pool used for the intern method - std::map pool; - - // Used to determine whether hashcode should be equal - std::map hash; }; #endif diff --git a/src/solvers/refinement/string_constraint_generator_code_points.cpp b/src/solvers/refinement/string_constraint_generator_code_points.cpp index 7c035a0d3e4..16763dfc97c 100644 --- a/src/solvers/refinement/string_constraint_generator_code_points.cpp +++ b/src/solvers/refinement/string_constraint_generator_code_points.cpp @@ -160,17 +160,18 @@ exprt string_constraint_generatort::add_axioms_for_code_point_at( { typet return_type=f.type(); assert(return_type.id()==ID_signedbv); - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); const exprt &pos=args(f, 2)[1]; symbol_exprt result=fresh_symbol("char", return_type); exprt index1=from_integer(1, str.length().type()); const exprt &char1=str[pos]; - const exprt &char2=str[plus_exprt(pos, index1)]; + const exprt &char2=str[plus_exprt_with_overflow_check(pos, index1)]; exprt char1_as_int=typecast_exprt(char1, return_type); exprt char2_as_int=typecast_exprt(char2, return_type); exprt pair=pair_value(char1_as_int, char2_as_int, return_type); - exprt is_low=is_low_surrogate(str[plus_exprt(pos, index1)]); + exprt is_low=is_low_surrogate( + str[plus_exprt_with_overflow_check(pos, index1)]); exprt return_pair=and_exprt(is_high_surrogate(str[pos]), is_low); axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); @@ -199,7 +200,7 @@ exprt string_constraint_generatort::add_axioms_for_code_point_before( typet return_type=f.type(); assert(return_type.id()==ID_signedbv); symbol_exprt result=fresh_symbol("char", return_type); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); const exprt &char1= str[minus_exprt(args[1], from_integer(2, str.length().type()))]; @@ -234,7 +235,7 @@ Function: string_constraint_generatort::add_axioms_for_code_point_count exprt string_constraint_generatort::add_axioms_for_code_point_count( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const exprt &begin=args(f, 3)[1]; const exprt &end=args(f, 3)[2]; const typet &return_type=f.type(); @@ -265,14 +266,15 @@ Function: string_constraint_generatort::add_axioms_for_offset_by_code_point exprt string_constraint_generatort::add_axioms_for_offset_by_code_point( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const exprt &index=args(f, 3)[1]; const exprt &offset=args(f, 3)[2]; const typet &return_type=f.type(); symbol_exprt result=fresh_symbol("offset_by_code_point", return_type); - exprt minimum=plus_exprt(index, offset); - exprt maximum=plus_exprt(index, plus_exprt(offset, offset)); + exprt minimum=plus_exprt_with_overflow_check(index, offset); + exprt maximum=plus_exprt_with_overflow_check( + index, plus_exprt_with_overflow_check(offset, offset)); axioms.push_back(binary_relation_exprt(result, ID_le, maximum)); axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); diff --git a/src/solvers/refinement/string_constraint_generator_comparison.cpp b/src/solvers/refinement/string_constraint_generator_comparison.cpp index 321282ee9c1..a51e2abe3cd 100644 --- a/src/solvers/refinement/string_constraint_generator_comparison.cpp +++ b/src/solvers/refinement/string_constraint_generator_comparison.cpp @@ -18,7 +18,9 @@ Function: string_constraint_generatort::add_axioms_for_equals Outputs: a expression of Boolean type Purpose: add axioms stating that the result is true exactly when the strings - represented by the arguments are equal + represented by the arguments are equal. + the variable ending in `witness_unequal` is -1 if the length differs + or an index at which the strings are different \*******************************************************************/ @@ -29,8 +31,8 @@ exprt string_constraint_generatort::add_axioms_for_equals( symbol_exprt eq=fresh_boolean("equal"); typecast_exprt tc_eq(eq, f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); typet index_type=s1.length().type(); // We want to write: @@ -54,9 +56,10 @@ exprt string_constraint_generatort::add_axioms_for_equals( binary_relation_exprt(witness, ID_lt, s1.length()), binary_relation_exprt(witness, ID_ge, zero)); and_exprt witnessing(bound_witness, notequal_exprt(s1[witness], s2[witness])); - implies_exprt a3( - not_exprt(eq), - or_exprt(notequal_exprt(s1.length(), s2.length()), witnessing)); + and_exprt diff_length( + notequal_exprt(s1.length(), s2.length()), + equal_exprt(witness, from_integer(-1, index_type))); + implies_exprt a3(not_exprt(eq), or_exprt(diff_length, witnessing)); axioms.push_back(a3); return tc_eq; @@ -92,8 +95,13 @@ exprt string_constraint_generatort::character_equals_ignore_case( // p3 : (is_up2&&'a'-'A'+char2=char1) equal_exprt p1(char1, char2); minus_exprt diff=minus_exprt(char_a, char_A); - and_exprt p2(is_upper_case_1, equal_exprt(plus_exprt(diff, char1), char2)); - and_exprt p3(is_upper_case_2, equal_exprt(plus_exprt(diff, char2), char1)); + + // Overflow is not a problem here because is_upper_case conditions + // ensure that we are within a safe range. + and_exprt p2(is_upper_case_1, + equal_exprt(plus_exprt(diff, char1), char2)); + and_exprt p3(is_upper_case_2, + equal_exprt(plus_exprt(diff, char2), char1)); return or_exprt(or_exprt(p1, p2), p3); } @@ -116,8 +124,8 @@ exprt string_constraint_generatort::add_axioms_for_equals_ignore_case( symbol_exprt eq=fresh_boolean("equal_ignore_case"); typecast_exprt tc_eq(eq, f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); typet char_type=to_refined_string_type(s1.type()).get_char_type(); exprt char_a=constant_char('a', char_type); exprt char_A=constant_char('A', char_type); @@ -174,37 +182,35 @@ Function: string_constraint_generatort::add_axioms_for_hash_code exprt string_constraint_generatort::add_axioms_for_hash_code( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); typet return_type=f.type(); typet index_type=str.length().type(); - // initialisation of the missing pool variable - std::map::iterator it; - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(hash.find(it->second)==hash.end()) - hash[it->second]=fresh_symbol("hash", return_type); + auto pair=hash_code_of_string.insert( + std::make_pair(str, fresh_symbol("hash", return_type))); + exprt hash=pair.first->second; // for each string s. either: // c1: hash(str)=hash(s) // c2: |str|!=|s| - // c3: (|str|==|s| &&exists i<|s|. s[i]!=str[i]) + // c3: (|str|==|s| && exists i<|s|. s[i]!=str[i]) // WARNING: the specification may be incomplete - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + for(auto it : hash_code_of_string) { symbol_exprt i=fresh_exist_index("index_hash", index_type); - equal_exprt c1(hash[it->second], hash[str]); - not_exprt c2(equal_exprt(it->second.length(), str.length())); + equal_exprt c1(it.second, hash); + not_exprt c2(equal_exprt(it.first.length(), str.length())); and_exprt c3( - equal_exprt(it->second.length(), str.length()), + equal_exprt(it.first.length(), str.length()), and_exprt( - not_exprt(equal_exprt(str[i], it->second[i])), + not_exprt(equal_exprt(str[i], it.first[i])), and_exprt( str.axiom_for_is_strictly_longer_than(i), axiom_for_is_positive_index(i)))); axioms.push_back(or_exprt(c1, or_exprt(c2, c3))); } - return hash[str]; + return hash; } /*******************************************************************\ @@ -222,8 +228,8 @@ Function: string_constraint_generatort::add_axioms_for_compare_to exprt string_constraint_generatort::add_axioms_for_compare_to( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); const typet &return_type=f.type(); symbol_exprt res=fresh_symbol("compare_to", return_type); typet index_type=s1.length().type(); @@ -234,12 +240,12 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( // a1 : res==0 => |s1|=|s2| // a2 : forall i<|s1|. s1[i]==s2[i] // a3 : exists x. - // res!=0 ==> x> 0 && - // ((|s1| <= |s2| &&x<|s1|) || (|s1| >= |s2| &&x<|s2|) - // &&res=s1[x]-s2[x] ) - // || cond2: - // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) - // a4 : forall i s1[i]=s2[i] + // res!=0 ==> x > 0 + // && ((|s1| <= |s2| && x<|s1|) || (|s1| >= |s2| &&x<|s2|) + // && res=s1[x]-s2[x] ) + // || cond2: + // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) + // a4 : forall i' s1[i]=s2[i] assert(return_type.id()==ID_signedbv); @@ -282,7 +288,9 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( or_exprt(cond1, cond2))); axioms.push_back(a3); - string_constraintt a4(i, x, not_exprt(res_null), equal_exprt(s1[i], s2[i])); + symbol_exprt i2=fresh_univ_index("QA_compare_to", index_type); + string_constraintt a4( + i2, x, not_exprt(res_null), equal_exprt(s1[i2], s2[i2])); axioms.push_back(a4); return res; @@ -304,15 +312,15 @@ Function: string_constraint_generatort::add_axioms_for_intern symbol_exprt string_constraint_generatort::add_axioms_for_intern( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); + // For now we only enforce content equality and not pointer equality const typet &return_type=f.type(); + typet index_type=str.length().type(); - // initialisation of the missing pool variable - std::map::iterator it; - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(pool.find(it->second)==pool.end()) - pool[it->second]=fresh_symbol("pool", return_type); + auto pair=intern_of_string.insert( + std::make_pair(str, fresh_symbol("pool", return_type))); + symbol_exprt intern=pair.first->second; // intern(str)=s_0 || s_1 || ... // for each string s. @@ -320,30 +328,30 @@ symbol_exprt string_constraint_generatort::add_axioms_for_intern( // || (|str|==|s| &&exists i<|s|. s[i]!=str[i]) exprt disj=false_exprt(); - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + for(auto it : intern_of_string) disj=or_exprt( - disj, equal_exprt(pool[str], symbol_exprt(it->first, return_type))); + disj, equal_exprt(intern, it.second)); axioms.push_back(disj); // WARNING: the specification may be incomplete or incorrect - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(it->second!=str) + for(auto it : intern_of_string) + if(it.second!=str) { symbol_exprt i=fresh_exist_index("index_intern", index_type); axioms.push_back( or_exprt( - equal_exprt(pool[it->second], pool[str]), + equal_exprt(it.second, intern), or_exprt( - not_exprt(str.axiom_for_has_same_length_as(it->second)), + not_exprt(str.axiom_for_has_same_length_as(it.first)), and_exprt( - str.axiom_for_has_same_length_as(it->second), + str.axiom_for_has_same_length_as(it.first), and_exprt( - not_exprt(equal_exprt(str[i], it->second[i])), + not_exprt(equal_exprt(str[i], it.first[i])), and_exprt(str.axiom_for_is_strictly_longer_than(i), axiom_for_is_positive_index(i))))))); } - return pool[str]; + return intern; } diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index a7d9c4921c4..fb8e24d5320 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -35,9 +35,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( // a4 : forall i<|s1|. res[i]=s1[i] // a5 : forall i<|s2|. res[i+|s1|]=s2[i] - exprt a1=res.axiom_for_has_length( - plus_exprt(s1.length(), s2.length())); - axioms.push_back(a1); + res.length()=plus_exprt_with_overflow_check(s1.length(), s2.length()); axioms.push_back(s1.axiom_for_is_shorter_than(res)); axioms.push_back(s2.axiom_for_is_shorter_than(res)); @@ -73,8 +71,8 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( const function_application_exprt::argumentst &args=f.arguments(); assert(args.size()==2); - string_exprt s1=add_axioms_for_string_expr(args[0]); - string_exprt s2=add_axioms_for_string_expr(args[1]); + string_exprt s1=get_string_expr(args[0]); + string_exprt s2=get_string_expr(args[1]); return add_axioms_for_concat(s1, s2); } @@ -95,7 +93,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_int( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_from_int( args(f, 2)[1], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); @@ -118,7 +116,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_long( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_LONG_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); } @@ -138,7 +136,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_bool string_exprt string_constraint_generatort::add_axioms_for_concat_bool( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_bool(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); @@ -159,7 +157,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_char string_exprt string_constraint_generatort::add_axioms_for_concat_char( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_char(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); @@ -180,9 +178,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_double string_exprt string_constraint_generatort::add_axioms_for_concat_double( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_float( - args(f, 2)[1], MAX_DOUBLE_LENGTH); + string_exprt s1=get_string_expr(args(f, 2)[0]); + assert(refined_string_typet::is_refined_string_type(f.type())); + refined_string_typet ref_type=to_refined_string_type(f.type()); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], ref_type, true); return add_axioms_for_concat(s1, s2); } @@ -201,8 +200,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_float string_exprt string_constraint_generatort::add_axioms_for_concat_float( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_float(args(f, 2)[1], MAX_FLOAT_LENGTH); + string_exprt s1=get_string_expr(args(f, 2)[0]); + assert(refined_string_typet::is_refined_string_type(f.type())); + refined_string_typet ref_type=to_refined_string_type(f.type()); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], ref_type, false); return add_axioms_for_concat(s1, s2); } @@ -222,7 +223,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_code_point string_exprt string_constraint_generatort::add_axioms_for_concat_code_point( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_for_code_point(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index 90769747949..2905c6a5f21 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -93,9 +93,13 @@ string_exprt string_constraint_generatort::add_axioms_for_empty_string( const function_application_exprt &f) { assert(f.arguments().empty()); + assert(refined_string_typet::is_refined_string_type(f.type())); const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt res=fresh_string(ref_type); - axioms.push_back(res.axiom_for_has_length(0)); + exprt size=from_integer(0, ref_type.get_index_type()); + const array_typet &content_type=ref_type.get_content_type(); + array_of_exprt empty_array( + from_integer(0, ref_type.get_content_type().subtype()), content_type); + string_exprt res(size, empty_array, ref_type); return res; } @@ -132,7 +136,8 @@ string_exprt string_constraint_generatort::add_axioms_from_literal( else { // Java string constant - assert(refined_string_typet::is_unrefined_string_type(arg.type())); + assert(false); // TODO: Check if used. On the contrary, discard else. + assert(arg.id()==ID_symbol); const exprt &s=arg.op0(); // It seems the value of the string is lost, diff --git a/src/solvers/refinement/string_constraint_generator_indexof.cpp b/src/solvers/refinement/string_constraint_generator_indexof.cpp index 93db048458b..d799091a573 100644 --- a/src/solvers/refinement/string_constraint_generator_indexof.cpp +++ b/src/solvers/refinement/string_constraint_generator_indexof.cpp @@ -92,15 +92,16 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( symbol_exprt contains=fresh_boolean("contains_substring"); // We add axioms: - // a1 : contains => |substring|>=offset>=from_index + // a1 : contains => |str|-|substring|>=offset>=from_index // a2 : !contains => offset=-1 - // a3 : forall 0 <= witness str[witness+offset]=substring[witness] + // a3 : forall 0<=witness<|substring|. + // contains => str[witness+offset]=substring[witness] implies_exprt a1( contains, and_exprt( - str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + str.axiom_for_is_longer_than(plus_exprt_with_overflow_check( + substring.length(), offset)), binary_relation_exprt(offset, ID_ge, from_index))); axioms.push_back(a1); @@ -138,7 +139,8 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( implies_exprt a1( contains, and_exprt( - str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + str.axiom_for_is_longer_than( + plus_exprt_with_overflow_check(substring.length(), offset)), binary_relation_exprt(offset, ID_le, from_index))); axioms.push_back(a1); @@ -173,7 +175,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); const exprt &c=args[1]; const refined_string_typet &ref_type=to_refined_string_type(str.type()); assert(f.type()==ref_type.get_index_type()); @@ -186,14 +188,17 @@ exprt string_constraint_generatort::add_axioms_for_index_of( else assert(false); - if(refined_string_typet::is_java_string_pointer_type(c.type())) + if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { - string_exprt sub=add_axioms_for_string_expr(c); - return add_axioms_for_index_of_string(str, sub, from_index); - } - else return add_axioms_for_index_of( str, typecast_exprt(c, ref_type.get_char_type()), from_index); + } + else + { + assert(refined_string_typet::is_refined_string_type(c.type())); + string_exprt sub=get_string_expr(c); + return add_axioms_for_index_of_string(str, sub, from_index); + } } exprt string_constraint_generatort::add_axioms_for_last_index_of( @@ -213,7 +218,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( exprt index1=from_integer(1, index_type); exprt minus1=from_integer(-1, index_type); - exprt from_index_plus_one=plus_exprt(from_index, index1); + exprt from_index_plus_one=plus_exprt_with_overflow_check(from_index, index1); and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), binary_relation_exprt(index, ID_lt, from_index_plus_one)); @@ -267,7 +272,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); exprt c=args[1]; const refined_string_typet &ref_type=to_refined_string_type(str.type()); exprt from_index; @@ -280,12 +285,14 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( else assert(false); - if(refined_string_typet::is_java_string_pointer_type(c.type())) + if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { - string_exprt sub=add_axioms_for_string_expr(c); - return add_axioms_for_last_index_of_string(str, sub, from_index); - } - else return add_axioms_for_last_index_of( str, typecast_exprt(c, ref_type.get_char_type()), from_index); + } + else + { + string_exprt sub=get_string_expr(c); + return add_axioms_for_last_index_of_string(str, sub, from_index); + } } diff --git a/src/solvers/refinement/string_constraint_generator_insert.cpp b/src/solvers/refinement/string_constraint_generator_insert.cpp index 6a080a5dc6c..cd5f4376a93 100644 --- a/src/solvers/refinement/string_constraint_generator_insert.cpp +++ b/src/solvers/refinement/string_constraint_generator_insert.cpp @@ -48,8 +48,8 @@ Function: string_constraint_generatort::add_axioms_for_insert string_exprt string_constraint_generatort::add_axioms_for_insert( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + string_exprt s2=get_string_expr(args(f, 3)[2]); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -70,7 +70,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_int( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_int( args(f, 3)[2], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); @@ -93,7 +93,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_long( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_LONG_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -115,7 +115,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_bool( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_bool(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -136,7 +136,7 @@ Function: string_constraint_generatort::add_axioms_for_insert_char string_exprt string_constraint_generatort::add_axioms_for_insert_char( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_char(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); @@ -158,8 +158,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_double string_exprt string_constraint_generatort::add_axioms_for_insert_double( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_float(args(f, 3)[2], ref_type, true); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -179,8 +180,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_float string_exprt string_constraint_generatort::add_axioms_for_insert_float( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_float(args(f, 3)[2], ref_type, false); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -216,7 +218,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_char_array( offset=from_integer(0, count.type()); } - string_exprt str=add_axioms_for_string_expr(f.arguments()[0]); + string_exprt str=get_string_expr(f.arguments()[0]); const exprt &length=f.arguments()[2]; const exprt &data=f.arguments()[3]; string_exprt arr=add_axioms_from_char_array( diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 5a269e5ad3b..e4292da542b 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -16,6 +16,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include #include +#include unsigned string_constraint_generatort::next_symbol_id=1; @@ -122,6 +123,42 @@ symbol_exprt string_constraint_generatort::fresh_boolean( /*******************************************************************\ +Function: string_constraint_generatort::plus_exprt_with_overflow_check + + Inputs: + op1 - First term of the sum + op2 - Second term of the sum + + Outputs: A plus expression representing the sum of the arguments + + Purpose: Create a plus expression while adding extra constraints to + axioms in order to prevent overflows. + +\*******************************************************************/ +plus_exprt string_constraint_generatort::plus_exprt_with_overflow_check( + const exprt &op1, const exprt &op2) +{ + plus_exprt sum(plus_exprt(op1, op2)); + + exprt zero=from_integer(0, op1.type()); + + binary_relation_exprt neg1(op1, ID_lt, zero); + binary_relation_exprt neg2(op2, ID_lt, zero); + binary_relation_exprt neg_sum(sum, ID_lt, zero); + + // We prevent overflows by adding the following constraint: + // If the signs of the two operands are the same, then the sign of the sum + // should also be the same. + implies_exprt no_overflow(equal_exprt(neg1, neg2), + equal_exprt(neg1, neg_sum)); + + axioms.push_back(no_overflow); + + return sum; +} + +/*******************************************************************\ + Function: string_constraint_generatort::fresh_string Inputs: a type for string @@ -144,59 +181,125 @@ string_exprt string_constraint_generatort::fresh_string( /*******************************************************************\ -Function: string_constraint_generatort::add_axioms_for_string_expr +Function: string_constraint_generatort::get_string_expr + + Inputs: an expression of refined string type + + Outputs: a string expression + + Purpose: casts an expression to a string expression, or fetches the + actual string_exprt in the case of a symbol. + +\*******************************************************************/ + +string_exprt string_constraint_generatort::get_string_expr(const exprt &expr) +{ + assert(refined_string_typet::is_refined_string_type(expr.type())); + + if(expr.id()==ID_symbol) + { + return find_or_add_string_of_symbol( + to_symbol_expr(expr), + to_refined_string_type(expr.type())); + } + else + { + return to_string_expr(expr); + } +} + +/*******************************************************************\ + +Function: string_constraint_generatort::convert_java_string_to_string_exprt + + Inputs: a java string + + Outputs: a string expression + + Purpose: create a new string_exprt as a conversion of a java string + +\*******************************************************************/ + +string_exprt string_constraint_generatort::convert_java_string_to_string_exprt( + const exprt &jls) +{ + assert(get_mode()==ID_java); + assert(jls.id()==ID_struct); + + exprt length(to_struct_expr(jls).op1()); + // TODO: Add assertion on the type. + // assert(length.type()==refined_string_typet::index_type()); + exprt java_content(to_struct_expr(jls).op2()); + if(java_content.id()==ID_address_of) + { + java_content=to_address_of_expr(java_content).object(); + } + else + { + java_content=dereference_exprt(java_content, java_content.type()); + } + + refined_string_typet type(java_int_type(), java_char_type()); + + return string_exprt(length, java_content, type); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_refined_string - Inputs: an expression of type string + Inputs: an expression of refined string type Outputs: a string expression that is linked to the argument through axioms that are added to the list - Purpose: obtain a refined string expression corresponding to string - variable of string function call + Purpose: obtain a refined string expression corresponding to a expression + of type string \*******************************************************************/ -string_exprt string_constraint_generatort::add_axioms_for_string_expr( - const exprt &unrefined_string) + +string_exprt string_constraint_generatort::add_axioms_for_refined_string( + const exprt &string) { - string_exprt s; + assert(refined_string_typet::is_refined_string_type(string.type())); + refined_string_typet type=to_refined_string_type(string.type()); + + // Function applications should have been removed before + assert(string.id()!=ID_function_application); - if(unrefined_string.id()==ID_function_application) + if(string.id()==ID_symbol) { - exprt res=add_axioms_for_function_application( - to_function_application_expr(unrefined_string)); - s=to_string_expr(res); + const symbol_exprt &sym=to_symbol_expr(string); + string_exprt s=find_or_add_string_of_symbol(sym, type); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } - else if(unrefined_string.id()==ID_symbol) - s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string)); - else if(unrefined_string.id()==ID_address_of) + else if(string.id()==ID_nondet_symbol) { - assert(unrefined_string.op0().id()==ID_symbol); - s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string.op0())); + string_exprt s=fresh_string(type); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } - else if(unrefined_string.id()==ID_if) - s=add_axioms_for_if(to_if_expr(unrefined_string)); - else if(unrefined_string.id()==ID_nondet_symbol || - unrefined_string.id()==ID_struct) + else if(string.id()==ID_if) { - // TODO: for now we ignore non deterministic symbols and struct + return add_axioms_for_if(to_if_expr(string)); } - else if(unrefined_string.id()==ID_typecast) + else if(string.id()==ID_struct) { - exprt arg=to_typecast_expr(unrefined_string).op(); - exprt res=add_axioms_for_string_expr(arg); - s=to_string_expr(res); + const string_exprt &s=to_string_expr(string); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } else { - throw "add_axioms_for_string_expr:\n"+unrefined_string.pretty()+ + throw "add_axioms_for_refined_string:\n"+string.pretty()+ "\nwhich is not a function application, "+ - "a symbol or an if expression"; + "a symbol, a struct or an if expression"; } - - axioms.push_back( - s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); - return s; } /*******************************************************************\ @@ -215,11 +318,11 @@ string_exprt string_constraint_generatort::add_axioms_for_if( const if_exprt &expr) { assert( - refined_string_typet::is_unrefined_string_type(expr.true_case().type())); - string_exprt t=add_axioms_for_string_expr(expr.true_case()); + refined_string_typet::is_refined_string_type(expr.true_case().type())); + string_exprt t=get_string_expr(expr.true_case()); assert( - refined_string_typet::is_unrefined_string_type(expr.false_case().type())); - string_exprt f=add_axioms_for_string_expr(expr.false_case()); + refined_string_typet::is_refined_string_type(expr.false_case().type())); + string_exprt f=get_string_expr(expr.false_case()); const refined_string_typet &ref_type=to_refined_string_type(t.type()); const typet &index_type=ref_type.get_index_type(); string_exprt res=fresh_string(ref_type); @@ -246,7 +349,7 @@ Function: string_constraint_generatort::find_or_add_string_of_symbol Outputs: a string expression - Purpose: if a symbol represent a string is present in the symbol_to_string + Purpose: if a symbol representing a string is present in the symbol_to_string table, returns the corresponding string, if the symbol is not yet present, creates a new string with the correct type depending on whether the mode is java or c, adds it to the table and returns it. @@ -254,12 +357,11 @@ Function: string_constraint_generatort::find_or_add_string_of_symbol \*******************************************************************/ string_exprt string_constraint_generatort::find_or_add_string_of_symbol( - const symbol_exprt &sym) + const symbol_exprt &sym, const refined_string_typet &ref_type) { irep_idt id=sym.get_identifier(); - const refined_string_typet &ref_type=to_refined_string_type(sym.type()); string_exprt str=fresh_string(ref_type); - auto entry=symbol_to_string.insert(std::make_pair(id, str)); + auto entry=unresolved_symbols.insert(std::make_pair(id, str)); return entry.first->second; } @@ -286,125 +388,180 @@ exprt string_constraint_generatort::add_axioms_for_function_application( const irep_idt &id=is_ssa_expr(name)?to_ssa_expr(name).get_object_name(): to_symbol_expr(name).get_identifier(); + std::string str_id(id.c_str()); + + size_t pos=str_id.find("func_length"); + if(pos!=std::string::npos) + { + function_application_exprt new_expr(expr); + // TODO: This part needs some improvement. + // Stripping the symbol name is not a very robust process. + new_expr.function() = symbol_exprt(str_id.substr(0, pos+4)); + assert(get_mode()==ID_java); + new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); + + auto res_it=function_application_cache.insert(std::make_pair(new_expr, + nil_exprt())); + if(res_it.second) + { + string_exprt res=to_string_expr( + add_axioms_for_function_application(new_expr)); + res_it.first->second=res; + return res.length(); + } + else + return to_string_expr(res_it.first->second).length(); + } + + pos = str_id.find("func_data"); + if(pos!=std::string::npos) + { + function_application_exprt new_expr(expr); + new_expr.function() = symbol_exprt(str_id.substr(0, pos+4)); + new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); + + auto res_it=function_application_cache.insert(std::make_pair(new_expr, + nil_exprt())); + if(res_it.second) + { + string_exprt res=to_string_expr( + add_axioms_for_function_application(new_expr)); + res_it.first->second=res; + return res.content(); + } + else + return to_string_expr(res_it.first->second).content(); + } + // TODO: improve efficiency of this test by either ordering test by frequency // or using a map + auto res_it=function_application_cache.find(expr); + if(res_it!=function_application_cache.end() && res_it->second!=nil_exprt()) + return res_it->second; + + exprt res; + if(id==ID_cprover_char_literal_func) - return add_axioms_for_char_literal(expr); + res=add_axioms_for_char_literal(expr); else if(id==ID_cprover_string_length_func) - return add_axioms_for_length(expr); + res=add_axioms_for_length(expr); else if(id==ID_cprover_string_equal_func) - return add_axioms_for_equals(expr); + res=add_axioms_for_equals(expr); else if(id==ID_cprover_string_equals_ignore_case_func) - return add_axioms_for_equals_ignore_case(expr); + res=add_axioms_for_equals_ignore_case(expr); else if(id==ID_cprover_string_is_empty_func) - return add_axioms_for_is_empty(expr); + res=add_axioms_for_is_empty(expr); else if(id==ID_cprover_string_char_at_func) - return add_axioms_for_char_at(expr); + res=add_axioms_for_char_at(expr); else if(id==ID_cprover_string_is_prefix_func) - return add_axioms_for_is_prefix(expr); + res=add_axioms_for_is_prefix(expr); else if(id==ID_cprover_string_is_suffix_func) - return add_axioms_for_is_suffix(expr); + res=add_axioms_for_is_suffix(expr); else if(id==ID_cprover_string_startswith_func) - return add_axioms_for_is_prefix(expr, true); + res=add_axioms_for_is_prefix(expr, true); else if(id==ID_cprover_string_endswith_func) - return add_axioms_for_is_suffix(expr, true); + res=add_axioms_for_is_suffix(expr, true); else if(id==ID_cprover_string_contains_func) - return add_axioms_for_contains(expr); + res=add_axioms_for_contains(expr); else if(id==ID_cprover_string_hash_code_func) - return add_axioms_for_hash_code(expr); + res=add_axioms_for_hash_code(expr); else if(id==ID_cprover_string_index_of_func) - return add_axioms_for_index_of(expr); + res=add_axioms_for_index_of(expr); else if(id==ID_cprover_string_last_index_of_func) - return add_axioms_for_last_index_of(expr); + res=add_axioms_for_last_index_of(expr); else if(id==ID_cprover_string_parse_int_func) - return add_axioms_for_parse_int(expr); + res=add_axioms_for_parse_int(expr); else if(id==ID_cprover_string_to_char_array_func) - return add_axioms_for_to_char_array(expr); + res=add_axioms_for_to_char_array(expr); else if(id==ID_cprover_string_code_point_at_func) - return add_axioms_for_code_point_at(expr); + res=add_axioms_for_code_point_at(expr); else if(id==ID_cprover_string_code_point_before_func) - return add_axioms_for_code_point_before(expr); + res=add_axioms_for_code_point_before(expr); else if(id==ID_cprover_string_code_point_count_func) - return add_axioms_for_code_point_count(expr); + res=add_axioms_for_code_point_count(expr); else if(id==ID_cprover_string_offset_by_code_point_func) - return add_axioms_for_offset_by_code_point(expr); + res=add_axioms_for_offset_by_code_point(expr); else if(id==ID_cprover_string_compare_to_func) - return add_axioms_for_compare_to(expr); + res=add_axioms_for_compare_to(expr); else if(id==ID_cprover_string_literal_func) - return add_axioms_from_literal(expr); + res=add_axioms_from_literal(expr); else if(id==ID_cprover_string_concat_func) - return add_axioms_for_concat(expr); + res=add_axioms_for_concat(expr); else if(id==ID_cprover_string_concat_int_func) - return add_axioms_for_concat_int(expr); + res=add_axioms_for_concat_int(expr); else if(id==ID_cprover_string_concat_long_func) - return add_axioms_for_concat_long(expr); + res=add_axioms_for_concat_long(expr); else if(id==ID_cprover_string_concat_bool_func) - return add_axioms_for_concat_bool(expr); + res=add_axioms_for_concat_bool(expr); else if(id==ID_cprover_string_concat_char_func) - return add_axioms_for_concat_char(expr); + res=add_axioms_for_concat_char(expr); else if(id==ID_cprover_string_concat_double_func) - return add_axioms_for_concat_double(expr); + res=add_axioms_for_concat_double(expr); else if(id==ID_cprover_string_concat_float_func) - return add_axioms_for_concat_float(expr); + res=add_axioms_for_concat_float(expr); else if(id==ID_cprover_string_concat_code_point_func) - return add_axioms_for_concat_code_point(expr); + res=add_axioms_for_concat_code_point(expr); else if(id==ID_cprover_string_insert_func) - return add_axioms_for_insert(expr); + res=add_axioms_for_insert(expr); else if(id==ID_cprover_string_insert_int_func) - return add_axioms_for_insert_int(expr); + res=add_axioms_for_insert_int(expr); else if(id==ID_cprover_string_insert_long_func) - return add_axioms_for_insert_long(expr); + res=add_axioms_for_insert_long(expr); else if(id==ID_cprover_string_insert_bool_func) - return add_axioms_for_insert_bool(expr); + res=add_axioms_for_insert_bool(expr); else if(id==ID_cprover_string_insert_char_func) - return add_axioms_for_insert_char(expr); + res=add_axioms_for_insert_char(expr); else if(id==ID_cprover_string_insert_double_func) - return add_axioms_for_insert_double(expr); + res=add_axioms_for_insert_double(expr); else if(id==ID_cprover_string_insert_float_func) - return add_axioms_for_insert_float(expr); + res=add_axioms_for_insert_float(expr); +#if 0 else if(id==ID_cprover_string_insert_char_array_func) - return add_axioms_for_insert_char_array(expr); + res=add_axioms_for_insert_char_array(expr); +#endif else if(id==ID_cprover_string_substring_func) - return add_axioms_for_substring(expr); + res=add_axioms_for_substring(expr); else if(id==ID_cprover_string_trim_func) - return add_axioms_for_trim(expr); + res=add_axioms_for_trim(expr); else if(id==ID_cprover_string_to_lower_case_func) - return add_axioms_for_to_lower_case(expr); + res=add_axioms_for_to_lower_case(expr); else if(id==ID_cprover_string_to_upper_case_func) - return add_axioms_for_to_upper_case(expr); + res=add_axioms_for_to_upper_case(expr); else if(id==ID_cprover_string_char_set_func) - return add_axioms_for_char_set(expr); + res=add_axioms_for_char_set(expr); else if(id==ID_cprover_string_value_of_func) - return add_axioms_for_value_of(expr); + res=add_axioms_for_value_of(expr); else if(id==ID_cprover_string_empty_string_func) - return add_axioms_for_empty_string(expr); + res=add_axioms_for_empty_string(expr); else if(id==ID_cprover_string_copy_func) - return add_axioms_for_copy(expr); + res=add_axioms_for_copy(expr); else if(id==ID_cprover_string_of_int_func) - return add_axioms_from_int(expr); + res=add_axioms_from_int(expr); else if(id==ID_cprover_string_of_int_hex_func) - return add_axioms_from_int_hex(expr); + res=add_axioms_from_int_hex(expr); else if(id==ID_cprover_string_of_float_func) - return add_axioms_from_float(expr); + res=add_axioms_from_float(expr); else if(id==ID_cprover_string_of_double_func) - return add_axioms_from_double(expr); + res=add_axioms_from_double(expr); else if(id==ID_cprover_string_of_long_func) - return add_axioms_from_long(expr); + res=add_axioms_from_long(expr); else if(id==ID_cprover_string_of_bool_func) - return add_axioms_from_bool(expr); + res=add_axioms_from_bool(expr); else if(id==ID_cprover_string_of_char_func) - return add_axioms_from_char(expr); - else if(id==ID_cprover_string_of_char_array_func) - return add_axioms_from_char_array(expr); + res=add_axioms_from_char(expr); else if(id==ID_cprover_string_set_length_func) - return add_axioms_for_set_length(expr); + res=add_axioms_for_set_length(expr); else if(id==ID_cprover_string_delete_func) - return add_axioms_for_delete(expr); + res=add_axioms_for_delete(expr); else if(id==ID_cprover_string_delete_char_at_func) - return add_axioms_for_delete_char_at(expr); + res=add_axioms_for_delete_char_at(expr); else if(id==ID_cprover_string_replace_func) - return add_axioms_for_replace(expr); + res=add_axioms_for_replace(expr); + else if(id==ID_cprover_string_intern_func) + res=add_axioms_for_intern(expr); + else if(id==ID_cprover_string_array_of_char_pointer_func) + res=add_axioms_for_char_pointer(expr); else { std::string msg( @@ -412,13 +569,16 @@ exprt string_constraint_generatort::add_axioms_for_function_application( msg+=id2string(id); throw msg; } + function_application_cache[expr]=res; + return res; } /*******************************************************************\ Function: string_constraint_generatort::add_axioms_for_copy - Inputs: function application with one argument, which is a string + Inputs: function application with one argument, which is a string, + or three arguments: string, integer offset and count Outputs: a new string expression @@ -430,20 +590,20 @@ Function: string_constraint_generatort::add_axioms_for_copy string_exprt string_constraint_generatort::add_axioms_for_copy( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 1)[0]); - const refined_string_typet &ref_type=to_refined_string_type(s1.type()); - string_exprt res=fresh_string(ref_type); - - // We add axioms: - // a1 : |res|=|s1| - // a2 : forall i<|s1|. s1[i]=res[i] - - axioms.push_back(res.axiom_for_has_same_length_as(s1)); - - symbol_exprt idx=fresh_univ_index("QA_index_copy", ref_type.get_index_type()); - string_constraintt a2(idx, s1.length(), equal_exprt(s1[idx], res[idx])); - axioms.push_back(a2); - return res; + const auto &args=f.arguments(); + if(args.size()==1) + { + string_exprt s1=get_string_expr(args[0]); + return s1; + } + else + { + assert(args.size()==3); + string_exprt s1=get_string_expr(args[0]); + exprt offset=args[1]; + exprt count=args[2]; + return add_axioms_for_substring(s1, offset, plus_exprt(offset, count)); + } } /*******************************************************************\ @@ -473,6 +633,28 @@ string_exprt string_constraint_generatort::add_axioms_for_java_char_array( /*******************************************************************\ +Function: string_constraint_generatort::add_axioms_for_char_pointer + + Inputs: an expression of type char + + Outputs: an array expression + + Purpose: for an expression of the form `array[0]` returns `array` + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_char_pointer( + const function_application_exprt &fun) +{ + exprt char_pointer=args(fun, 1)[0]; + if(char_pointer.id()==ID_index) + return char_pointer.op0(); + // TODO: we do not know what to do in the other cases + assert(false); +} + +/*******************************************************************\ + Function: string_constraint_generatort::add_axioms_for_length Inputs: function application with one string argument @@ -486,7 +668,7 @@ Function: string_constraint_generatort::add_axioms_for_length exprt string_constraint_generatort::add_axioms_for_length( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); return str.length(); } @@ -511,6 +693,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( const exprt &offset, const exprt &count) { + assert(false); // deprecated, we should use add_axioms_for_substring instead const typet &char_type=to_array_type(data.type()).subtype(); const typet &index_type=length.type(); refined_string_typet ref_type(index_type, char_type); @@ -523,7 +706,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( symbol_exprt qvar=fresh_univ_index("QA_string_of_char_array", index_type); exprt char_in_tab=data; assert(char_in_tab.id()==ID_index); - char_in_tab.op1()=plus_exprt(qvar, offset); + char_in_tab.op1()=plus_exprt_with_overflow_check(qvar, offset); string_constraintt a1(qvar, count, equal_exprt(str[qvar], char_in_tab)); axioms.push_back(a1); @@ -549,6 +732,7 @@ Function: string_constraint_generatort::add_axioms_from_char_array string_exprt string_constraint_generatort::add_axioms_from_char_array( const function_application_exprt &f) { + assert(false); // deprecated, we should use add_axioms_for_substring instead exprt offset; exprt count; if(f.arguments().size()==4) @@ -638,7 +822,7 @@ Function: string_constraint_generatort::add_axioms_for_char_at exprt string_constraint_generatort::add_axioms_for_char_at( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); symbol_exprt char_sym=fresh_symbol("char", ref_type.get_char_type()); axioms.push_back(equal_exprt(char_sym, str[args(f, 2)[1]])); @@ -660,26 +844,6 @@ Function: string_constraint_generatort::add_axioms_for_to_char_array exprt string_constraint_generatort::add_axioms_for_to_char_array( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); return str.content(); } - -/*******************************************************************\ - -Function: string_constraint_generatort::set_string_symbol_equal_to_expr - - Inputs: a symbol and a string - - Purpose: add a correspondence to make sure the symbol points to the - same string as the second argument - -\*******************************************************************/ - -void string_constraint_generatort::set_string_symbol_equal_to_expr( - const symbol_exprt &sym, const exprt &str) -{ - if(str.id()==ID_symbol) - assign_to_symbol(sym, find_or_add_string_of_symbol(to_symbol_expr(str))); - else - assign_to_symbol(sym, add_axioms_for_string_expr(str)); -} diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index 8b1c7bca9d4..2a08b021070 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -31,15 +31,15 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( // We add axioms: // a1 : isprefix => |str| >= |prefix|+offset - // a2 : forall 0<=qvar - // s0[witness+offset]=s2[witness] - // a3 : !isprefix => |str| < |prefix|+offset - // || (|str| >= |prefix|+offset &&0<=witness<|prefix| - // &&str[witness+ofsset]!=prefix[witness]) + // a2 : forall 0<=qvar<|prefix|. isprefix => s0[witness+offset]=s2[witness] + // a3 : !isprefix => + // |str|<|prefix|+offset || + // (0<=witness<|prefix| && str[witness+offset]!=prefix[witness]) implies_exprt a1( isprefix, - str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))); + str.axiom_for_is_longer_than(plus_exprt_with_overflow_check( + prefix.length(), offset))); axioms.push_back(a1); symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type); @@ -47,7 +47,8 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( qvar, prefix.length(), isprefix, - equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])); + equal_exprt(str[plus_exprt_with_overflow_check(qvar, offset)], + prefix[qvar])); axioms.push_back(a2); symbol_exprt witness=fresh_exist_index("witness_not_isprefix", index_type); @@ -55,14 +56,13 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( axiom_for_is_positive_index(witness), and_exprt( prefix.axiom_for_is_strictly_longer_than(witness), - notequal_exprt(str[plus_exprt(witness, offset)], prefix[witness]))); + notequal_exprt(str[plus_exprt_with_overflow_check(witness, offset)], + prefix[witness]))); or_exprt s0_notpref_s1( not_exprt( - str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))), - and_exprt( - witness_diff, str.axiom_for_is_longer_than( - plus_exprt(prefix.length(), offset)))); + plus_exprt_with_overflow_check(prefix.length(), offset))), + witness_diff); implies_exprt a3(not_exprt(isprefix), s0_notpref_s1); axioms.push_back(a3); @@ -88,8 +88,8 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( { const function_application_exprt::argumentst &args=f.arguments(); assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); - string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); - string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + string_exprt s0=get_string_expr(args[swap_arguments?1:0]); + string_exprt s1=get_string_expr(args[swap_arguments?0:1]); exprt offset; if(args.size()==2) offset=from_integer(0, s0.length().type()); @@ -121,7 +121,7 @@ exprt string_constraint_generatort::add_axioms_for_is_empty( // a2 : s0 => is_empty symbol_exprt is_empty=fresh_boolean("is_empty"); - string_exprt s0=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt s0=get_string_expr(args(f, 1)[0]); axioms.push_back(implies_exprt(is_empty, s0.axiom_for_has_length(0))); axioms.push_back(implies_exprt(s0.axiom_for_has_length(0), is_empty)); return typecast_exprt(is_empty, f.type()); @@ -150,8 +150,8 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( symbol_exprt issuffix=fresh_boolean("issuffix"); typecast_exprt tc_issuffix(issuffix, f.type()); - string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); - string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + string_exprt s0=get_string_expr(args[swap_arguments?1:0]); + string_exprt s1=get_string_expr(args[swap_arguments?0:1]); const typet &index_type=s0.length().type(); // We add axioms: @@ -159,7 +159,7 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( // a2 : forall witness s1[witness]=s0[witness + s0.length-s1.length] // a3 : !issuffix => - // s1.length > s0.length + // (s1.length > s0.length && witness=-1) // || (s1.length > witness>=0 // &&s1[witness]!=s0[witness + s0.length-s1.length] @@ -177,7 +177,8 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( exprt shifted=plus_exprt( witness, minus_exprt(s1.length(), s0.length())); or_exprt constr3( - s0.axiom_for_is_strictly_longer_than(s1), + and_exprt(s0.axiom_for_is_strictly_longer_than(s1), + equal_exprt(witness, from_integer(-1, index_type))), and_exprt( notequal_exprt(s0[witness], s1[shifted]), and_exprt( @@ -207,9 +208,10 @@ exprt string_constraint_generatort::add_axioms_for_contains( assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); symbol_exprt contains=fresh_boolean("contains"); typecast_exprt tc_contains(contains, f.type()); - string_exprt s0=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[1]); - const typet &index_type=s0.type(); + string_exprt s0=get_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[1]); + const refined_string_typet ref_type=to_refined_string_type(s0.type()); + const typet &index_type=ref_type.get_index_type(); // We add axioms: // a1 : contains => s0.length >= s1.length diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index 83735e0d5b5..f27fe5d4317 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -29,7 +29,7 @@ Function: string_constraint_generatort::add_axioms_for_set_length string_exprt string_constraint_generatort::add_axioms_for_set_length( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); exprt k=args(f, 2)[1]; const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt res=fresh_string(ref_type); @@ -76,7 +76,7 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( { const function_application_exprt::argumentst &args=f.arguments(); assert(args.size()>=2); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); exprt i(args[1]); exprt j; if(args.size()==3) @@ -110,7 +110,6 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( { const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &index_type=ref_type.get_index_type(); - symbol_exprt idx=fresh_exist_index("index_substring", index_type); assert(start.type()==index_type); assert(end.type()==index_type); string_exprt res=fresh_string(ref_type); @@ -133,8 +132,11 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( // Warning: check what to do if the string is not long enough axioms.push_back(str.axiom_for_is_longer_than(end)); - string_constraintt a4( - idx, res.length(), equal_exprt(res[idx], str[plus_exprt(start, idx)])); + symbol_exprt idx=fresh_univ_index("QA_index_substring", index_type); + string_constraintt a4(idx, + res.length(), + equal_exprt(res[idx], + str[plus_exprt_with_overflow_check(start, idx)])); axioms.push_back(a4); return res; } @@ -154,7 +156,7 @@ Function: string_constraint_generatort::add_axioms_for_trim string_exprt string_constraint_generatort::add_axioms_for_trim( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &index_type=ref_type.get_index_type(); string_exprt res=fresh_string(ref_type); @@ -173,7 +175,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( // a8 : forall n<|s1|, s[idx+n]=s1[n] // a9 : (s[m]>' ' &&s[m+|s1|-1]>' ') || m=|s| - exprt a1=str.axiom_for_is_longer_than(plus_exprt(idx, res.length())); + exprt a1=str.axiom_for_is_longer_than( + plus_exprt_with_overflow_check(idx, res.length())); axioms.push_back(a1); binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type)); @@ -195,7 +198,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( axioms.push_back(a6); symbol_exprt n2=fresh_univ_index("QA_index_trim2", index_type); - minus_exprt bound(str.length(), plus_exprt(idx, res.length())); + minus_exprt bound(str.length(), plus_exprt_with_overflow_check(idx, + res.length())); binary_relation_exprt eqn2( str[plus_exprt(idx, plus_exprt(res.length(), n2))], ID_le, @@ -210,7 +214,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( axioms.push_back(a8); minus_exprt index_before( - plus_exprt(idx, res.length()), from_integer(1, index_type)); + plus_exprt_with_overflow_check(idx, res.length()), + from_integer(1, index_type)); binary_relation_exprt no_space_before(str[index_before], ID_gt, space_char); or_exprt a9( equal_exprt(idx, str.length()), @@ -236,7 +241,7 @@ Function: string_constraint_generatort::add_axioms_for_to_lower_case string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); @@ -289,7 +294,7 @@ Function: string_constraint_generatort::add_axioms_for_to_upper_case string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); @@ -345,7 +350,7 @@ Function: string_constraint_generatort::add_axioms_for_char_set string_exprt string_constraint_generatort::add_axioms_for_char_set( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); string_exprt res=fresh_string(ref_type); with_exprt sarrnew(str.content(), args(f, 3)[1], args(f, 3)[2]); @@ -378,7 +383,7 @@ Function: string_constraint_generatort::add_axioms_for_replace string_exprt string_constraint_generatort::add_axioms_for_replace( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const exprt &old_char=args(f, 3)[1]; const exprt &new_char=args(f, 3)[2]; @@ -421,10 +426,12 @@ Function: string_constraint_generatort::add_axioms_for_delete_char_at string_exprt string_constraint_generatort::add_axioms_for_delete_char_at( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); exprt index_one=from_integer(1, str.length().type()); return add_axioms_for_delete( - str, args(f, 2)[1], plus_exprt(args(f, 2)[1], index_one)); + str, + args(f, 2)[1], + plus_exprt_with_overflow_check(args(f, 2)[1], index_one)); } /*******************************************************************\ @@ -468,6 +475,6 @@ Function: string_constraint_generatort::add_axioms_for_delete string_exprt string_constraint_generatort::add_axioms_for_delete( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); return add_axioms_for_delete(str, args(f, 3)[1], args(f, 3)[2]); } diff --git a/src/solvers/refinement/string_constraint_generator_valueof.cpp b/src/solvers/refinement/string_constraint_generator_valueof.cpp index d1030e5e085..10fe490259a 100644 --- a/src/solvers/refinement/string_constraint_generator_valueof.cpp +++ b/src/solvers/refinement/string_constraint_generator_valueof.cpp @@ -63,7 +63,8 @@ Function: string_constraint_generatort::add_axioms_from_float string_exprt string_constraint_generatort::add_axioms_from_float( const function_application_exprt &f) { - return add_axioms_from_float(args(f, 1)[0], false); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_float(args(f, 1)[0], ref_type, false); } /*******************************************************************\ @@ -81,7 +82,8 @@ Function: string_constraint_generatort::add_axioms_from_double string_exprt string_constraint_generatort::add_axioms_from_double( const function_application_exprt &f) { - return add_axioms_from_float(args(f, 1)[0], true); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_float(args(f, 1)[0], ref_type, true); } /*******************************************************************\ @@ -99,13 +101,12 @@ Function: string_constraint_generatort::add_axioms_from_float \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_from_float( - const exprt &f, bool double_precision) + const exprt &f, const refined_string_typet &ref_type, bool double_precision) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); + string_exprt res=fresh_string(ref_type); const typet &index_type=ref_type.get_index_type(); const typet &char_type=ref_type.get_char_type(); - string_exprt res=fresh_string(ref_type); - const exprt &index24=from_integer(24, ref_type.get_index_type()); + const exprt &index24=from_integer(24, index_type); axioms.push_back(res.axiom_for_is_shorter_than(index24)); string_exprt magnitude=fresh_string(ref_type); @@ -323,13 +324,22 @@ string_exprt string_constraint_generatort::add_axioms_from_int( axioms.push_back(a1); exprt chr=res[0]; - exprt starts_with_minus=equal_exprt(chr, minus_char); - exprt starts_with_digit=and_exprt( + equal_exprt starts_with_minus(chr, minus_char); + and_exprt starts_with_digit( binary_relation_exprt(chr, ID_ge, zero_char), binary_relation_exprt(chr, ID_le, nine_char)); or_exprt a2(starts_with_digit, starts_with_minus); axioms.push_back(a2); + // These are constraints to detect number that requiere the maximum number + // of digits + exprt smallest_with_max_digits= + from_integer(smallest_by_digit(max_size-1), type); + binary_relation_exprt big_negative( + i, ID_le, unary_minus_exprt(smallest_with_max_digits)); + binary_relation_exprt big_positive(i, ID_ge, smallest_with_max_digits); + or_exprt requieres_max_digits(big_negative, big_positive); + for(size_t size=1; size<=max_size; size++) { // For each possible size, we add axioms: @@ -387,13 +397,18 @@ string_exprt string_constraint_generatort::add_axioms_from_int( axioms.push_back(a6); } - // we have to be careful when exceeding the maximal size of integers + // when the size is close to the maximum, either the number is very big + // or it is negative + if(size==max_size-1) + { + implies_exprt a7(premise, or_exprt(requieres_max_digits, + starts_with_minus)); + axioms.push_back(a7); + } + // when we reach the maximal size the number is very big in the negative if(size==max_size) { - exprt smallest_with_10_digits=from_integer( - smallest_by_digit(max_size), type); - binary_relation_exprt big(i, ID_ge, smallest_with_10_digits); - implies_exprt a7(premise, big); + implies_exprt a7(premise, and_exprt(starts_with_minus, big_negative)); axioms.push_back(a7); } } @@ -597,6 +612,60 @@ string_exprt string_constraint_generatort::add_axioms_for_value_of( /*******************************************************************\ +Function: string_constraint_generatort::add_axioms_for_correct_number_format + + Inputs: function application with one string expression + + Outputs: an boolean expression + + Purpose: add axioms making the return value true if the given string is + a correct number + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_correct_number_format( + const string_exprt &str, std::size_t max_size) +{ + symbol_exprt correct=fresh_boolean("correct_number_format"); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &char_type=ref_type.get_char_type(); + const typet &index_type=ref_type.get_index_type(); + exprt zero_char=constant_char('0', char_type); + exprt nine_char=constant_char('9', char_type); + exprt minus_char=constant_char('-', char_type); + exprt plus_char=constant_char('+', char_type); + + exprt chr=str[0]; + equal_exprt starts_with_minus(chr, minus_char); + equal_exprt starts_with_plus(chr, plus_char); + and_exprt starts_with_digit( + binary_relation_exprt(chr, ID_ge, zero_char), + binary_relation_exprt(chr, ID_le, nine_char)); + + or_exprt correct_first( + or_exprt(starts_with_minus, starts_with_plus), starts_with_digit); + exprt has_first=str.axiom_for_is_longer_than(from_integer(1, index_type)); + implies_exprt a1(correct, and_exprt(has_first, correct_first)); + axioms.push_back(a1); + + exprt not_too_long=str.axiom_for_is_shorter_than(max_size); + axioms.push_back(not_too_long); + + symbol_exprt qvar=fresh_univ_index("number_format", index_type); + + and_exprt is_digit( + binary_relation_exprt(str[qvar], ID_ge, zero_char), + binary_relation_exprt(str[qvar], ID_le, nine_char)); + + string_constraintt a2( + qvar, from_integer(1, index_type), str.length(), correct, is_digit); + + axioms.push_back(a2); + return correct; +} + +/*******************************************************************\ + Function: string_constraint_generatort::add_axioms_for_parse_int Inputs: function application with one string expression @@ -610,7 +679,7 @@ Function: string_constraint_generatort::add_axioms_for_parse_int exprt string_constraint_generatort::add_axioms_for_parse_int( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); const typet &type=f.type(); symbol_exprt i=fresh_symbol("parsed_int", type); const refined_string_typet &ref_type=to_refined_string_type(str.type()); @@ -626,6 +695,10 @@ exprt string_constraint_generatort::add_axioms_for_parse_int( exprt starts_with_plus=equal_exprt(chr, plus_char); exprt starts_with_digit=binary_relation_exprt(chr, ID_ge, zero_char); + // TODO: we should throw an exception when this does not hold: + exprt correct=add_axioms_for_correct_number_format(str); + axioms.push_back(correct); + for(unsigned size=1; size<=10; size++) { exprt sum=from_integer(0, type); diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 16b89bc43c8..1391ffd2315 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -11,17 +11,38 @@ Author: Alberto Griggio, alberto.griggio@gmail.com \*******************************************************************/ #include +#include #include #include #include +#include +#include #include #include #include +#include + +/*******************************************************************\ + +Constructor: string_refinementt + + Inputs: a namespace, a decision procedure, a bound on the number + of refinements and a boolean flag `concretize_result` + + Purpose: refinement_bound is a bound on the number of refinement allowed. + if `concretize_result` is set to true, at the end of the decision + procedure, the solver try to find a concrete value for each + character + +\*******************************************************************/ string_refinementt::string_refinementt( - const namespacet &_ns, propt &_prop, unsigned refinement_bound): + const namespacet &_ns, + propt &_prop, + unsigned refinement_bound): supert(_ns, _prop), use_counter_example(false), + do_concretizing(false), initial_loop_bound(refinement_bound) { } @@ -52,15 +73,27 @@ Function: string_refinementt::display_index_set void string_refinementt::display_index_set() { + std::size_t count=0; + std::size_t count_current=0; for(const auto &i : index_set) { const exprt &s=i.first; - debug() << "IS(" << from_expr(s) << ")=={"; + debug() << "IS(" << from_expr(s) << ")=={" << eom; for(auto j : i.second) - debug() << from_expr(j) << "; "; + { + if(current_index_set[i.first].find(j)!=current_index_set[i.first].end()) + { + count_current++; + debug() << "**"; + } + debug() << " " << from_expr(j) << ";" << eom; + count++; + } debug() << "}" << eom; } + debug() << count << " elements in index set (" << count_current + << " newly added)" << eom; } /*******************************************************************\ @@ -100,152 +133,320 @@ void string_refinementt::add_instantiations() /*******************************************************************\ -Function: string_refinementt::convert_rest - - Inputs: an expression +Function: string_refinementt::add_symbol_to_symbol_map() - Outputs: a literal + Inputs: a symbol and the expression to map it to - Purpose: if the expression is a function application, we convert it - using our own convert_function_application method + Purpose: keeps a map of symbols to expressions, such as none of the + mapped values exist as a key \*******************************************************************/ -literalt string_refinementt::convert_rest(const exprt &expr) +void string_refinementt::add_symbol_to_symbol_map +(const exprt &lhs, const exprt &rhs) { - if(expr.id()==ID_function_application) + assert(lhs.id()==ID_symbol); + + // We insert the mapped value of the rhs, if it exists. + auto it=symbol_resolve.find(rhs); + const exprt &new_rhs=it!=symbol_resolve.end()?it->second:rhs; + + symbol_resolve[lhs]=new_rhs; + reverse_symbol_resolve[new_rhs].push_back(lhs); + + std::list symbols_to_update_with_new_rhs(reverse_symbol_resolve[rhs]); + for(exprt item : symbols_to_update_with_new_rhs) { - // can occur in __CPROVER_assume - bvt bv=convert_function_application(to_function_application_expr(expr)); - assert(bv.size()==1); - return bv[0]; + symbol_resolve[item]=new_rhs; + reverse_symbol_resolve[new_rhs].push_back(item); } - else +} + +/*******************************************************************\ + +Function: string_refinementt::set_char_array_equality() + + Inputs: the rhs and lhs of an equality over character arrays + + Purpose: add axioms if the rhs is a character array + +\*******************************************************************/ + +void string_refinementt::set_char_array_equality( + const exprt &lhs, const exprt &rhs) +{ + assert(lhs.id()==ID_symbol); + + if(rhs.id()==ID_array && rhs.type().id()==ID_array) { - return supert::convert_rest(expr); + const typet &index_type=to_array_type(rhs.type()).size().type(); + for(size_t i=0, ilim=rhs.operands().size(); i!=ilim; ++i) + { + // Introduce axioms to map symbolic rhs to its char array. + index_exprt arraycell(rhs, from_integer(i, index_type)); + equal_exprt arrayeq(arraycell, rhs.operands()[i]); + add_lemma(arrayeq, false); +#if 0 + generator.axioms.push_back(arrayeq); +#endif + } } + // At least for Java (as it is currently pre-processed), we need not consider + // other cases, because all character arrays find themselves on the rhs of an + // equality. Note that this might not be the case for other languages. } /*******************************************************************\ -Function: string_refinementt::convert_symbol +Function: string_refinementt::substitute_function_applications() - Inputs: an expression + Inputs: an expression containing function applications - Outputs: a bitvector + Outputs: an epression containing no function application - Purpose: if the expression as string type, look up for the string in the - list of string symbols that we maintain, and convert it; - otherwise use the method of the parent class + Purpose: remove functions applications and create the necessary + axioms \*******************************************************************/ -bvt string_refinementt::convert_symbol(const exprt &expr) +exprt string_refinementt::substitute_function_applications(exprt expr) { - const typet &type=expr.type(); - const irep_idt &identifier=expr.get(ID_identifier); - assert(!identifier.empty()); + for(size_t i=0; iMAX_CONCRETE_STRING_SIZE? + MAX_CONCRETE_STRING_SIZE:concretize_limit; + exprt content_expr=str.content(); + replace_expr(current_model, content_expr); + for(size_t i=0; i &pair : non_string_axioms) + { + replace_expr(symbol_resolve, pair.first); + debug() << "super::set_to " << from_expr(pair.first) << eom; + supert::set_to(pair.first, pair.second); + } + + for(exprt &axiom : generator.axioms) + { + replace_expr(symbol_resolve, axiom); if(axiom.id()==ID_string_constraint) { string_constraintt c=to_string_constraint(axiom); @@ -282,6 +494,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() { add_lemma(axiom); } + } initial_index_set(universal_axioms); update_index_set(cur); @@ -302,6 +515,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() else { debug() << "check_SAT: the model is correct" << eom; + concretize_lengths(); return D_SATISFIABLE; } @@ -318,7 +532,13 @@ decision_proceduret::resultt string_refinementt::dec_solve() if(current_index_set.empty()) { debug() << "current index set is empty" << eom; - return D_SATISFIABLE; + if(do_concretizing) + { + concretize_results(); + do_concretizing=false; + } + else + return D_SATISFIABLE; } display_index_set(); @@ -367,131 +587,337 @@ bvt string_refinementt::convert_bool_bv(const exprt &boole, const exprt &orig) Function: string_refinementt::add_lemma - Inputs: a lemma + Inputs: a lemma and Boolean value stating whether the lemma should + be added to the index set. Purpose: add the given lemma to the solver \*******************************************************************/ -void string_refinementt::add_lemma(const exprt &lemma, bool add_to_index_set) +void string_refinementt::add_lemma( + const exprt &lemma, bool _simplify, bool add_to_index_set) { if(!seen_instances.insert(lemma).second) return; - if(lemma.is_true()) + if(add_to_index_set) + cur.push_back(lemma); + + exprt simple_lemma=lemma; + if(_simplify) + simplify(simple_lemma, ns); + + if(simple_lemma.is_true()) { +#if 0 debug() << "string_refinementt::add_lemma : tautology" << eom; +#endif return; } - debug() << "adding lemma " << from_expr(lemma) << eom; + debug() << "adding lemma " << from_expr(simple_lemma) << eom; - prop.l_set_to_true(convert(lemma)); - if(add_to_index_set) - cur.push_back(lemma); + prop.l_set_to_true(convert(simple_lemma)); } /*******************************************************************\ -Function: string_refinementt::string_of_array +Function: string_refinementt::get_array - Inputs: a constant array expression and a integer expression + Inputs: an expression representing an array and an expression + representing an integer - Outputs: a string + Outputs: an array expression or an array_of_exprt - Purpose: convert the content of a string to a more readable representation. - This should only be used for debbuging. + Purpose: get a model of an array and put it in a certain form. + If the size cannot be obtained or if it is too big, return an + empty array. \*******************************************************************/ -std::string string_refinementt::string_of_array( - const exprt &arr, const exprt &size) const +exprt string_refinementt::get_array(const exprt &arr, const exprt &size) const { - if(size.id()!=ID_constant) - return "string of unknown size"; + exprt arr_val=get_array(arr); + exprt size_val=supert::get(size); + size_val=simplify_expr(size_val, ns); + typet char_type=arr.type().subtype(); + typet index_type=size.type(); + array_typet empty_ret_type(char_type, from_integer(0, index_type)); + array_of_exprt empty_ret(from_integer(0, char_type), empty_ret_type); + + if(size_val.id()!=ID_constant) + { +#if 0 + debug() << "(sr::get_array) string of unknown size: " + << from_expr(size_val) << eom; +#endif + return empty_ret; + } + unsigned n; - if(to_unsigned_integer(to_constant_expr(size), n)) - n=0; + if(to_unsigned_integer(to_constant_expr(size_val), n)) + { +#if 0 + debug() << "(sr::get_array) size is not valid" << eom; +#endif + return empty_ret; + } + + array_typet ret_type(char_type, from_integer(n, index_type)); + array_exprt ret(ret_type); if(n>MAX_CONCRETE_STRING_SIZE) - return "very long string"; + { +#if 0 + debug() << "(sr::get_array) long string (size=" << n << ")" << eom; +#endif + return empty_ret; + } + if(n==0) - return "\"\""; + { +#if 0 + debug() << "(sr::get_array) empty string" << eom; +#endif + return empty_ret; + } - std::ostringstream buf; - buf << "\""; - exprt val=get(arr); + std::vector concrete_array(n); - if(val.id()=="array-list") + if(arr_val.id()=="array-list") { - for(size_t i=0; i(c); + exprt value=arr_val.operands()[i*2+1]; + to_unsigned_integer(to_constant_expr(value), concrete_array[idx]); } } } } + else if(arr_val.id()==ID_array) + { + for(size_t i=0; i=32) + result << (unsigned char) c; + else + { + result << "\\u" << std::hex << std::setw(4) << std::setfill('0') + << (unsigned int) c; + } + } + + return result.str(); +} + +/*******************************************************************\ - for(size_t i=0; i &m, bool negated) const + std::map &m, const typet &type, bool negated) const { exprt sum=nil_exprt(); mp_integer constants=0; typet index_type; if(m.empty()) - return nil_exprt(); + return from_integer(0, type); else index_type=m.begin()->first.type(); @@ -723,12 +1122,20 @@ exprt string_refinementt::sum_over_map( default: if(second>1) { - for(int i=0; isecond; i--) + if(sum.is_nil()) + sum=unary_minus_exprt(t); + else + sum=minus_exprt(sum, t); + for(int i=-1; i>second; i--) sum=minus_exprt(sum, t); } } @@ -755,7 +1162,7 @@ Function: string_refinementt::simplify_sum exprt string_refinementt::simplify_sum(const exprt &f) const { std::map map=map_representation_of_sum(f); - return sum_over_map(map); + return sum_over_map(map, f.type()); } /*******************************************************************\ @@ -800,7 +1207,7 @@ exprt string_refinementt::compute_inverse_function( } elems.erase(it); - return sum_over_map(elems, neg); + return sum_over_map(elems, f.type(), neg); } @@ -830,7 +1237,7 @@ Function: find_qvar Outputs: a Boolean - Purpose: looks for the symbol and return true if it is found + Purpose: look for the symbol and return true if it is found \*******************************************************************/ @@ -885,6 +1292,26 @@ Function: string_refinementt::initial_index_set and the upper bound minus one \*******************************************************************/ +void string_refinementt::add_to_index_set(const exprt &s, exprt i) +{ + simplify(i, ns); + if(i.id()==ID_constant) + { + mp_integer mpi; + to_integer(i, mpi); + if(mpi<0) + { + debug() << "add_to_index_set : ignoring negative number " << mpi << eom; + return; + } + } + if(index_set[s].insert(i).second) + { + debug() << "adding to index set of " << from_expr(s) + << ": " << from_expr(i) << eom; + current_index_set[s].insert(i); + } +} void string_refinementt::initial_index_set(const string_constraintt &axiom) { @@ -906,8 +1333,7 @@ void string_refinementt::initial_index_set(const string_constraintt &axiom) // if cur is of the form s[i] and no quantified variable appears in i if(!has_quant_var) { - current_index_set[s].insert(i); - index_set[s].insert(i); + add_to_index_set(s, i); } else { @@ -917,8 +1343,7 @@ void string_refinementt::initial_index_set(const string_constraintt &axiom) axiom.upper_bound(), from_integer(1, axiom.upper_bound().type())); replace_expr(qvar, kminus1, e); - current_index_set[s].insert(e); - index_set[s].insert(e); + add_to_index_set(s, e); } } else @@ -952,12 +1377,7 @@ void string_refinementt::update_index_set(const exprt &formula) const exprt &i=cur.op1(); assert(s.type().id()==ID_array); exprt simplified=simplify_sum(i); - if(index_set[s].insert(simplified).second) - { - debug() << "adding to index set of " << from_expr(s) - << ": " << from_expr(simplified) << eom; - current_index_set[s].insert(simplified); - } + add_to_index_set(s, simplified); } else { @@ -1014,19 +1434,18 @@ exprt find_index(const exprt &expr, const exprt &str) catch (exprt i) { return i; } } - /*******************************************************************\ Function: string_refinementt::instantiate - Inputs: an universaly quantified formula `axiom`, an array of char + Inputs: a universally quantified formula `axiom`, an array of char variable `str`, and an index expression `val`. - Outputs: substitute `qvar` the universaly quantified variable of `axiom`, by + Outputs: substitute `qvar` the universally quantified variable of `axiom`, by an index `val`, in `axiom`, so that the index used for `str` equals `val`. For instance, if `axiom` corresponds to - $\forall q. s[q+x]='a' && t[q]='b'$, `instantiate(axom,s,v)` would return - an expression for $s[v]='a' && t[v-x]='b'$. + $\forall q. s[q+x]='a' && t[q]='b'$, `instantiate(axom,s,v)` + would return an expression for $s[v]='a' && t[v-x]='b'$. \*******************************************************************/ @@ -1051,6 +1470,17 @@ exprt string_refinementt::instantiate( return implies_exprt(bounds, instance); } +/*******************************************************************\ + +Function: string_refinementt::instantiate_not_contains + + Inputs: a quantified formula representing `not_contains`, and a + list to which to add the created lemmas to + + Purpose: instantiate a quantified formula representing `not_contains` + by substituting the quantifiers and generating axioms + +\*******************************************************************/ void string_refinementt::instantiate_not_contains( const string_not_contains_constraintt &axiom, std::list &new_lemmas) @@ -1102,3 +1532,76 @@ void string_refinementt::instantiate_not_contains( new_lemmas.push_back(witness_bounds); } } + +/*******************************************************************\ + +Function: string_refinementt::substitute_array_lists() + + Inputs: an expression containing array-list expressions + + Outputs: an epression containing no array-list + + Purpose: replace array-lists by 'with' expressions + +\*******************************************************************/ + +exprt string_refinementt::substitute_array_lists(exprt expr) const +{ + for(size_t i=0; i=2); + typet &char_type=expr.operands()[1].type(); + array_typet arr_type(char_type, infinity_exprt(char_type)); + array_of_exprt new_arr(from_integer(0, char_type), + arr_type); + + with_exprt ret_expr(new_arr, + expr.operands()[0], + expr.operands()[1]); + + for(size_t i=2; isecond); + } + + ecopy=supert::get(ecopy); + + return substitute_array_lists(ecopy); +} diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index be0cd332bc3..0a62a6abe7d 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -14,6 +14,7 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #define CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H #include +#include #include #include @@ -27,30 +28,32 @@ Author: Alberto Griggio, alberto.griggio@gmail.com class string_refinementt: public bv_refinementt { public: - // refinement_bound is a bound on the number of refinement allowed string_refinementt( - const namespacet &_ns, propt &_prop, unsigned refinement_bound); + const namespacet &_ns, + propt &_prop, + unsigned refinement_bound); void set_mode(); // Should we use counter examples at each iteration? bool use_counter_example; - virtual std::string decision_procedure_text() const + bool do_concretizing; + + virtual std::string decision_procedure_text() const override { return "string refinement loop with "+prop.solver_text(); } static exprt is_positive(const exprt &x); + exprt get(const exprt &expr) const override; + protected: typedef std::set expr_sett; + typedef std::list exprt_listt; - virtual bvt convert_symbol(const exprt &expr); - virtual bvt convert_function_application( - const function_application_exprt &expr); - - decision_proceduret::resultt dec_solve(); + decision_proceduret::resultt dec_solve() override; bvt convert_bool_bv(const exprt &boole, const exprt &orig); @@ -76,23 +79,42 @@ class string_refinementt: public bv_refinementt // Warning: this is indexed by array_expressions and not string expressions std::map current_index_set; std::map index_set; + replace_mapt symbol_resolve; + std::map reverse_symbol_resolve; + std::list> non_string_axioms; - void display_index_set(); + // Valuation in the current model of the symbols that have been created + // by the solver + replace_mapt current_model; - void add_lemma(const exprt &lemma, bool add_to_index_set=true); + void add_equivalence(const irep_idt & lhs, const exprt & rhs); - bool boolbv_set_equality_to_true(const equal_exprt &expr); + void display_index_set(); - literalt convert_rest(const exprt &expr); + void add_lemma(const exprt &lemma, + bool simplify=true, + bool add_to_index_set=true); - void add_instantiations(); + exprt substitute_function_applications(exprt expr); + typet substitute_java_string_types(typet type); + exprt substitute_java_strings(exprt expr); + void add_symbol_to_symbol_map(const exprt &lhs, const exprt &rhs); + bool is_char_array(const typet &type) const; + bool add_axioms_for_string_assigns(const exprt &lhs, const exprt &rhs); + void set_to(const exprt &expr, bool value) override; + void add_instantiations(); + void add_negation_of_constraint_to_solver( + const string_constraintt &axiom, supert &solver); + void fill_model(); bool check_axioms(); + void set_char_array_equality(const exprt &lhs, const exprt &rhs); void update_index_set(const exprt &formula); void update_index_set(const std::vector &cur); void initial_index_set(const string_constraintt &axiom); void initial_index_set(const std::vector &string_axioms); + void add_to_index_set(const exprt &s, exprt i); exprt instantiate( const string_constraintt &axiom, const exprt &str, const exprt &val); @@ -101,17 +123,26 @@ class string_refinementt: public bv_refinementt const string_not_contains_constraintt &axiom, std::list &new_lemmas); + exprt substitute_array_lists(exprt) const; + exprt compute_inverse_function( const exprt &qvar, const exprt &val, const exprt &f); std::map map_representation_of_sum(const exprt &f) const; - exprt sum_over_map(std::map &m, bool negated=false) const; + exprt sum_over_map( + std::map &m, const typet &type, bool negated=false) const; exprt simplify_sum(const exprt &f) const; - exprt get_array(const exprt &arr, const exprt &size); + void concretize_results(); + void concretize_lengths(); + // Length of char arrays found during concretization + std::map found_length; + + exprt get_array(const exprt &arr, const exprt &size) const; + exprt get_array(const exprt &arr) const; - std::string string_of_array(const exprt &arr, const exprt &size) const; + std::string string_of_array(const array_exprt &arr); }; #endif diff --git a/src/util/Makefile b/src/util/Makefile index fefabaed3af..e9dd739db8e 100644 --- a/src/util/Makefile +++ b/src/util/Makefile @@ -23,7 +23,7 @@ SRC = arith_tools.cpp base_type.cpp cmdline.cpp config.cpp symbol_table.cpp \ memory_info.cpp pipe_stream.cpp irep_hash.cpp endianness_map.cpp \ ssa_expr.cpp json_irep.cpp json_expr.cpp \ fresh_symbol.cpp \ - string_utils.cpp + format_number_range.cpp string_utils.cpp nondet_ifthenelse.cpp INCLUDES= -I .. diff --git a/src/util/format_number_range.cpp b/src/util/format_number_range.cpp new file mode 100644 index 00000000000..cca3bbf154c --- /dev/null +++ b/src/util/format_number_range.cpp @@ -0,0 +1,122 @@ +/*******************************************************************\ + +Module: Format vector of numbers into a compressed range + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#include +#include +#include + +#include "format_number_range.h" + +/*******************************************************************\ + +Function: format_number_range::operator() + + Inputs: vector of numbers + + Outputs: string of compressed number range representation + + Purpose: create shorter representation for output + +\*******************************************************************/ + +std::string format_number_ranget::operator()(std::vector &numbers) +{ + std::string number_range; + std::sort(numbers.begin(), numbers.end()); + unsigned end_number=numbers.back(); + if(numbers.front()==end_number) + number_range=std::to_string(end_number); // only single number + else + { + bool next=true; + bool first=true; + bool range=false; + unsigned start_number=numbers.front(); + unsigned last_number=start_number; + + for(const auto &number : numbers) + { + if(next) + { + next=false; + start_number=number; + last_number=number; + } + // advance one forward + else + { + if(number==last_number+1 && !(number==end_number)) + { + last_number++; + if(last_number>start_number+1) + range=true; + } + // end this block range + else + { + if(first) + first=false; + else + number_range+=","; + if(last_number>start_number) + { + if(range) + { + if(number==end_number && number==last_number+1) + number_range+= + std::to_string(start_number)+"-"+std::to_string(end_number); + else if(number==end_number) + number_range+= + std::to_string(start_number)+"-"+std::to_string(last_number)+ + ","+std::to_string(end_number); + else + number_range+= + std::to_string(start_number)+"-"+std::to_string(last_number); + } + else + { + if(number!=end_number) + number_range+= + std::to_string(start_number)+","+std::to_string(last_number); + else + { + if(start_number+1==last_number && last_number+1==number) + number_range+= + std::to_string(start_number)+"-"+std::to_string(end_number); + else + number_range+= + std::to_string(start_number)+ + ","+std::to_string(last_number)+ + ","+std::to_string(end_number); + } + } + start_number=number; + last_number=number; + range=false; + continue; + } + else + { + if(number!=end_number) + number_range+=std::to_string(start_number); + else + number_range+=std::to_string(start_number)+","+ + std::to_string(end_number); + start_number=number; + last_number=number; + range=false; + continue; // already set next start number + } + next=true; + } + } + } + } + assert(!number_range.empty()); + return number_range; +} diff --git a/src/util/format_number_range.h b/src/util/format_number_range.h new file mode 100644 index 00000000000..7ff016e9c4d --- /dev/null +++ b/src/util/format_number_range.h @@ -0,0 +1,21 @@ +/*******************************************************************\ + +Module: Format vector of numbers into a compressed range + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#ifndef CPROVER_UTIL_FORMAT_NUMBER_RANGE_H +#define CPROVER_UTIL_FORMAT_NUMBER_RANGE_H + +#include +#include + +class format_number_ranget +{ +public: + std::string operator()(std::vector &); +}; + +#endif // CPROVER_UTIL_FORMAT_NUMBER_RANGE_H diff --git a/src/util/irep_ids.txt b/src/util/irep_ids.txt index 066a5f831cc..1958522eaee 100644 --- a/src/util/irep_ids.txt +++ b/src/util/irep_ids.txt @@ -737,12 +737,14 @@ bswap java_bytecode_index java_instanceof java_super_method_call +skip_initialize java_enum_static_unwind push_catch string_constraint string_not_contains_constraint cprover_char_literal_func cprover_string_literal_func +cprover_string_array_of_char_pointer_func cprover_string_char_at_func cprover_string_char_set_func cprover_string_code_point_at_func @@ -802,3 +804,4 @@ cprover_string_to_lower_case_func cprover_string_to_upper_case_func cprover_string_trim_func cprover_string_value_of_func +basic_block_covered_lines \ No newline at end of file diff --git a/src/util/language.cpp b/src/util/language.cpp index 4b4a1ba4ae4..ddb41979040 100644 --- a/src/util/language.cpp +++ b/src/util/language.cpp @@ -8,6 +8,11 @@ Author: Daniel Kroening, kroening@kroening.com #include "language.h" #include "expr.h" +#include +#include +#include +#include +#include /*******************************************************************\ @@ -125,3 +130,209 @@ bool languaget::type_to_name( name=type.pretty(); return false; } + +/*******************************************************************\ + +Function: languaget::set_should_generate_opaque_method_stubs + + Inputs: + should_generate_stubs - Should stub generation be enabled + + Outputs: + + Purpose: Turn on or off stub generation. + +\*******************************************************************/ +void languaget::set_should_generate_opaque_method_stubs( + bool should_generate_stubs) +{ + generate_opaque_stubs=should_generate_stubs; +} + +/*******************************************************************\ + +Function: languaget::generate_opaque_method_stubs + + Inputs: + symbol_table - the symbol table for the program + + Outputs: + + Purpose: When there are opaque methods (e.g. ones where we don't + have a body), we create a stub function in the goto + program and mark it as opaque so the interpreter fills in + appropriate values for it. This will only happen if + generate_opaque_stubs is enabled. + +\*******************************************************************/ + +void languaget::generate_opaque_method_stubs(symbol_tablet &symbol_table) +{ + if(generate_opaque_stubs) + { + system_symbols=system_library_symbolst(); + + for(auto &symbol_entry : symbol_table.symbols) + { + if(is_symbol_opaque_function(symbol_entry.second)) + { + symbolt &symbol=symbol_entry.second; + + generate_opaque_parameter_symbols(symbol, symbol_table); + + irep_idt return_symbol_id=generate_opaque_stub_body( + symbol, + symbol_table); + + if(return_symbol_id!=ID_nil) + { + symbol.type.set("opaque_method_capture_symbol", return_symbol_id); + } + } + } + } +} + +/*******************************************************************\ + +Function: languaget::generate_opaque_stub_body + + Inputs: + symbol - the function symbol which is opaque + symbol_table - the symbol table + + Outputs: The identifier of the return variable. ID_nil if the function + doesn't return anything. + + Purpose: To generate the stub function for the opaque function in + question. The identifier is used in the flag to the interpreter + that the function is opaque. This function should be implemented + in the languages. + +\*******************************************************************/ + +irep_idt languaget::generate_opaque_stub_body( + symbolt &symbol, + symbol_tablet &symbol_table) +{ + return ID_nil; +} + +/*******************************************************************\ + +Function: languaget::build_stub_parameter_symbol + + Inputs: + function_symbol - the symbol of an opaque function + parameter_index - the index of the parameter within the + the parameter list + parameter_type - the type of the parameter + + Outputs: A named symbol to be added to the symbol table representing + one of the parameters in this opaque function. + + Purpose: To build the parameter symbol and choose its name. This should + be implemented in each language. + +\*******************************************************************/ + +parameter_symbolt languaget::build_stub_parameter_symbol( + const symbolt &function_symbol, + size_t parameter_index, + const code_typet::parametert ¶meter) +{ + error() << "language " << id() + << " doesn't implement build_stub_parameter_symbol. " + << "This means cannot use opaque functions." << eom; + + return parameter_symbolt(); +} + +/*******************************************************************\ + +Function: languaget::get_stub_return_symbol_name + + Inputs: + function_id - the function that has a return value + + Outputs: the identifier to use for the symbol that will store the + return value of this function. + + Purpose: To get the name of the symbol to be used for the return value + of the function. Generates a name like to_return_function_name + +\*******************************************************************/ + +irep_idt languaget::get_stub_return_symbol_name(const irep_idt &function_id) +{ + std::ostringstream return_symbol_name_builder; + return_symbol_name_builder << "to_return_" << function_id; + return return_symbol_name_builder.str(); +} + + +/*******************************************************************\ + +Function: languaget::is_symbol_opaque_function + + Inputs: + symbol - the symbol to be checked + + Outputs: True if the symbol is an opaque (e.g. non-bodied) function + + Purpose: To identify if a given symbol is an opaque function and + hence needs to be stubbed. We explicitly exclude CPROVER + functions, if they have no body it is because we haven't + generated it yet. + +\*******************************************************************/ + +bool languaget::is_symbol_opaque_function(const symbolt &symbol) +{ + std::set headers; + // Don't create stubs for symbols like: + // __CPROVER_blah (which aren't real external functions) + // and strstr (which we will model for ourselves later) + bool is_internal=system_symbols.is_symbol_internal_symbol(symbol, headers); + + return !symbol.is_type && + symbol.value.id()==ID_nil && + symbol.type.id()==ID_code && + !is_internal; +} + +/*******************************************************************\ + +Function: languaget::generate_opaque_parameter_symbols + + Inputs: + function_symbol - the symbol of an opaque function + symbol_table - the symbol table to add the new parameter + symbols into + + Outputs: + + Purpose: To create stub parameter symbols for each parameter the + function has and assign their IDs into the parameters + identifier. + +\*******************************************************************/ + +void languaget::generate_opaque_parameter_symbols( + symbolt &function_symbol, + symbol_tablet &symbol_table) +{ + code_typet &function_type = to_code_type(function_symbol.type); + code_typet::parameterst ¶meters=function_type.parameters(); + for(std::size_t i=0; i #include #include +#include +#include +#include #include "message.h" @@ -110,9 +113,34 @@ class languaget:public messaget virtual languaget *new_language()=0; + void set_should_generate_opaque_method_stubs(bool should_generate_stubs); + // constructor / destructor languaget() { } virtual ~languaget() { } + +protected: + void generate_opaque_method_stubs(symbol_tablet &symbol_table); + virtual irep_idt generate_opaque_stub_body( + symbolt &symbol, + symbol_tablet &symbol_table); + + virtual parameter_symbolt build_stub_parameter_symbol( + const symbolt &function_symbol, + size_t parameter_index, + const code_typet::parametert ¶meter); + + static irep_idt get_stub_return_symbol_name(const irep_idt &function_id); + + bool generate_opaque_stubs; + +private: + bool is_symbol_opaque_function(const symbolt &symbol); + void generate_opaque_parameter_symbols( + symbolt &function_symbol, + symbol_tablet &symbol_table); + + system_library_symbolst system_symbols; }; #endif // CPROVER_UTIL_LANGUAGE_H diff --git a/src/util/language_file.cpp b/src/util/language_file.cpp index 46f51564b54..b0fbb47c2f9 100644 --- a/src/util/language_file.cpp +++ b/src/util/language_file.cpp @@ -93,6 +93,28 @@ void language_filest::show_parse(std::ostream &out) /*******************************************************************\ +Function: language_filest::set_should_generate_opqaue_method_stubs + + Inputs: + should_generate_stubs - Should stub generation be enabled + + Outputs: + + Purpose: Turn on or off stub generation for all the languages + +\*******************************************************************/ + +void language_filest::set_should_generate_opaque_method_stubs( + bool stubs_enabled) +{ + for(file_mapt::value_type &language_file_entry : file_map) + { + languaget *language=language_file_entry.second.language; + language->set_should_generate_opaque_method_stubs(stubs_enabled); + } +} + +/*******************************************************************\ Function: language_filest::parse Inputs: diff --git a/src/util/language_file.h b/src/util/language_file.h index 4380a346353..f023047b32a 100644 --- a/src/util/language_file.h +++ b/src/util/language_file.h @@ -75,6 +75,8 @@ class language_filest:public messaget file_map.clear(); } + void set_should_generate_opaque_method_stubs(bool stubs_enabled); + bool parse(); void show_parse(std::ostream &out); diff --git a/src/util/mp_arith.cpp b/src/util/mp_arith.cpp index 338a6103e40..6a9082b6823 100644 --- a/src/util/mp_arith.cpp +++ b/src/util/mp_arith.cpp @@ -17,6 +17,10 @@ Author: Daniel Kroening, kroening@kroening.com #include "mp_arith.h" #include "arith_tools.h" + +typedef BigInt::ullong_t ullong_t; // NOLINT(readability/identifiers) +typedef BigInt::llong_t llong_t; // NOLINT(readability/identifiers) + /*******************************************************************\ Function: >> @@ -320,3 +324,277 @@ unsigned integer2unsigned(const mp_integer &n) assert(ull <= std::numeric_limits::max()); return (unsigned)ull; } + +/*******************************************************************\ + +Function: bitwise_or + + Inputs: + + Outputs: + + Purpose: bitwise or + bitwise operations only make sense on native objects, hence the + largest object size should be the largest available c++ integer + size (currently long long) + +\*******************************************************************/ + +mp_integer bitwise_or(const mp_integer &a, const mp_integer &b) +{ + ullong_t result=a.to_ulong()|b.to_ulong(); + return result; +} + +/*******************************************************************\ + +Function: bitwise_and + + Inputs: + + Outputs: + + Purpose: bitwise and + bitwise operations only make sense on native objects, hence the + largest object size should be the largest available c++ integer + size (currently long long) + +\*******************************************************************/ + +mp_integer bitwise_and(const mp_integer &a, const mp_integer &b) +{ + ullong_t result=a.to_ulong()&b.to_ulong(); + return result; +} + +/*******************************************************************\ + +Function: bitwise_xor + + Inputs: + + Outputs: + + Purpose: bitwise xor + bitwise operations only make sense on native objects, hence the + largest object size should be the largest available c++ integer + size (currently long long) + +\*******************************************************************/ + +mp_integer bitwise_xor(const mp_integer &a, const mp_integer &b) +{ + ullong_t result=a.to_ulong()^b.to_ulong(); + return result; +} + +/*******************************************************************\ + +Function: bitwise_neg + + Inputs: + + Outputs: + + Purpose: bitwise negation + bitwise operations only make sense on native objects, hence the + largest object size should be the largest available c++ integer + size (currently long long) + +\*******************************************************************/ + +mp_integer bitwise_neg(const mp_integer &a) +{ + ullong_t result=~a.to_ulong(); + return result; +} + +/*******************************************************************\ + +Function: arith_left_shift + + Inputs: + + Outputs: + + Purpose: arithmetic left shift + bitwise operations only make sense on native objects, hence the + largest object size should be the largest available c++ integer + size (currently long long) + +\*******************************************************************/ + +mp_integer arith_left_shift( + const mp_integer &a, + const mp_integer &b, + std::size_t true_size) +{ + ullong_t shift=b.to_ulong(); + if(shift>true_size && a!=mp_integer(0)) + throw "shift value out of range"; + + llong_t result=a.to_long()<true_size) + throw "shift value out of range"; + + llong_t sign=(1<<(true_size-1))&number; + llong_t pad=(sign==0) ? 0 : ~((1<<(true_size-shift))-1); + llong_t result=(number >> shift)|pad; + return result; +} + +/*******************************************************************\ + +Function: logic_left_shift + + Inputs: + + Outputs: + + Purpose: logic left shift + bitwise operations only make sense on native objects, hence the + largest object size should be the largest available c++ integer + size (currently long long) + +\*******************************************************************/ + +mp_integer logic_left_shift( + const mp_integer &a, + const mp_integer &b, + std::size_t true_size) +{ + ullong_t shift=b.to_ulong(); + if(shift>true_size && a!=mp_integer(0)) + throw "shift value out of range"; + llong_t result=a.to_long()<true_size) + throw "shift value out of range"; + + ullong_t result=((ullong_t)a.to_long()) >> shift; + return result; +} + +/*******************************************************************\ + +Function: rotate_right + + Inputs: + + Outputs: + + Purpose: rotates right (MSB=LSB) + bitwise operations only make sense on native objects, hence the + largest object size should be the largest available c++ integer + size (currently long long) + +\*******************************************************************/ + +mp_integer rotate_right( + const mp_integer &a, + const mp_integer &b, + std::size_t true_size) +{ + ullong_t number=a.to_ulong(); + ullong_t shift=b.to_ulong(); + if(shift>true_size) + throw "shift value out of range"; + + ullong_t revShift=true_size-shift; + ullong_t filter=1<<(true_size-1); + ullong_t result=(number >> shift)|((number<true_size) + throw "shift value out of range"; + + ullong_t revShift=true_size-shift; + ullong_t filter=1<<(true_size-1); + ullong_t result=((number<> revShift); + return result; +} diff --git a/src/util/mp_arith.h b/src/util/mp_arith.h index 030a292bb74..046b1bff788 100644 --- a/src/util/mp_arith.h +++ b/src/util/mp_arith.h @@ -20,6 +20,28 @@ typedef BigInt mp_integer; std::ostream &operator<<(std::ostream &, const mp_integer &); mp_integer operator>>(const mp_integer &, const mp_integer &); mp_integer operator<<(const mp_integer &, const mp_integer &); +mp_integer bitwise_or(const mp_integer &, const mp_integer &); +mp_integer bitwise_and(const mp_integer &, const mp_integer &); +mp_integer bitwise_xor(const mp_integer &, const mp_integer &); +mp_integer bitwise_neg(const mp_integer &); + +mp_integer arith_left_shift( + const mp_integer &, const mp_integer &, std::size_t true_size); + +mp_integer arith_right_shift( + const mp_integer &, const mp_integer &, std::size_t true_size); + +mp_integer logic_left_shift( + const mp_integer &, const mp_integer &, std::size_t true_size); + +mp_integer logic_right_shift( + const mp_integer &, const mp_integer &, std::size_t true_size); + +mp_integer rotate_right( + const mp_integer &, const mp_integer &, std::size_t true_size); + +mp_integer rotate_left( + const mp_integer &, const mp_integer &, std::size_t true_size); const std::string integer2string(const mp_integer &, unsigned base=10); const mp_integer string2integer(const std::string &, unsigned base=10); @@ -28,5 +50,6 @@ const mp_integer binary2integer(const std::string &, bool is_signed); mp_integer::ullong_t integer2ulong(const mp_integer &); std::size_t integer2size_t(const mp_integer &); unsigned integer2unsigned(const mp_integer &); +const mp_integer mp_zero=string2integer("0"); #endif // CPROVER_UTIL_MP_ARITH_H diff --git a/src/util/nondet_bool.h b/src/util/nondet_bool.h new file mode 100644 index 00000000000..6aee700b889 --- /dev/null +++ b/src/util/nondet_bool.h @@ -0,0 +1,34 @@ +/*******************************************************************\ + +Module: Nondeterministic boolean helper + +Author: Chris Smowton, chris@smowton.net + +\*******************************************************************/ + +#ifndef CPROVER_UTIL_NONDET_BOOL_H +#define CPROVER_UTIL_NONDET_BOOL_H + +#include +#include + +/*******************************************************************\ + +Function: get_nondet_bool + + Inputs: Desired type (C_bool or plain bool) + + Outputs: nondet expr of that type + + Purpose: + +\*******************************************************************/ + +inline exprt get_nondet_bool(const typet &type) +{ + // We force this to 0 and 1 and won't consider + // other values. + return typecast_exprt(side_effect_expr_nondett(bool_typet()), type); +} + +#endif // CPROVER_UTIL_NONDET_BOOL_H diff --git a/src/util/nondet_ifthenelse.cpp b/src/util/nondet_ifthenelse.cpp new file mode 100644 index 00000000000..ae2f9040977 --- /dev/null +++ b/src/util/nondet_ifthenelse.cpp @@ -0,0 +1,134 @@ +/*******************************************************************\ + +Module: Nondeterministic if-then-else + +Author: Chris Smowton, chris@smowton.net + +\*******************************************************************/ + +#include "nondet_ifthenelse.h" + +#include + +#include +#include +#include +#include +#include +#include + +// This will be unified with other similar fresh-symbol routines shortly +static symbolt &new_tmp_symbol( + symbol_tablet &symbol_table, + const std::string &prefix, + const irep_idt &mode) +{ + static size_t temporary_counter=0; + + auxiliary_symbolt new_symbol; + symbolt *symbol_ptr; + + do + { + new_symbol.name="tmp_object_factory$"+std::to_string(++temporary_counter); + new_symbol.base_name=new_symbol.name; + new_symbol.mode=mode; + } + while(symbol_table.move(new_symbol, symbol_ptr)); + + return *symbol_ptr; +} + +/*******************************************************************\ + +Function: nondet_ifthenelset::begin_if + + Inputs: + + Outputs: + + Purpose: Emits instructions and defines labels for the beginning of + a nondeterministic if-else diamond. Code is emitted to the + `result_code` member of this object's associated + `java_object_factoryt` instance `state`. + The caller should use the following pattern (where *this + is an instance of java_object_factoryt): + ``` + nondet_ifthenelset diamond(*this, "name"); + diamond.begin_if(); + result_code.copy_to_operands(Some if-branch code) + diamond.begin_else(); + result_code.copy_to_operands(Some else-branch code) + diamond.finish(); + ``` + +\*******************************************************************/ + +void nondet_ifthenelset::begin_if() +{ + static size_t nondet_ifthenelse_count=0; + + auto choice_sym= + new_tmp_symbol(symbol_table, choice_symname, fresh_symbol_mode); + choice_sym.type=c_bool_typet(1); + auto choice=choice_sym.symbol_expr(); + auto assign_choice= + code_assignt(choice, get_nondet_bool(choice_sym.type)); + assign_choice.add_source_location()=loc; + result_code.move_to_operands(assign_choice); + + std::ostringstream fresh_label_oss; + fresh_label_oss << choice_symname << "_else_" + << (++nondet_ifthenelse_count); + std::string fresh_label=fresh_label_oss.str(); + else_label=code_labelt(fresh_label, code_skipt()); + + std::ostringstream done_label_oss; + done_label_oss << choice_symname << "_done_" + << nondet_ifthenelse_count; + join_label=code_labelt(done_label_oss.str(), code_skipt()); + + code_ifthenelset test; + test.cond()=equal_exprt( + choice, + constant_exprt("0", choice_sym.type)); + test.then_case()=code_gotot(fresh_label); + result_code.move_to_operands(test); +} + +/*******************************************************************\ + +Function: nondet_ifthenelset::begin_else + + Inputs: + + Outputs: + + Purpose: Terminates the if-block and begins the else-block of a + nondet if-then-else diamond. See ::begin_if for detail. + +\*******************************************************************/ + +void nondet_ifthenelset::begin_else() +{ + result_code.copy_to_operands(code_gotot(join_label.get_label())); + result_code.copy_to_operands(else_label); +} + +/*******************************************************************\ + +Function: nondet_ifthenelset::finish + + Inputs: + + Outputs: + + Purpose: Concludes a nondet if-then-else diamond. + See ::begin_if for detail. + +\*******************************************************************/ + +void nondet_ifthenelset::finish() +{ + result_code.move_to_operands(join_label); +} diff --git a/src/util/nondet_ifthenelse.h b/src/util/nondet_ifthenelse.h new file mode 100644 index 00000000000..c6ee25afcab --- /dev/null +++ b/src/util/nondet_ifthenelse.h @@ -0,0 +1,44 @@ +/*******************************************************************\ + +Module: Nondeterministic if-then-else + +Author: Chris Smowton, chris@smowton.net + +\*******************************************************************/ + +#ifndef CPROVER_UTIL_NONDET_IFTHENELSE_H +#define CPROVER_UTIL_NONDET_IFTHENELSE_H + +#include + +class symbol_tablet; +class source_locationt; + +class nondet_ifthenelset +{ + code_blockt &result_code; + symbol_tablet &symbol_table; + const source_locationt &loc; + irep_idt fresh_symbol_mode; + code_labelt else_label; + code_labelt join_label; + const std::string choice_symname; + public: + nondet_ifthenelset( + code_blockt &_result_code, + symbol_tablet &_symbol_table, + const source_locationt &_loc, + const irep_idt &_fresh_symbol_mode, + const std::string &name) : + result_code(_result_code), + symbol_table(_symbol_table), + loc(_loc), + fresh_symbol_mode(_fresh_symbol_mode), + choice_symname(name) + {} + void begin_if(); + void begin_else(); + void finish(); +}; + +#endif // CPROVER_UTIL_NONDET_IFTHENELSE_H diff --git a/src/util/refined_string_type.cpp b/src/util/refined_string_type.cpp new file mode 100644 index 00000000000..0625149cf83 --- /dev/null +++ b/src/util/refined_string_type.cpp @@ -0,0 +1,51 @@ +/********************************************************************\ + +Module: Type for string expressions used by the string solver. + These string expressions contain a field `length`, of type + `index_type`, a field `content` of type `content_type`. + This module also defines functions to recognise the C and java + string types. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include + +#include "refined_string_type.h" + +/*******************************************************************\ + +Constructor: refined_string_typet::refined_string_typet + + Inputs: type of characters + +\*******************************************************************/ + +refined_string_typet::refined_string_typet( + const typet &index_type, const typet &char_type) +{ + infinity_exprt infinite_index(index_type); + array_typet char_array(char_type, infinite_index); + components().emplace_back("length", index_type); + components().emplace_back("content", char_array); + set_tag(CPROVER_PREFIX"refined_string_type"); +} + +/*******************************************************************\ + +Function: refined_string_typet::is_refined_string_type + + Inputs: a type + + Outputs: Boolean telling whether the input is a refined string type + +\*******************************************************************/ + +bool refined_string_typet::is_refined_string_type(const typet &type) +{ + return + type.id()==ID_struct && + to_struct_type(type).get_tag()==CPROVER_PREFIX"refined_string_type"; +} + diff --git a/src/solvers/refinement/refined_string_type.h b/src/util/refined_string_type.h similarity index 55% rename from src/solvers/refinement/refined_string_type.h rename to src/util/refined_string_type.h index 3ecc0ac3a9f..a0cb7500e7f 100644 --- a/src/solvers/refinement/refined_string_type.h +++ b/src/util/refined_string_type.h @@ -10,8 +10,8 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com \*******************************************************************/ -#ifndef CPROVER_SOLVERS_REFINEMENT_REFINED_STRING_TYPE_H -#define CPROVER_SOLVERS_REFINEMENT_REFINED_STRING_TYPE_H +#ifndef CPROVER_UTIL_REFINED_STRING_TYPE_H +#define CPROVER_UTIL_REFINED_STRING_TYPE_H #include #include @@ -33,41 +33,16 @@ class refined_string_typet: public struct_typet const typet &get_char_type() const { - assert(components().size()==2); - return components()[0].type(); + return get_content_type().subtype(); } const typet &get_index_type() const { - return get_content_type().size().type(); - } - - // For C the unrefined string type is __CPROVER_string, for java it is a - // pointer to a struct with tag java.lang.String - - static bool is_c_string_type(const typet &type); - - static bool is_java_string_pointer_type(const typet &type); - - static bool is_java_string_type(const typet &type); - - static bool is_java_string_builder_type(const typet &type); - - static bool is_java_char_sequence_type(const typet &type); - - static bool is_unrefined_string_type(const typet &type) - { - return ( - is_c_string_type(type) || - is_java_string_pointer_type(type) || - is_java_string_builder_type(type) || - is_java_char_sequence_type(type)); + assert(components().size()==2); + return components()[0].type(); } - static bool is_unrefined_string(const exprt &expr) - { - return (is_unrefined_string_type(expr.type())); - } + static bool is_refined_string_type(const typet &type); constant_exprt index_of_int(int i) const { @@ -75,9 +50,10 @@ class refined_string_typet: public struct_typet } }; -const refined_string_typet &to_refined_string_type(const typet &type) +extern inline const refined_string_typet &to_refined_string_type( + const typet &type) { - assert(type.id()==ID_struct); + assert(refined_string_typet::is_refined_string_type(type)); return static_cast(type); } diff --git a/src/util/source_location.h b/src/util/source_location.h index b24befcb8c4..9d905c775cb 100644 --- a/src/util/source_location.h +++ b/src/util/source_location.h @@ -73,6 +73,11 @@ class source_locationt:public irept return get(ID_java_bytecode_index); } + const irep_idt &get_basic_block_covered_lines() const + { + return get(ID_basic_block_covered_lines); + } + void set_file(const irep_idt &file) { set(ID_file, file); @@ -128,6 +133,11 @@ class source_locationt:public irept set(ID_java_bytecode_index, index); } + void set_basic_block_covered_lines(const irep_idt &covered_lines) + { + return set(ID_basic_block_covered_lines, covered_lines); + } + void set_hide() { set(ID_hide, true); diff --git a/src/util/ssa_expr.cpp b/src/util/ssa_expr.cpp index c75d7d2ed92..affd2907eb5 100644 --- a/src/util/ssa_expr.cpp +++ b/src/util/ssa_expr.cpp @@ -78,6 +78,19 @@ static void build_ssa_identifier_rec( assert(false); } +/* Used to determine whether or not an identifier can be built + * before trying and getting an exception */ +bool ssa_exprt::can_build_identifier(const exprt &expr) +{ + if(expr.id()==ID_symbol) + return true; + else if(expr.id()==ID_member || + expr.id()==ID_index) + return can_build_identifier(expr.op0()); + else + return false; +} + /*******************************************************************\ Function: ssa_exprt::build_identifier diff --git a/src/util/ssa_expr.h b/src/util/ssa_expr.h index 446e82169d6..e18862fa9f9 100644 --- a/src/util/ssa_expr.h +++ b/src/util/ssa_expr.h @@ -64,7 +64,7 @@ class ssa_exprt:public symbol_exprt const irep_idt get_l1_object_identifier() const { - #if 0 + #if 1 return get_l1_object().get_identifier(); #else // the above is the clean version, this is the fast one, using @@ -134,6 +134,10 @@ class ssa_exprt:public symbol_exprt const irep_idt &l0, const irep_idt &l1, const irep_idt &l2); + + /* Used to determine whether or not an identifier can be built + * before trying and getting an exception */ + static bool can_build_identifier(const exprt &src); }; /*! \brief Cast a generic exprt to an \ref ssa_exprt diff --git a/src/util/std_expr.h b/src/util/std_expr.h index a2f78ab25e8..e3921c38d98 100644 --- a/src/util/std_expr.h +++ b/src/util/std_expr.h @@ -2979,6 +2979,17 @@ class member_exprt:public exprt { return op0(); } + + // Retrieves the object(symbol) this member corresponds to + inline const symbol_exprt &symbol() const + { + const exprt &op=op0(); + if(op.id()==ID_member) + { + return static_cast(op).symbol(); + } + return to_symbol_expr(op); + } }; /*! \brief Cast a generic exprt to a \ref member_exprt @@ -3450,6 +3461,19 @@ class function_application_exprt:public exprt operands().resize(2); } + explicit function_application_exprt(const typet &_type): + exprt(ID_function_application, _type) + { + operands().resize(2); + } + + function_application_exprt( + const symbol_exprt &_function, const typet &_type): + function_application_exprt(_type) // NOLINT(runtime/explicit) + { + function()=_function; + } + exprt &function() { return op0();