diff --git a/doc/cprover-manual/goto-harness.md b/doc/cprover-manual/goto-harness.md new file mode 100644 index 00000000000..80d36c633fa --- /dev/null +++ b/doc/cprover-manual/goto-harness.md @@ -0,0 +1,263 @@ +[CPROVER Manual TOC](../../) + +## Goto Harness + +This is a tool that can generate harnesses, that is functions that instrument +another function under analysis, by setting up an appropriate environment before +calling them. + +This is useful when trying to analyse an isolated unit of a program without +having to manually construct an appropriate environment. + +### Usage + +We have two types of harnesses we can generate for now: + +* The `memory-snapshot` harness. TODO: Daniel can document this. +* The `function-call` harness, which automatically synthesises an environment +without having any information about the program state. + +It is used similarly to how `goto-instrument` is used by modifying an existing +GOTO binary, as produced by `goto-cc`. + +### The function call harness generator + +For the most basic use of the `function-call` harness generator, imagine that we +have the following C program: + +``` C +// main.c + +#include + +int function_to_test(int some_argument) +{ + assert(some_argument == 0); + return some_argument; +} +``` + +We first need to generate a GOTO binary. + +```sh +$ goto-cc -o main.gb main.c +``` + +Then we can call `goto-harness` on the generated GOTO binary +to get a modified one that contains the harness function: + +```sh +$ goto-harness \ +--harness-function-name harness \ +--harness-type call-function \ +--function function_to_test \ +main.gb \ +main-with-harness.gb +``` + +The options we pass to `goto-harness` are: + +* `harness-function-name` is the name of the function generated by the harness + generator (this needs to be specified for all harness generators). +* `harness-type` is the type of harness to generate (`call-function` is the + function-call harness generator) +* `function` is the name of the function that gets instrumented +* then we pass the input GOTO-binary and a name for the output GOTO binary. + +What comes of this is a GOTO binary roughly corresponding to the following C +pseudocode: + +```C +// modified_goto_binary.c + +#include + +int function_to_test(int some_argument) +{ + assert(some_argument == 0); + return some_argument; +} + +void harness(void) +{ + int argument = nondet_int(); + function_to_test(argument); +} +``` + +You can pass that modified GOTO binary to `cbmc` and select `harness` as the +entry function for analysis, or further modify it with `goto-harness` or +`goto-instrument` or other tools. + +The example above demonstrates the most simple case, which is roughly the same +as the entry point `cbmc` automically generates for functions. However, the +`function-call` harness can also non-deterministically initialise global +variables, array and struct elements. Consider this more complicated example: + +```C +// list_example.c +#include +#include + +typedef struct linked_list linked_list; +struct linked_list { + int value; + linked_list *next; +}; + +linked_list *global_linked_list; + +/// initialize all values in the global +/// list to 0 +void initialize_global(void) +{ + for(linked_list *ll = global_linked_list; + ll != NULL; + ll = ll->next) + { + ll->value = 0; + } +} + +/// try to initialize all values in the linked list +/// to 0 - but this version contains two bugs, +/// as it won't work with nullpointer arguments +/// and it will also not initialize the last element +void initialize_other(linked_list *ll) +{ + do { + ll->value = 0; + ll = ll->next; + } while(ll->next != NULL); +} + +void check_list(linked_list *list_parameter) +{ + assert(list_parameter != global_linked_list); + initialize_global(); + initialize_other(list_parameter); + linked_list *global_cursor = global_linked_list; + linked_list *parameter_cursor = list_parameter; + + // global list should be a prefix of the parameter now, + // or the other way round + while(global_cursor != NULL && parameter_cursor != NULL) + { + // this assertion should fail since we didn't + // initialize the last element of of the + // list parameter correctly + assert(global_cursor->value + == parameter_cursor->value); + global_cursor = global_cursor->next; + parameter_cursor = parameter_cursor->next; + } +} +``` + +Now we'll try to find the bug in `check_list` by generating a harness for it. + +```sh +$ goto-cc -o list_example.gb list_example.c +$ goto-harness \ + --harness-function-name harness \ + --harness-type call-function \ + --function check_list \ + --max-nondet-tree-depth 4 \ + --min-null-tree-depth 1 \ + --nondet-globals \ + list_example.gb \ + list_example_with_harness.gb +$ cbmc --function harness list_example_with_harness.gb --unwind 20 --unwinding-assertions +``` + +We have 3 new options here: + +* `max-nondet-tree-depth` is the maximum extent to which we will generate and + initialize non-null pointers - in this case, this means generating lists up to + length 2 +* `min-null-tree-depth` this is the minimum depth at which pointers can be + nullpointers for generated values - in this case, this sets the _minimum_ + length for our linked lists to one. +* `nondet-globals` is non-deterministically initialising global variables. + +``` +CBMC version 5.11 (cbmc-5.11-1523-g419a958-dirty) 64-bit x86_64 linux +[...] + +** Results: +example.c function initialize_global +[initialize_global.unwind.0] line 17 unwinding assertion loop 0: SUCCESS + +example.c function initialize_other +[initialize_other.unwind.0] line 32 unwinding assertion loop 0: SUCCESS + +example.c function check_list +[check_list.assertion.1] line 42 assertion list_parameter != global_linked_list: SUCCESS +[check_list.unwind.0] line 50 unwinding assertion loop 0: SUCCESS +[check_list.assertion.2] line 55 assertion global_cursor->value == parameter_cursor->value: FAILURE + +** 1 of 5 failed (2 iterations) +VERIFICATION FAILED +``` + +We also have support for arrays (currently only for array function parameters, +globals and struct members are considered for the future). + +Take this example of an implementation of an `is_prefix_of` function that +should return true if the first string parameter `string` is a prefix of the +second one `prefix`. + +```c +// array_example.c + +int is_prefix_of( + const char *string, + int string_length, + const char *prefix, + int prefix_length +) +{ + if(prefix_length > string_length) { return 0; } + // oops, we should have used prefix_length here + for(int i = 0; i < string_length; ++i) + { + // we'll get an out of bounds error here! + if(prefix[i] != string[i]) { return 0; } + } + return 1; +} +``` + +We can compile and run it like this: + +```sh +$ goto-cc -o array_example.gb array_example.c +$ goto-harness \ + --harness-function-name harness \ + --harness-type call-function \ + --function is_prefix_of \ + --associated_array-size string:string_length \ + --associated-array-size prefix:prefix_length \ + array_example.gb array_example-mod.gb +$ cbmc --function harness --pointer-check array_example-mod.gb +``` + +We have the additional option `associated-array-size` here. This is in the format +`:` and will cause the array +parameter with name`array-parameter-name` to be initialised as an array, with +the parameter `array-size-parameter-name` holding its size (it should have +some integral type like `int` or `size_t`). + +Running the example shows the bug highlighted in the comments: +``` +[...] +[is_prefix_of.pointer_dereference.6] line 14 dereference failure: pointer outside object bounds in prefix[(signed long int)i]: FAILURE +``` + +By default, arrays are created with a minimum size of 1 and a maximum size of 2. +These limits can be set with the `--min-array-size` and `--max-array-size` +options. + +If you have a function that takes an array parameter, but without an associated +size parameter, you can also use the `--treat-pointer-as-array ` +option. diff --git a/regression/goto-harness/associated-size-parameter/main.c b/regression/goto-harness/associated-size-parameter/main.c new file mode 100644 index 00000000000..30f1c16732a --- /dev/null +++ b/regression/goto-harness/associated-size-parameter/main.c @@ -0,0 +1,10 @@ +#include + +void test(int *array, int size) +{ + for(int i = 0; i < size; i++) + array[i] = i; + + for(int i = 0; i < size; i++) + assert(array[i] == i); +} diff --git a/regression/goto-harness/associated-size-parameter/test.desc b/regression/goto-harness/associated-size-parameter/test.desc new file mode 100644 index 00000000000..a219ea272c9 --- /dev/null +++ b/regression/goto-harness/associated-size-parameter/test.desc @@ -0,0 +1,7 @@ +CORE +main.c +--harness-type call-function --function test --associated-array-size array:size +VERIFICATION SUCCESSFUL +^EXIT=0$ +^SIGNAL=0$ +-- diff --git a/regression/goto-harness/chain.sh b/regression/goto-harness/chain.sh index eb430bf9a17..01d447961a6 100755 --- a/regression/goto-harness/chain.sh +++ b/regression/goto-harness/chain.sh @@ -23,5 +23,11 @@ if [ -e "${name}-mod.gb" ] ; then rm -f "${name}-mod.gb" fi -$goto_harness "${name}.gb" "${name}-mod.gb" --harness-function-name $entry_point ${args} -$cbmc --function $entry_point "${name}-mod.gb" --unwind 20 --unwinding-assertions +# `# some comment` is an inline comment - basically, cause bash to execute an empty command + +$goto_harness "${name}.gb" "${name}-mod.gb" --harness-function-name $entry_point ${args} +$cbmc --function $entry_point "${name}-mod.gb" \ + --pointer-check `# because we want to see out of bounds errors` \ + --unwind 11 `# with the way we set up arrays symex can't figure out loop bounds automatically` \ + --unwinding-assertions `# we want to make sure we don't accidentally pass tests because we didn't unwind enough` \ + # cbmc args end diff --git a/regression/goto-harness/nondet_initialize_static_arrays/main.c b/regression/goto-harness/nondet_initialize_static_arrays/main.c new file mode 100644 index 00000000000..6fbb63e4153 --- /dev/null +++ b/regression/goto-harness/nondet_initialize_static_arrays/main.c @@ -0,0 +1,37 @@ +#include +#include + +typedef struct st1 +{ + struct st2 *to_st2; + int data; +} st1_t; + +typedef struct st2 +{ + struct st1 *to_st1; + int data; +} st2_t; + +typedef struct st3 +{ + st1_t *array[3]; +} st3_t; + +st1_t dummy1; +st2_t dummy2; + +void func(st3_t *p) +{ + assert(p != NULL); + for(int index = 0; index < 3; index++) + { + assert(p->array[index]->to_st2 != NULL); + assert(p->array[index]->to_st2->to_st1 != NULL); + assert(p->array[index]->to_st2->to_st1->to_st2 == NULL); + } + + assert(p->array[0] != p->array[1]); + assert(p->array[1] != p->array[2]); + assert(p->array[0] != p->array[2]); +} diff --git a/regression/goto-harness/nondet_initialize_static_arrays/test.desc b/regression/goto-harness/nondet_initialize_static_arrays/test.desc new file mode 100644 index 00000000000..d0173ae1e36 --- /dev/null +++ b/regression/goto-harness/nondet_initialize_static_arrays/test.desc @@ -0,0 +1,8 @@ +CORE +main.c +--function func --min-null-tree-depth 10 --max-nondet-tree-depth 4 --harness-type call-function +^EXIT=0$ +^SIGNAL=0$ +VERIFICATION SUCCESSFUL +-- +^warning: ignoring diff --git a/regression/goto-harness/pointer-to-array-function-parameters-max-size/test.c b/regression/goto-harness/pointer-to-array-function-parameters-max-size/test.c new file mode 100644 index 00000000000..0b116e662fe --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters-max-size/test.c @@ -0,0 +1,9 @@ +#include +void test(int *arr, int sz) +{ + assert(sz < 10); + for(int i = 0; i < sz; ++i) + { + arr[i] = 0; + } +} diff --git a/regression/goto-harness/pointer-to-array-function-parameters-max-size/test.desc b/regression/goto-harness/pointer-to-array-function-parameters-max-size/test.desc new file mode 100644 index 00000000000..a690f08c757 --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters-max-size/test.desc @@ -0,0 +1,14 @@ +CORE +test.c +--harness-type call-function --function test --max-array-size 10 --associated-array-size arr:sz +\[test.assertion.1\] line \d+ assertion sz < 10: FAILURE +\[test.pointer_dereference.\d+\] line \d+ dereference failure: pointer NULL in arr\[\(signed( long)* int\)i\]: SUCCESS +\[test.pointer_dereference.\d+\] line \d+ dereference failure: pointer invalid in arr\[\(signed( long)* int\)i\]: SUCCESS +\[test.pointer_dereference.\d+\] line \d+ dereference failure: deallocated dynamic object in arr\[\(signed( long)* int\)i\]: SUCCESS +\[test.pointer_dereference.\d+\] line \d+ dereference failure: dead object in arr\[\(signed( long)* int\)i\]: SUCCESS +\[test.pointer_dereference.\d+\] line \d+ dereference failure: pointer outside object bounds in arr\[\(signed( long)* int\)i\]: SUCCESS +\[test.pointer_dereference.\d+\] line \d+ dereference failure: pointer outside object bounds in arr\[\(signed( long)* int\)i\]: SUCCESS +^EXIT=10$ +^SIGNAL=0$ +-- +unwinding assertion loop \d: FAILURE diff --git a/regression/goto-harness/pointer-to-array-function-parameters-min-size/test.c b/regression/goto-harness/pointer-to-array-function-parameters-min-size/test.c new file mode 100644 index 00000000000..d0d6c71affe --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters-min-size/test.c @@ -0,0 +1,14 @@ +#include + +void min_array_size_test(int *arr, int sz) +{ + assert(sz > 2); + for(int i = 0; i < sz; ++i) + { + arr[i] = i; + } + for(int i = 0; i < sz; ++i) + { + assert(arr[i] == i); + } +} diff --git a/regression/goto-harness/pointer-to-array-function-parameters-min-size/test.desc b/regression/goto-harness/pointer-to-array-function-parameters-min-size/test.desc new file mode 100644 index 00000000000..0f27a4a8d3e --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters-min-size/test.desc @@ -0,0 +1,8 @@ +CORE +test.c +--harness-type call-function --function min_array_size_test --max-array-size 3 --min-array-size 3 --associated-array-size arr:sz +VERIFICATION SUCCESSFUL +^EXIT=0$ +^SIGNAL=0$ +-- +unwinding assertion loop \d: FAILURE diff --git a/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-right/test.c b/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-right/test.c new file mode 100644 index 00000000000..3afac1ef068 --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-right/test.c @@ -0,0 +1,22 @@ +#include + +int is_prefix_of( + const char *string, + int string_size, + const char *prefix, + int prefix_size) +{ + if(string_size < prefix_size) + { + return 0; + } + + for(int ix = 0; ix < prefix_size; ++ix) + { + if(string[ix] != prefix[ix]) + { + return 0; + } + } + return 1; +} diff --git a/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-right/test.desc b/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-right/test.desc new file mode 100644 index 00000000000..e185abccd2f --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-right/test.desc @@ -0,0 +1,8 @@ +CORE +test.c +--harness-type call-function --function is_prefix_of --treat-pointer-as-array string --treat-pointer-as-array prefix --associated-array-size string:string_size --associated-array-size prefix:prefix_size --max-array-size 5 +^SIGNAL=0$ +^EXIT=0$ +VERIFICATION SUCCESSFUL +-- +unwinding assertion loop \d+: FAILURE diff --git a/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-wrong/test.c b/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-wrong/test.c new file mode 100644 index 00000000000..6500af4f123 --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-wrong/test.c @@ -0,0 +1,24 @@ +#include +#include + +int is_prefix_of( + const char *string, + size_t string_size, + const char *prefix, + size_t prefix_size) +{ + if(string_size < prefix_size) + { + return 0; + } + assert(prefix_size <= string_size); + // oh no! we should have used prefix_size here + for(int ix = 0; ix < string_size; ++ix) + { + if(string[ix] != prefix[ix]) + { + return 0; + } + } + return 1; +} diff --git a/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-wrong/test.desc b/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-wrong/test.desc new file mode 100644 index 00000000000..d37096e99bc --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters-multi-arg-wrong/test.desc @@ -0,0 +1,8 @@ +CORE +test.c +--harness-type call-function --function is_prefix_of --treat-pointer-as-array string --treat-pointer-as-array prefix --associated-array-size string:string_size --associated-array-size prefix:prefix_size --max-array-size 5 +^EXIT=10$ +^SIGNAL=0$ +\[is_prefix_of.pointer_dereference.\d+\] line \d+ dereference failure: pointer outside object bounds in prefix\[\(signed( long)* int\)ix\]: FAILURE +VERIFICATION FAILED +-- diff --git a/regression/goto-harness/pointer-to-array-function-parameters-with-size/test.c b/regression/goto-harness/pointer-to-array-function-parameters-with-size/test.c new file mode 100644 index 00000000000..436baadd23d --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters-with-size/test.c @@ -0,0 +1,7 @@ +void test(int *arr, int sz) +{ + for(int i = 0; i < sz; ++i) + { + arr[i] = 0; + } +} diff --git a/regression/goto-harness/pointer-to-array-function-parameters-with-size/test.desc b/regression/goto-harness/pointer-to-array-function-parameters-with-size/test.desc new file mode 100644 index 00000000000..0ed4b256468 --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters-with-size/test.desc @@ -0,0 +1,12 @@ +CORE +test.c +--harness-type call-function --function test --treat-pointer-as-array arr --associated-array-size arr:sz +^EXIT=0$ +^SIGNAL=0$ +\[test.pointer_dereference.1\] line \d+ dereference failure: pointer NULL in arr\[\(signed( long)* int\)i\]: SUCCESS +\[test.pointer_dereference.2\] line \d+ dereference failure: pointer invalid in arr\[\(signed( long)* int\)i\]: SUCCESS +\[test.pointer_dereference.3\] line \d+ dereference failure: deallocated dynamic object in arr\[\(signed( long)* int\)i\]: SUCCESS +\[test.pointer_dereference.4\] line \d+ dereference failure: dead object in arr\[\(signed( long)* int\)i\]: SUCCESS +\[test.pointer_dereference.5\] line \d+ dereference failure: pointer outside dynamic object bounds in arr\[\(signed( long)* int\)i\]: SUCCESS +\[test.pointer_dereference.6\] line \d+ dereference failure: pointer outside object bounds in arr\[\(signed( long)* int\)i\]: SUCCESS +-- diff --git a/regression/goto-harness/pointer-to-array-function-parameters/test.c b/regression/goto-harness/pointer-to-array-function-parameters/test.c new file mode 100644 index 00000000000..f70cbfbae28 --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters/test.c @@ -0,0 +1,10 @@ +#include + +void test(int *arr) +{ + // works because the arrays we generate have at least one element + arr[0] = 5; + + // doesn't work because our arrays have at most ten elements + arr[10] = 10; +} diff --git a/regression/goto-harness/pointer-to-array-function-parameters/test.desc b/regression/goto-harness/pointer-to-array-function-parameters/test.desc new file mode 100644 index 00000000000..f010f238e1b --- /dev/null +++ b/regression/goto-harness/pointer-to-array-function-parameters/test.desc @@ -0,0 +1,8 @@ +CORE +test.c +--harness-type call-function --function test --treat-pointer-as-array arr +\[test.pointer_dereference.\d+\] line \d+ dereference failure: pointer outside object bounds in arr\[\(signed( long)* int\)0\]: SUCCESS +\[test.pointer_dereference.\d+\] line \d+ dereference failure: pointer outside object bounds in arr\[\(signed( long)* int\)10\]: FAILURE +^EXIT=10$ +^SIGNAL=0$ +-- diff --git a/src/goto-harness/function_call_harness_generator.cpp b/src/goto-harness/function_call_harness_generator.cpp index 1b965e96973..9173b36c761 100644 --- a/src/goto-harness/function_call_harness_generator.cpp +++ b/src/goto-harness/function_call_harness_generator.cpp @@ -15,11 +15,16 @@ Author: Diffblue Ltd. #include #include #include +#include #include #include #include +#include +#include +#include + #include "function_harness_generator_options.h" #include "goto_harness_parse_options.h" #include "recursive_initialization.h" @@ -40,14 +45,17 @@ struct function_call_harness_generatort::implt recursive_initialization_configt recursive_initialization_config; std::unique_ptr recursive_initialization; + std::set function_parameters_to_treat_as_arrays; + std::set function_arguments_to_treat_as_arrays; + + std::map function_argument_to_associated_array_size; + std::map function_parameter_to_associated_array_size; + /// \see goto_harness_generatort::generate void generate(goto_modelt &goto_model, const irep_idt &harness_function_name); /// Iterate over the symbol table and generate initialisation code for /// globals into the function body. void generate_nondet_globals(code_blockt &function_body); - /// Non-deterministically initialise the parameters of the entry function - /// and insert function call to the passed code block. - void setup_parameters_and_call_entry_function(code_blockt &function_body); /// Return a reference to the entry function or throw if it doesn't exist. const symbolt &lookup_function_to_call(); /// Generate initialisation code for one lvalue inside block. @@ -56,6 +64,14 @@ struct function_call_harness_generatort::implt void ensure_harness_does_not_already_exist(); /// Update the goto-model with the new harness function. void add_harness_function_to_goto_model(code_blockt function_body); + /// declare local variables for each of the parameters of the entry function + /// and return them + code_function_callt::argumentst declare_arguments(code_blockt &function_body); + /// write initialisation code for each of the arguments into function_body, + /// then insert a call to the entry function with the arguments + void call_function( + const code_function_callt::argumentst &arguments, + code_blockt &function_body); }; function_call_harness_generatort::function_call_harness_generatort( @@ -111,6 +127,57 @@ void function_call_harness_generatort::handle_option( "--" FUNCTION_HARNESS_GENERATOR_MAX_NONDET_TREE_DEPTH_OPT}; } } + else if(option == FUNCTION_HARNESS_GENERATOR_MAX_ARRAY_SIZE_OPT) + { + p_impl->recursive_initialization_config.max_dynamic_array_size = + require_one_size_value( + FUNCTION_HARNESS_GENERATOR_MAX_ARRAY_SIZE_OPT, values); + } + else if(option == FUNCTION_HARNESS_GENERATOR_MIN_ARRAY_SIZE_OPT) + { + p_impl->recursive_initialization_config.min_dynamic_array_size = + require_one_size_value( + FUNCTION_HARNESS_GENERATOR_MIN_ARRAY_SIZE_OPT, values); + } + else if(option == FUNCTION_HARNESS_GENERATOR_TREAT_POINTER_AS_ARRAY_OPT) + { + p_impl->function_parameters_to_treat_as_arrays.insert( + values.begin(), values.end()); + } + else if(option == FUNCTION_HARNESS_GENERATOR_ASSOCIATED_ARRAY_SIZE_OPT) + { + for(auto const &array_size_pair : values) + { + try + { + std::string array; + std::string size; + split_string(array_size_pair, ':', array, size); + // --associated-array-size implies --treat-pointer-as-array + // but it is not an error to specify both, so we don't check + // for duplicates here + p_impl->function_parameters_to_treat_as_arrays.insert(array); + auto const inserted = + p_impl->function_parameter_to_associated_array_size.emplace( + array, size); + if(!inserted.second) + { + throw invalid_command_line_argument_exceptiont{ + "can not have two associated array sizes for one array", + "--" FUNCTION_HARNESS_GENERATOR_ASSOCIATED_ARRAY_SIZE_OPT}; + } + } + catch(const deserialization_exceptiont &) + { + throw invalid_command_line_argument_exceptiont{ + "`" + array_size_pair + + "' is in an invalid format for array size pair", + "--" FUNCTION_HARNESS_GENERATOR_ASSOCIATED_ARRAY_SIZE_OPT, + "array_name:size_name, where both are the names of function " + "parameters"}; + } + } + } else { throw invalid_command_line_argument_exceptiont{ @@ -125,55 +192,36 @@ void function_call_harness_generatort::generate( p_impl->generate(goto_model, harness_function_name); } -void function_call_harness_generatort::implt:: - setup_parameters_and_call_entry_function(code_blockt &function_body) -{ - const auto &function_to_call = lookup_function_to_call(); - const auto &function_type = to_code_type(function_to_call.type); - const auto ¶meters = function_type.parameters(); - - code_function_callt::operandst arguments{}; - - auto allocate_objects = allocate_objectst{function_to_call.mode, - function_to_call.location, - "__goto_harness", - *symbol_table}; - for(const auto ¶meter : parameters) - { - auto argument = allocate_objects.allocate_automatic_local_object( - parameter.type(), parameter.get_base_name()); - arguments.push_back(argument); - } - allocate_objects.declare_created_symbols(function_body); - for(auto const &argument : arguments) - { - generate_initialisation_code_for(function_body, argument); - } - code_function_callt function_call{function_to_call.symbol_expr(), - std::move(arguments)}; - function_call.add_source_location() = function_to_call.location; - - function_body.add(std::move(function_call)); -} - void function_call_harness_generatort::implt::generate( goto_modelt &goto_model, const irep_idt &harness_function_name) { symbol_table = &goto_model.symbol_table; goto_functions = &goto_model.goto_functions; - const auto &function_to_call = lookup_function_to_call(); - recursive_initialization_config.mode = function_to_call.mode; - recursive_initialization = util_make_unique( - recursive_initialization_config, goto_model); this->harness_function_name = harness_function_name; + const auto &function_to_call = lookup_function_to_call(); ensure_harness_does_not_already_exist(); // create body for the function code_blockt function_body{}; + auto const arguments = declare_arguments(function_body); + + // configure and create recursive initialisation object + recursive_initialization_config.mode = function_to_call.mode; + recursive_initialization_config.pointers_to_treat_as_arrays = + function_arguments_to_treat_as_arrays; + recursive_initialization_config.array_name_to_associated_array_size_variable = + function_argument_to_associated_array_size; + for(const auto &pair : function_argument_to_associated_array_size) + { + recursive_initialization_config.variables_that_hold_array_sizes.insert( + pair.second); + } + recursive_initialization = util_make_unique( + recursive_initialization_config, goto_model); generate_nondet_globals(function_body); - setup_parameters_and_call_entry_function(function_body); + call_function(arguments, function_body); add_harness_function_to_goto_model(std::move(function_body)); } @@ -220,6 +268,32 @@ void function_call_harness_generatort::validate_options() throw invalid_command_line_argument_exceptiont{ "required parameter entry function not set", "--" FUNCTION_HARNESS_GENERATOR_FUNCTION_OPT}; + if( + p_impl->recursive_initialization_config.min_dynamic_array_size > + p_impl->recursive_initialization_config.max_dynamic_array_size) + { + throw invalid_command_line_argument_exceptiont{ + "min dynamic array size cannot be greater than max dynamic array size", + "--" FUNCTION_HARNESS_GENERATOR_MIN_ARRAY_SIZE_OPT + " --" FUNCTION_HARNESS_GENERATOR_MAX_ARRAY_SIZE_OPT}; + } +} + +std::size_t function_call_harness_generatort::require_one_size_value( + const std::string &option, + const std::list &values) +{ + auto const string_value = require_exactly_one_value(option, values); + auto value = string2optional(string_value, 10); + if(value.has_value()) + { + return value.value(); + } + else + { + throw invalid_command_line_argument_exceptiont{ + "failed to parse `" + string_value + "' as integer", "--" + option}; + } } const symbolt & @@ -278,3 +352,72 @@ void function_call_harness_generatort::implt:: function_to_call.mode); body.add(goto_programt::make_end_function()); } + +code_function_callt::argumentst +function_call_harness_generatort::implt::declare_arguments( + code_blockt &function_body) +{ + const auto &function_to_call = lookup_function_to_call(); + const auto &function_type = to_code_type(function_to_call.type); + const auto ¶meters = function_type.parameters(); + + code_function_callt::operandst arguments{}; + + auto allocate_objects = allocate_objectst{function_to_call.mode, + function_to_call.location, + "__goto_harness", + *symbol_table}; + std::map parameter_name_to_argument_name; + for(const auto ¶meter : parameters) + { + auto argument = allocate_objects.allocate_automatic_local_object( + parameter.type(), parameter.get_base_name()); + parameter_name_to_argument_name.insert( + {parameter.get_base_name(), argument.get_identifier()}); + arguments.push_back(argument); + } + + for(const auto &pair : parameter_name_to_argument_name) + { + auto const ¶meter_name = pair.first; + auto const &argument_name = pair.second; + if(function_parameters_to_treat_as_arrays.count(parameter_name) != 0) + { + function_arguments_to_treat_as_arrays.insert(argument_name); + } + auto it = function_parameter_to_associated_array_size.find(parameter_name); + if(it != function_parameter_to_associated_array_size.end()) + { + auto const &associated_array_size_parameter = it->second; + auto associated_array_size_argument = + parameter_name_to_argument_name.find(associated_array_size_parameter); + if( + associated_array_size_argument == parameter_name_to_argument_name.end()) + { + throw invalid_command_line_argument_exceptiont{ + "associated array size is not there", + "--" FUNCTION_HARNESS_GENERATOR_ASSOCIATED_ARRAY_SIZE_OPT}; + } + function_argument_to_associated_array_size.insert( + {argument_name, associated_array_size_argument->second}); + } + } + allocate_objects.declare_created_symbols(function_body); + return arguments; +} + +void function_call_harness_generatort::implt::call_function( + const code_function_callt::argumentst &arguments, + code_blockt &function_body) +{ + auto const &function_to_call = lookup_function_to_call(); + for(auto const &argument : arguments) + { + generate_initialisation_code_for(function_body, argument); + } + code_function_callt function_call{function_to_call.symbol_expr(), + std::move(arguments)}; + function_call.add_source_location() = function_to_call.location; + + function_body.add(std::move(function_call)); +} diff --git a/src/goto-harness/function_call_harness_generator.h b/src/goto-harness/function_call_harness_generator.h index a1cd5a443f2..354542b757b 100644 --- a/src/goto-harness/function_call_harness_generator.h +++ b/src/goto-harness/function_call_harness_generator.h @@ -36,6 +36,9 @@ class function_call_harness_generatort : public goto_harness_generatort void validate_options() override; private: + std::size_t require_one_size_value( + const std::string &option, + const std::list &values); struct implt; std::unique_ptr p_impl; }; diff --git a/src/goto-harness/function_harness_generator_options.h b/src/goto-harness/function_harness_generator_options.h index 400fd761791..1892a41d98e 100644 --- a/src/goto-harness/function_harness_generator_options.h +++ b/src/goto-harness/function_harness_generator_options.h @@ -14,6 +14,12 @@ Author: Diffblue Ltd. #define FUNCTION_HARNESS_GENERATOR_MIN_NULL_TREE_DEPTH_OPT "min-null-tree-depth" #define FUNCTION_HARNESS_GENERATOR_MAX_NONDET_TREE_DEPTH_OPT \ "max-nondet-tree-depth" +#define FUNCTION_HARNESS_GENERATOR_MIN_ARRAY_SIZE_OPT "min-array-size" +#define FUNCTION_HARNESS_GENERATOR_MAX_ARRAY_SIZE_OPT "max-array-size" +#define FUNCTION_HARNESS_GENERATOR_TREAT_POINTER_AS_ARRAY_OPT \ + "treat-pointer-as-array" +#define FUNCTION_HARNESS_GENERATOR_ASSOCIATED_ARRAY_SIZE_OPT \ + "associated-array-size" // clang-format off #define FUNCTION_HARNESS_GENERATOR_OPTIONS \ @@ -21,6 +27,10 @@ Author: Diffblue Ltd. "(" FUNCTION_HARNESS_GENERATOR_NONDET_GLOBALS_OPT ")" \ "(" FUNCTION_HARNESS_GENERATOR_MIN_NULL_TREE_DEPTH_OPT "):" \ "(" FUNCTION_HARNESS_GENERATOR_MAX_NONDET_TREE_DEPTH_OPT "):" \ + "(" FUNCTION_HARNESS_GENERATOR_MIN_ARRAY_SIZE_OPT "):" \ + "(" FUNCTION_HARNESS_GENERATOR_MAX_ARRAY_SIZE_OPT "):" \ + "(" FUNCTION_HARNESS_GENERATOR_TREAT_POINTER_AS_ARRAY_OPT "):" \ + "(" FUNCTION_HARNESS_GENERATOR_ASSOCIATED_ARRAY_SIZE_OPT "):" \ // FUNCTION_HARNESS_GENERATOR_OPTIONS // clang-format on @@ -38,6 +48,20 @@ Author: Diffblue Ltd. "--" FUNCTION_HARNESS_GENERATOR_MAX_NONDET_TREE_DEPTH_OPT \ " N limit size of nondet (e.g. input) object tree;\n" \ " at level N pointers are set to null\n" \ + "--" FUNCTION_HARNESS_GENERATOR_MIN_ARRAY_SIZE_OPT \ + " N minimum size of dynamically created arrays (default: 1)\n" \ + "--" FUNCTION_HARNESS_GENERATOR_MAX_ARRAY_SIZE_OPT \ + " N maximum size of dynamically created arrays (default: 2)\n" \ + "--" FUNCTION_HARNESS_GENERATOR_TREAT_POINTER_AS_ARRAY_OPT \ + " parameter treat the function parameter with the name `parameter'\n" \ + " as an array\n" \ + "--" FUNCTION_HARNESS_GENERATOR_ASSOCIATED_ARRAY_SIZE_OPT \ + " array_name:size_name\n" \ + " set the parameter to the size\n"\ + " of the array \n" \ + " (implies " \ + "-- " FUNCTION_HARNESS_GENERATOR_ASSOCIATED_ARRAY_SIZE_OPT \ + " )\n" \ // FUNCTION_HARNESS_GENERATOR_HELP // clang-format on diff --git a/src/goto-harness/recursive_initialization.cpp b/src/goto-harness/recursive_initialization.cpp index 259063f6168..306eceb03b7 100644 --- a/src/goto-harness/recursive_initialization.cpp +++ b/src/goto-harness/recursive_initialization.cpp @@ -9,9 +9,11 @@ Author: Diffblue Ltd. #include "recursive_initialization.h" #include +#include #include #include #include +#include #include #include @@ -25,7 +27,7 @@ recursive_initializationt::recursive_initializationt( void recursive_initializationt::initialize( const exprt &lhs, - std::size_t depth, + const std::size_t depth, const recursion_sett &known_tags, code_blockt &body) { @@ -36,7 +38,20 @@ void recursive_initializationt::initialize( } else if(type.id() == ID_pointer) { - initialize_pointer(lhs, depth, known_tags, body); + if( + lhs.id() == ID_symbol && + should_be_treated_as_array(to_symbol_expr(lhs).get_identifier())) + { + initialize_dynamic_array(lhs, depth, known_tags, body); + } + else + { + initialize_pointer(lhs, depth, known_tags, body); + } + } + else if(type.id() == ID_array) + { + initialize_array(lhs, depth, known_tags, body); } else { @@ -64,7 +79,7 @@ symbol_exprt recursive_initializationt::get_malloc_function() void recursive_initializationt::initialize_struct_tag( const exprt &lhs, - std::size_t depth, + const std::size_t depth, const recursion_sett &known_tags, code_blockt &body) { @@ -81,7 +96,7 @@ void recursive_initializationt::initialize_struct_tag( void recursive_initializationt::initialize_pointer( const exprt &lhs, - std::size_t depth, + const std::size_t depth, const recursion_sett &known_tags, code_blockt &body) { @@ -132,9 +147,191 @@ void recursive_initializationt::initialize_pointer( void recursive_initializationt::initialize_nondet( const exprt &lhs, - std::size_t depth, + const std::size_t depth, + const recursion_sett &known_tags, + code_blockt &body) +{ + if( + lhs.id() != ID_symbol || + !is_array_size_parameter(to_symbol_expr(lhs).get_identifier())) + { + body.add(code_assignt{lhs, side_effect_expr_nondett{lhs.type()}}); + } +} + +void recursive_initializationt::initialize_array( + const exprt &array, + const std::size_t depth, + const recursion_sett &known_tags, + code_blockt &body) +{ + PRECONDITION(array.type().id() == ID_array); + const auto &array_type = to_array_type(array.type()); + const auto array_size = + numeric_cast_v(to_constant_expr(array_type.size())); + for(std::size_t index = 0; index < array_size; index++) + { + initialize( + index_exprt(array, from_integer(index, size_type())), + depth, + known_tags, + body); + } +} + +bool recursive_initializationt::should_be_treated_as_array( + const irep_idt &array_name) const +{ + return initialization_config.pointers_to_treat_as_arrays.find(array_name) != + initialization_config.pointers_to_treat_as_arrays.end(); +} + +bool recursive_initializationt::is_array_size_parameter( + const irep_idt &cmdline_arg) const +{ + return initialization_config.variables_that_hold_array_sizes.find( + cmdline_arg) != + initialization_config.variables_that_hold_array_sizes.end(); +} + +optionalt recursive_initializationt::get_associated_size_variable( + const irep_idt &array_name) const +{ + return optional_lookup( + initialization_config.array_name_to_associated_array_size_variable, + array_name); +} + +void recursive_initializationt::initialize_dynamic_array( + const exprt &pointer, + const std::size_t depth, const recursion_sett &known_tags, code_blockt &body) { - body.add(code_assignt{lhs, side_effect_expr_nondett{lhs.type()}}); + PRECONDITION(pointer.type().id() == ID_pointer); + + // size_t number_of_arrays = max_array_size - min_array_size; + // T *arrays[number_of_arrays]; + // + // T array1[min_array_size]; + // initialize(array1); + // arrays[0] = &array1[0]; + // + // T array2[min_array_size+1]; + // initialize(array2); + // arrays[1] = &array2[0]; + // + // ... and so on ... + // + // T arrayN[max_array_size]; + // initialize(arrayN); + // arrays[number_of_arrays-1] = &arrayN[0]; + // + // size_t array_index; + // assume(0 <= array_index < number_of_arrays); + // actual_size = array_index + min_array_size; + // array_pointer = arrays[array_index]; + + const auto &pointer_type = to_pointer_type(pointer.type()); + allocate_objectst allocate_objects{initialization_config.mode, + pointer_type.source_location(), + "initializer", + goto_model.symbol_table}; + const auto min_array_size = initialization_config.min_dynamic_array_size; + const auto max_array_size = initialization_config.max_dynamic_array_size; + PRECONDITION(max_array_size >= min_array_size); + const auto number_of_arrays = max_array_size - min_array_size + 1; + + auto const array_variables = [&]() { + std::vector array_variables; + for(auto array_size = min_array_size; array_size <= max_array_size; + array_size++) + { + array_variables.push_back( + allocate_objects.allocate_automatic_local_object( + array_typet{pointer_type.subtype(), + from_integer(array_size, size_type())}, + "array")); + } + return array_variables; + }(); + + const auto arrays = allocate_objects.allocate_automatic_local_object( + array_typet{pointer_type, from_integer(number_of_arrays, size_type())}, + "arrays"); + + const auto nondet_index = allocate_objects.allocate_automatic_local_object( + size_type(), "nondet_index"); + + allocate_objects.declare_created_symbols(body); + + { + std::size_t array_counter = 0; + for(const auto &array_variable : array_variables) + { + initialize(array_variable, depth + 1, known_tags, body); + body.add(code_assignt{ + index_exprt{arrays, from_integer(array_counter++, size_type())}, + address_of_exprt{ + index_exprt{array_variable, from_integer(0, size_type())}}}); + } + INVARIANT(array_counter == number_of_arrays, "initialized all arrays"); + } + + body.add(code_assumet{and_exprt{ + binary_relation_exprt{nondet_index, ID_ge, from_integer(0, size_type())}, + binary_relation_exprt{ + nondet_index, ID_lt, from_integer(number_of_arrays, size_type())}}}); + + if(pointer.id() == ID_symbol) + { + const auto array_id = to_symbol_expr(pointer).get_identifier(); + const auto size_var = get_associated_size_variable(array_id); + if(size_var.has_value()) + { + const auto size_var_symbol_expr = + goto_model.symbol_table.lookup_ref(size_var.value()).symbol_expr(); + body.add(code_assignt{ + size_var_symbol_expr, + typecast_exprt{ + plus_exprt{nondet_index, + from_integer(min_array_size, nondet_index.type())}, + size_var_symbol_expr.type()}}); + } + } + + body.add(code_assignt{pointer, index_exprt{arrays, nondet_index}}); +} + +std::string recursive_initialization_configt::to_string() const +{ + std::ostringstream out{}; + out << "recursive_initialization_config {" + << "\n min_null_tree_depth = " << min_null_tree_depth + << "\n max_nondet_tree_depth = " << max_nondet_tree_depth + << "\n mode = " << mode + << "\n max_dynamic_array_size = " << max_dynamic_array_size + << "\n min_dynamic_array_size = " << min_dynamic_array_size + << "\n pointers_to_treat_as_arrays = ["; + for(auto const &pointer : pointers_to_treat_as_arrays) + { + out << "\n " << pointer; + } + out << "\n ]" + << "\n variables_that_hold_array_sizes = ["; + for(auto const &array_size : variables_that_hold_array_sizes) + { + out << "\n " << array_size; + } + out << "\n ]"; + out << "\n array_name_to_associated_size_variable = ["; + for(auto const &associated_array_size : + array_name_to_associated_array_size_variable) + { + out << "\n " << associated_array_size.first << " -> " + << associated_array_size.second; + } + out << "\n ]"; + out << "\n}"; + return out.str(); } diff --git a/src/goto-harness/recursive_initialization.h b/src/goto-harness/recursive_initialization.h index fe2c5ba0f47..bddb60e2eb1 100644 --- a/src/goto-harness/recursive_initialization.h +++ b/src/goto-harness/recursive_initialization.h @@ -10,10 +10,12 @@ Author: Diffblue Ltd. #define CPROVER_GOTO_HARNESS_RECURSIVE_INITIALIZATION_H #include +#include #include #include #include +#include #include struct recursive_initialization_configt @@ -21,6 +23,16 @@ struct recursive_initialization_configt std::size_t min_null_tree_depth = 1; std::size_t max_nondet_tree_depth = 2; irep_idt mode; + + // array stuff + std::size_t max_dynamic_array_size = 2; + std::size_t min_dynamic_array_size = 1; + + std::set pointers_to_treat_as_arrays; + std::set variables_that_hold_array_sizes; + std::map array_name_to_associated_array_size_variable; + + std::string to_string() const; // for debugging purposes }; /// Class for generating initialisation code @@ -69,6 +81,21 @@ class recursive_initializationt std::size_t depth, const recursion_sett &known_tags, code_blockt &body); + void initialize_array( + const exprt &array, + std::size_t depth, + const recursion_sett &known_tags, + code_blockt &body); + void initialize_dynamic_array( + const exprt &pointer, + std::size_t depth, + const recursion_sett &known_tags, + code_blockt &body); + + bool should_be_treated_as_array(const irep_idt &pointer_name) const; + bool is_array_size_parameter(const irep_idt &cmdline_arg) const; + optionalt + get_associated_size_variable(const irep_idt &array_name) const; }; #endif // CPROVER_GOTO_HARNESS_RECURSIVE_INITIALIZATION_H