diff --git a/CMakeLists.txt b/CMakeLists.txt index fea650dbac1..ac725ad583c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,6 +68,8 @@ set_target_properties( json langapi linking + memory-analyzer + memory-analyzer-lib miniBDD pointer-analysis solvers diff --git a/regression/memory-analyzer/Makefile b/regression/memory-analyzer/Makefile new file mode 100644 index 00000000000..e6ccfe714c8 --- /dev/null +++ b/regression/memory-analyzer/Makefile @@ -0,0 +1,21 @@ +default: clean tests.log + +clean: + find -name '*.exe' -execdir $(RM) '{}' \; + find -name '*.out' -execdir $(RM) '{}' \; + $(RM) tests.log + +test: + -@ln -s goto-cc ../../src/goto-cc/goto-gcc + @../test.pl -p -c ../compile_example.sh + +tests.log: ../test.pl + -@ln -s goto-cc ../../src/goto-cc/goto-gcc + @../test.pl -p -c ../compile_example.sh + +show: + @for dir in *; do \ + if [ -d "$$dir" ]; then \ + vim -o "$$dir/*.c" "$$dir/*.out"; \ + fi; \ + done; diff --git a/regression/memory-analyzer/arrays/arrays.c b/regression/memory-analyzer/arrays/arrays.c new file mode 100644 index 00000000000..f122024a280 --- /dev/null +++ b/regression/memory-analyzer/arrays/arrays.c @@ -0,0 +1,82 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// This file test array support in the memory-analyzer. +/// A more detailed description of the test idea is in example3.h. +/// setup() prepares the data structure. +/// manipulate_data() is the hook used for the test, +/// where gdb reaches the breakpoint. +/// main() is just the required boilerplate around this methods, +/// to make the compiled result executable. + +#include "arrays.h" + +#include +#include +#include + +void setup() +{ + test_struct = malloc(sizeof(struct a_typet)); + for(int i = 0; i < 10; i++) + { + test_struct->config[i] = i + 10; + } + for(int i = 0; i < 10; i++) + { + test_struct->values[i] = 0xf3; + } + for(int i = 10; i < 20; i++) + { + test_struct->values[i] = 0x3f; + } + for(int i = 20; i < 30; i++) + { + test_struct->values[i] = 0x01; + } + for(int i = 30; i < 40; i++) + { + test_struct->values[i] = 0x01; + } + for(int i = 40; i < 50; i++) + { + test_struct->values[i] = 0xff; + } + for(int i = 50; i < 60; i++) + { + test_struct->values[i] = 0x00; + } + for(int i = 60; i < 70; i++) + { + test_struct->values[i] = 0xab; + } + messaget option1 = {.text = "accept"}; + for(int i = 0; i < 4; i++) + { + char *value = malloc(sizeof(char *)); + sprintf(value, "unique%i", i); + entryt your_options = { + .id = 1, .options[0] = option1, .options[1].text = value}; + your_options.id = i + 12; + test_struct->childs[i].id = your_options.id; + test_struct->childs[i].options[0] = your_options.options[0]; + test_struct->childs[i].options[1].text = your_options.options[1].text; + } + test_struct->initalized = true; +} + +int manipulate_data() +{ + for(int i = 0; i < 4; i++) + { + free(test_struct->childs[i].options[1].text); + test_struct->childs[i].options[1].text = "decline"; + } +} + +int main() +{ + setup(); + manipulate_data(); + return 0; +} diff --git a/regression/memory-analyzer/arrays/arrays.h b/regression/memory-analyzer/arrays/arrays.h new file mode 100644 index 00000000000..cb760ce8ec5 --- /dev/null +++ b/regression/memory-analyzer/arrays/arrays.h @@ -0,0 +1,32 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// Example3 test explicitly the array support. +/// It ensures, that arrays are handled right. +/// The different typedefs have been used, to make sure the memory-analyzer +/// is aware of the different appeareances in the gdb responses. + +#include + +struct a_sub_sub_typet +{ + char *text; +}; + +typedef struct a_sub_sub_typet messaget; + +struct a_sub_typet +{ + int id; + messaget options[2]; +}; + +struct a_typet +{ + int config[10]; + bool initalized; + unsigned char values[70]; + struct a_sub_typet childs[4]; +} * test_struct; + +typedef struct a_sub_typet entryt; diff --git a/regression/memory-analyzer/arrays/test.desc b/regression/memory-analyzer/arrays/test.desc new file mode 100644 index 00000000000..4b17306681f --- /dev/null +++ b/regression/memory-analyzer/arrays/test.desc @@ -0,0 +1,24 @@ +CORE +arrays.exe +--breakpoint manipulate_data --symbols test_struct +analyzing symbol: test_struct +GENERATED CODE: +\{ + char _test_struct_childs0_options0_text_1\[\]="accept"; + char _test_struct_childs0_options1_text_2\[\]="unique0"; + char _test_struct_childs1_options1_text_3\[\]="unique1"; + char _test_struct_childs2_options1_text_4\[\]="unique2"; + char _test_struct_childs3_options1_text_5\[\]="unique3"; + struct a_typet test_struct_0=\{ .config=\{ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 \}, .initalized=0, + .values=\{ 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171 \}, .childs=\{ \{ .id=12, .options=\{ \{ .text=\(char \*\)&_test_struct_childs0_options0_text_1 \}, + \{ .text=\(char \*\)&_test_struct_childs0_options1_text_2 \} \} \}, + \{ .id=13, .options=\{ \{ .text=\(char \*\)&_test_struct_childs0_options0_text_1 \}, + \{ .text=\(char \*\)&_test_struct_childs1_options1_text_3 \} \} \}, + \{ .id=14, .options=\{ \{ .text=\(char \*\)&_test_struct_childs0_options0_text_1 \}, + \{ .text=\(char \*\)&_test_struct_childs2_options1_text_4 \} \} \}, + \{ .id=15, .options=\{ \{ .text=\(char \*\)&_test_struct_childs0_options0_text_1 \}, + \{ .text=\(char \*\)&_test_struct_childs3_options1_text_5 \} \} \} \} \}; + test_struct = &test_struct_0; +\} +-- +-- diff --git a/regression/memory-analyzer/compile_example.sh b/regression/memory-analyzer/compile_example.sh new file mode 100755 index 00000000000..d3fbac4f440 --- /dev/null +++ b/regression/memory-analyzer/compile_example.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +MEMORYANALYZER="../../../src/memory-analyzer/memory-analyzer" + +set -e + +NAME=${5%.exe} + +../../../src/goto-cc/goto-gcc -g -std=c11 -o $NAME.exe $NAME.c +$MEMORYANALYZER $@ diff --git a/regression/memory-analyzer/cycles/cycles.c b/regression/memory-analyzer/cycles/cycles.c new file mode 100644 index 00000000000..fd35e04bc23 --- /dev/null +++ b/regression/memory-analyzer/cycles/cycles.c @@ -0,0 +1,61 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// This example deals with cyclic data structures, +/// see comment in example2.h explaining why this is necessary. +/// add_element is just declared as a helper method for cycle construction. +/// process_buffer isn't supposed to do meaningfull stuff. +/// It is the hook for the gdb breakpoint used in the test. +/// free_buffer just does clean up, if you run this. + +#include "cycles.h" +void add_element(char *content) +{ + cycle_buffer_entry_t *new_entry = malloc(sizeof(cycle_buffer_entry_t)); + new_entry->data = content; + if(buffer.end && buffer.start) + { + new_entry->next = buffer.start; + buffer.end->next = new_entry; + buffer.end = new_entry; + } + else + { + new_entry->next = new_entry; + buffer.end = new_entry; + buffer.start = new_entry; + } +} + +int process_buffer() +{ + return 0; +} + +void free_buffer() +{ + cycle_buffer_entry_t *current; + cycle_buffer_entry_t *free_next; + if(buffer.start) + { + current = buffer.start->next; + while(current != buffer.start) + { + free_next = current; + current = current->next; + free(free_next); + } + free(current); + } +} + +int main() +{ + add_element("snow"); + add_element("sun"); + add_element("rain"); + add_element("thunder storm"); + + process_buffer(); + free_buffer(); +} diff --git a/regression/memory-analyzer/cycles/cycles.h b/regression/memory-analyzer/cycles/cycles.h new file mode 100644 index 00000000000..0c96abc9760 --- /dev/null +++ b/regression/memory-analyzer/cycles/cycles.h @@ -0,0 +1,27 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// Example2 deals with cycles in datastructures. +/// +/// While it is common that some datastructures contain cylces, +/// it is necessary that the memory-analyzer does recognize them. +/// Otherwise it would follow the datastructures pointer establishing +/// the cycle for ever and never terminate execution. +/// The cycle_buffer described below contains a cycle. +/// As long as this regression test works, cycle introduced by using pointers +/// are handle the correct way. + +#include +typedef struct cycle_buffer_entry +{ + char *data; + struct cycle_buffer_entry *next; +} cycle_buffer_entry_t; + +struct cycle_buffer +{ + cycle_buffer_entry_t *start; + struct cycle_buffer_entry *end; +}; + +struct cycle_buffer buffer; diff --git a/regression/memory-analyzer/cycles/test.desc b/regression/memory-analyzer/cycles/test.desc new file mode 100644 index 00000000000..5c5a290fdf7 --- /dev/null +++ b/regression/memory-analyzer/cycles/test.desc @@ -0,0 +1,20 @@ +CORE +cycles.exe +--breakpoint process_buffer --symbols buffer +analyzing symbol: buffer +GENERATED CODE: +\{ + char _buffer_start_data_1\[\]="snow"; + char __buffer_start_next_data_3\[\]="sun"; + char ___buffer_start_next_next_data_5\[\]="rain"; + char ____buffer_start_next_next_next_data_7\[\]="thunder storm"; + struct cycle_buffer_entry ___buffer_start_next_next_next_6=\{ .data=\(char \*\)&____buffer_start_next_next_next_data_7, .next=\(\(struct cycle_buffer_entry \*\)NULL\) \}; + struct cycle_buffer_entry __buffer_start_next_next_4=\{ .data=\(char \*\)&___buffer_start_next_next_data_5, .next=&___buffer_start_next_next_next_6 \}; + struct cycle_buffer_entry _buffer_start_next_2=\{ .data=\(char \*\)&__buffer_start_next_data_3, .next=&__buffer_start_next_next_4 \}; + struct cycle_buffer_entry buffer_start_0=\{ .data=\(char \*\)&_buffer_start_data_1, .next=&_buffer_start_next_2 \}; + buffer.start = &buffer_start_0; + buffer.end = &___buffer_start_next_next_next_6; + \(\*\(\*\(\*\(\*buffer.start\).next\).next\).next\).next = &buffer_start_0; +\} +-- +-- diff --git a/regression/memory-analyzer/primitive_types/primitive_types.c b/regression/memory-analyzer/primitive_types/primitive_types.c new file mode 100644 index 00000000000..32d86bc53aa --- /dev/null +++ b/regression/memory-analyzer/primitive_types/primitive_types.c @@ -0,0 +1,42 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// This is just a basic example. +/// Pointer references are tested and ensured, that for example f and f_1 are +/// pointing to the same int value location after running memory-analyzer. + +#include "primitive_types.h" +int my_function(char *s) +{ + int a = 10; + g->a = a; + g->b = s; + return 0; +} + +int main(int argc, char **argv) +{ + char *test; + + e = 17; + + f = malloc(sizeof(int)); + f = &e; + f_1 = f; + + h = "test"; + + g = malloc(sizeof(struct mapping_things)); + d.a = 4; + d.c = -32; + test = "test2"; + + d.b = test; + + my_function(test); + + free(g); + free(f); + + return 0; +} diff --git a/regression/memory-analyzer/primitive_types/primitive_types.h b/regression/memory-analyzer/primitive_types/primitive_types.h new file mode 100644 index 00000000000..cc1e44b6370 --- /dev/null +++ b/regression/memory-analyzer/primitive_types/primitive_types.h @@ -0,0 +1,28 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// Example1 is just demonstrating, that the tool works in general. +/// A small struct and a few primitive pointers are declared in the global +/// namespace. These are assigned with values before calling my_function +/// and then, it is tested that this global state can be reconstructed before +/// calling my_function. As long as this example work basic functionallity is +/// provided. + +#include + +struct mapping_things +{ + int a; + char *b; + int c; +}; + +typedef struct mapping_things other_things; + +other_things d; +int e; +int *f; +int *f_1; +struct mapping_things *g; +char *h; +int my_function(char *s); diff --git a/regression/memory-analyzer/primitive_types/test.desc b/regression/memory-analyzer/primitive_types/test.desc new file mode 100644 index 00000000000..9e29245aa15 --- /dev/null +++ b/regression/memory-analyzer/primitive_types/test.desc @@ -0,0 +1,27 @@ +CORE +primitive_types.exe +--breakpoint my_function --symbols e,f,f_1,d,g,h +analyzing symbol: e +analyzing symbol: f +analyzing symbol: f_1 +analyzing symbol: d +analyzing symbol: g +analyzing symbol: h +GENERATED CODE: +\{ + e = 17; + f = &e; + f_1 = &e; + char d_b_0\[\]="test2"; + d.a = 4; + d.b = \(char \*\)&d_b_0; + d.c = -32; + struct mapping_things g_1=\{ .a=0, .b=\(\(char \*\)NULL\), .c=0 \}; + g = &g_1; + char h_2\[\]="test"; + h = \(char \*\)&h_2; +} +^EXIT=0 +^SIGNAL=0 +-- +-- diff --git a/regression/memory-analyzer/void_pointer/test.desc b/regression/memory-analyzer/void_pointer/test.desc new file mode 100644 index 00000000000..f709930d1f0 --- /dev/null +++ b/regression/memory-analyzer/void_pointer/test.desc @@ -0,0 +1,16 @@ +CORE +void_pointer.exe +--breakpoint void_pointer.c:17 --symbols a_void_pointer,a_second_void_pointer,a_third_void_pointer,blob +analyzing symbol: blob +GENERATED CODE: +\{ + a_void_pointer = NULL; + char char_a_second_void_pointer_0\[\]="test_string"; + a_second_void_pointer = \(char \*\)&char_a_second_void_pointer_0; + char char_a_third_void_pointer_1\[\]=\{ -13, -13, 'H', -1, '\\\\', '\\\\', -1 \}; + a_third_void_pointer = \(char \*\)&char_a_third_void_pointer_1; + blob.size = 7; + blob.data = \(char \*\)&char_a_third_void_pointer_1; +\} +-- +-- diff --git a/regression/memory-analyzer/void_pointer/void_pointer.c b/regression/memory-analyzer/void_pointer/void_pointer.c new file mode 100644 index 00000000000..88224166336 --- /dev/null +++ b/regression/memory-analyzer/void_pointer/void_pointer.c @@ -0,0 +1,18 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// This file initializes some void pointer in different styles. +/// Later the memory-analyzer tries to reconstruct the content. + +#include "void_pointer.h" +int main() +{ + char *char_pointer = "test_string"; + a_second_void_pointer = char_pointer; + char bytes[] = {0xf3, 0xf3, 0x48, 0xff, 0x5c, 0x5c, 0xff}; + a_third_void_pointer = &bytes; + + blob.data = bytes; + blob.size = sizeof(bytes); + return 0; +} diff --git a/regression/memory-analyzer/void_pointer/void_pointer.h b/regression/memory-analyzer/void_pointer/void_pointer.h new file mode 100644 index 00000000000..5744b2c8d1e --- /dev/null +++ b/regression/memory-analyzer/void_pointer/void_pointer.h @@ -0,0 +1,15 @@ +//Copyright 2018 Author: Malte Mues + +/// \file +/// Example4 test the handling of void pointer. +/// The memory-analyzer tries to cast them to char and read some of the data. + +struct a_struct_with_void +{ + int size; + void *data; +} blob; + +void *a_void_pointer; +void *a_second_void_pointer; +void *a_third_void_pointer; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4022889e456..45218016123 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -101,3 +101,4 @@ add_subdirectory(goto-cc) add_subdirectory(goto-instrument) add_subdirectory(goto-analyzer) add_subdirectory(goto-diff) +add_subdirectory(memory-analyzer) diff --git a/src/Makefile b/src/Makefile index a75535d49bc..eadbd1e4fbc 100644 --- a/src/Makefile +++ b/src/Makefile @@ -14,6 +14,7 @@ DIRS = analyses \ json \ langapi \ linking \ + memory-analyzer \ pointer-analysis \ solvers \ util \ @@ -25,6 +26,7 @@ all: cbmc.dir \ goto-cc.dir \ goto-diff.dir \ goto-instrument.dir \ + memory-analyzer.dir \ # Empty last line ############################################################################### @@ -62,6 +64,8 @@ goto-diff.dir: languages goto-programs.dir pointer-analysis.dir \ goto-cc.dir: languages pointer-analysis.dir goto-programs.dir linking.dir +memory-analyzer.dir: util.dir goto-programs.dir ansi-c.dir + # building for a particular directory $(patsubst %, %.dir, $(DIRS)): diff --git a/src/ansi-c/c_typecast.cpp b/src/ansi-c/c_typecast.cpp index 746a01cb834..a689e478180 100644 --- a/src/ansi-c/c_typecast.cpp +++ b/src/ansi-c/c_typecast.cpp @@ -55,14 +55,14 @@ bool c_implicit_typecast_arithmetic( return !c_typecast.errors.empty(); } -bool is_void_pointer(const typet &type) +bool has_a_void_pointer(const typet &type) { if(type.id()==ID_pointer) { if(type.subtype().id()==ID_empty) return true; - return is_void_pointer(type.subtype()); + return has_a_void_pointer(type.subtype()); } else return false; @@ -214,10 +214,11 @@ bool check_c_implicit_typecast( const irept &dest_subtype=dest_type.subtype(); const irept &src_subtype =src_type.subtype(); - if(src_subtype==dest_subtype) + if(src_subtype == dest_subtype) return false; - else if(is_void_pointer(src_type) || // from void to anything - is_void_pointer(dest_type)) // to void from anything + else if( + has_a_void_pointer(src_type) || // from void to anything + has_a_void_pointer(dest_type)) // to void from anything return false; } @@ -521,10 +522,9 @@ void c_typecastt::implicit_typecast_followed( // we are quite generous about pointers const typet &src_sub=ns.follow(src_type.subtype()); - const typet &dest_sub=ns.follow(dest_type.subtype()); + const typet &dest_sub = ns.follow(dest_type.subtype()); - if(is_void_pointer(src_type) || - is_void_pointer(dest_type)) + if(has_a_void_pointer(src_type) || has_a_void_pointer(dest_type)) { // from/to void is always good } diff --git a/src/memory-analyzer/CMakeLists.txt b/src/memory-analyzer/CMakeLists.txt new file mode 100644 index 00000000000..7ab5015d76d --- /dev/null +++ b/src/memory-analyzer/CMakeLists.txt @@ -0,0 +1,19 @@ +# Library +file(GLOB_RECURSE sources "*.cpp" "*.h") +list(REMOVE_ITEM sources + ${CMAKE_CURRENT_SOURCE_DIR}/memory-analyzer_main.cpp +) +add_library(memory-analyzer-lib ${sources}) + +generic_includes(memory-analyzer-lib) + +target_link_libraries(memory-analyzer-lib + ansi-c + goto-programs + util +) + + +# Executable +add_executable(memory-analyzer memory_analyzer_main.cpp) +target_link_libraries(memory-analyzer memory-analyzer-lib) diff --git a/src/memory-analyzer/Makefile b/src/memory-analyzer/Makefile new file mode 100644 index 00000000000..a5f514d63c8 --- /dev/null +++ b/src/memory-analyzer/Makefile @@ -0,0 +1,37 @@ +SRC = analyze_symbol.cpp\ + gdb_api.cpp \ + memory_analyzer_main.cpp \ + memory_analyzer_parse_options.cpp + # Empty last line + +INCLUDES= -I .. + +LIBS = \ + ../ansi-c/ansi-c$(LIBEXT) \ + ../goto-programs/goto-programs$(LIBEXT) \ + ../linking/linking$(LIBEXT) \ + ../util/util$(LIBEXT) \ + ../big-int/big-int$(LIBEXT) \ + ../langapi/langapi$(LIBEXT) + + + +CLEANFILES = memory-analyzer$(EXEEXT) + +include ../config.inc +include ../common + +all: memory-analyzer$(EXEEXT) + + + +############################################################################### + +memory-analyzer$(EXEEXT): $(OBJ) + $(LINKBIN) + + +.PHONY: memory-analyser-mac-signed + +memory-analyser-mac-signed: memory-analyzer$(EXEEXT) + codesign -v -s $(OSX_IDENTITY) memory-analyzer$(EXEEXT) diff --git a/src/memory-analyzer/analyze_symbol.cpp b/src/memory-analyzer/analyze_symbol.cpp new file mode 100644 index 00000000000..1cdbb5831ad --- /dev/null +++ b/src/memory-analyzer/analyze_symbol.cpp @@ -0,0 +1,505 @@ +// Copyrigth 2018 Author: Malte Mues +#ifdef __linux__ + +#include "analyze_symbol.h" +#include "gdb_api.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +symbol_analyzert::symbol_analyzert( + const symbol_tablet &st, + gdb_apit &gdb, + message_handlert &handler) + : messaget(handler), + gdb_api(gdb), + ns(st), + c_converter(ns, expr2c_configurationt::clean_configuration), + id_counter(0) +{ +} + +void symbol_analyzert::analyse_symbol(const std::string &symbol_name) +{ + const symbolt symbol = ns.lookup(symbol_name); + const symbol_exprt symbol_expr(symbol.name, symbol.type); + + if(is_c_char_pointer(symbol.type)) + { + process_char_pointer(symbol_expr, symbol.location); + } + else if(is_void_pointer(symbol.type)) + { + const exprt target_expr = + fill_void_pointer_with_values(symbol_expr, symbol.location); + + add_assignment(symbol_expr, target_expr); + } + else if(is_pointer(symbol.type)) + { + const std::string memory_location = gdb_api.get_memory(symbol_name); + + if(memory_location != "0x0") + { + const exprt target_symbol = process_any_pointer_target( + symbol_expr, memory_location, symbol.location); + const address_of_exprt reference_to_target(target_symbol); + add_assignment(symbol_expr, reference_to_target); + } + else + { + const exprt null_ptr = zero_initializer(symbol.type, symbol.location, ns); + add_assignment(symbol_expr, null_ptr); + } + } + else + { + const typet target_type = ns.follow(symbol.type); + const exprt target_expr = fill_expr_with_values( + symbol_name, + zero_initializer(target_type, symbol.location, ns), + target_type, + symbol.location); + + if(is_struct(target_type)) + { + const struct_typet structt = to_struct_type(target_type); + + for(std::size_t i = 0; i < target_expr.operands().size(); ++i) + { + auto &member_declaration = structt.components()[i]; + if(!member_declaration.get_is_padding()) + { + const auto &operand = target_expr.operands()[i]; + + const symbol_exprt symbol_op( + id2string(symbol.name) + "." + + id2string(member_declaration.get(ID_name)), + operand.type()); + add_assignment(symbol_op, operand); + } + } + } + else + { + add_assignment(symbol_expr, target_expr); + } + + try + { + const std::string memory_location = gdb_api.get_memory("&" + symbol_name); + values[memory_location] = symbol_expr; + } + catch(gdb_inaccessible_memoryt e) + { + warning() << "Couldn't access memory location for " << symbol_name << "\n" + << "continue execution anyhow." + << "You might want to check results for correctness!" + << messaget::eom; + } + } + process_outstanding_assignments(); +} + +std::string symbol_analyzert::get_code() +{ + return c_converter.convert(generated_code); +} + +void symbol_analyzert::add_assignment( + const symbol_exprt &symbol, + const exprt &value) +{ + generated_code.add(code_assignt(symbol, value)); +} + +void symbol_analyzert::process_char_pointer( + const symbol_exprt &symbol, + const source_locationt &location) +{ + const std::string memory_location = + gdb_api.get_memory(id2string(symbol.get_identifier())); + const exprt target_object = + declare_and_initalize_char_ptr(symbol, memory_location, location); + + add_assignment(symbol, target_object); +} + +array_exprt create_char_array_from_string( + std::string &values, + const bitvector_typet &type, + const source_locationt &location, + const namespacet &ns) +{ + const std::regex single_char_pattern( + "(?:(?:(?:\\\\)?([0-9]{3}))|([\\\\]{2}|\\S))"); + std::smatch result; + std::vector array_content; + size_t content_size = type.get_width(); + while(regex_search(values, result, single_char_pattern)) + { + if(!result[1].str().empty()) + { + // Char are octal encoded in gdb output. + mp_integer int_value = string2integer(result[1], OCTAL_SYSTEM); + array_content.push_back(integer2binary(int_value, content_size)); + } + else if(!result[2].str().empty()) + { + char token = result[2].str()[0]; + array_content.push_back(integer2binary(token, content_size)); + } + else + { + throw symbol_analysis_exceptiont( + "It is not supposed that non group match"); + } + values = result.suffix().str(); + } + + exprt size = from_integer(array_content.size(), size_type()); + array_typet new_array_type(type, size); + array_exprt result_array = + to_array_expr(zero_initializer(new_array_type, location, ns)); + + for(size_t i = 0; i < array_content.size(); ++i) + { + result_array.operands()[i].set(ID_value, array_content[i]); + } + + return result_array; +} + +code_declt +symbol_analyzert::declare_instance(const std::string &prefix, const typet &type) +{ + std::string safe_prefix = prefix; + std::replace(safe_prefix.begin(), safe_prefix.end(), '.', '_'); + std::replace(safe_prefix.begin(), safe_prefix.end(), '-', '_'); + std::replace(safe_prefix.begin(), safe_prefix.end(), '>', '_'); + std::replace(safe_prefix.begin(), safe_prefix.end(), '&', '_'); + std::replace(safe_prefix.begin(), safe_prefix.end(), '*', '_'); + safe_prefix.erase( + std::remove(safe_prefix.begin(), safe_prefix.end(), '('), + safe_prefix.end()); + safe_prefix.erase( + std::remove(safe_prefix.begin(), safe_prefix.end(), ')'), + safe_prefix.end()); + safe_prefix.erase( + std::remove(safe_prefix.begin(), safe_prefix.end(), '['), + safe_prefix.end()); + safe_prefix.erase( + std::remove(safe_prefix.begin(), safe_prefix.end(), ']'), + safe_prefix.end()); + safe_prefix.erase( + std::remove(safe_prefix.begin(), safe_prefix.end(), ' '), + safe_prefix.end()); + + const std::string var_id = safe_prefix + "_" + std::to_string(id_counter); + ++id_counter; + const code_declt declaration(symbol_exprt(var_id, type)); + return declaration; +} + +exprt symbol_analyzert::declare_and_initalize_char_ptr( + const symbol_exprt &symbol, + const std::string &memory_location, + const source_locationt &location) +{ + const std::regex octal_encoding_pattern("(\\\\[0-9]{3})"); + + auto it = values.find(memory_location); + if(it == values.end()) + { + std::string value = gdb_api.get_value(id2string(symbol.get_identifier())); + + exprt init = string_constantt(value); + + if(regex_search(value, octal_encoding_pattern)) + { + bitvector_typet bv_type = to_bitvector_type(symbol.type().subtype()); + init = create_char_array_from_string(value, bv_type, location, ns); + } + + code_declt target_object = + declare_instance(id2string(symbol.get_identifier()), init.type()); + + target_object.operands().resize(2); + target_object.op1() = init; + generated_code.add(target_object); + const address_of_exprt deref_pointer(target_object.symbol()); + const exprt casted_if_needed = + typecast_exprt::conditional_cast(deref_pointer, symbol.type()); + values[memory_location] = casted_if_needed; + return casted_if_needed; + } + else + { + return it->second; + } +} + +exprt symbol_analyzert::process_any_pointer_target( + const symbol_exprt &symbol, + const std::string memory_location, + const source_locationt &location) +{ + auto it = values.find(memory_location); + if(it == values.end()) + { + const place_holder_exprt recursion_breaker(memory_location); + values[memory_location] = recursion_breaker; + + const typet target_type = ns.follow(symbol.type().subtype()); + const code_declt target_object = get_pointer_target( + id2string(symbol.get_identifier()), target_type, location); + generated_code.add(target_object); + values[memory_location] = target_object.symbol(); + return target_object.symbol(); + } + else + { + return it->second; + } +} + +code_declt symbol_analyzert::get_pointer_target( + const std::string pointer_name, + const typet &type, + const source_locationt &location) +{ + code_declt declaration = declare_instance(pointer_name, type); + + const std::string deref_pointer = "(*" + pointer_name + ")"; + const exprt target_expr = fill_expr_with_values( + deref_pointer, zero_initializer(type, location, ns), type, location); + + declaration.operands().resize(2); + declaration.op1() = target_expr; + return declaration; +} + +exprt symbol_analyzert::fill_array_with_values( + const std::string &symbol_id, + exprt &array, + const typet &type, + const source_locationt &location) +{ + for(size_t i = 0; i < array.operands().size(); ++i) + { + const std::string cell_access = symbol_id + "[" + std::to_string(i) + "]"; + const typet target_type = ns.follow(array.operands()[i].type()); + if(is_c_char_pointer(target_type)) + { + const std::string memory_location = gdb_api.get_memory(cell_access); + const symbol_exprt char_ptr(cell_access, target_type); + array.operands()[i] = + declare_and_initalize_char_ptr(char_ptr, memory_location, location); + } + else + { + array.operands()[i] = fill_expr_with_values( + cell_access, array.operands()[i], target_type, location); + } + } + return array; +} + +exprt symbol_analyzert::fill_expr_with_values( + const std::string &symbol_id, + exprt expr, + const typet &type, + const source_locationt &location) +{ + if(expr.is_constant() && (is_c_int_derivate(type) || is_c_char(type))) + { + const std::string value = gdb_api.get_value(symbol_id); + const mp_integer int_rep = string2integer(value, DECIMAL_SYSTEM); + return from_integer(int_rep, type); + } + else if(expr.is_constant() && is_c_bool(type)) + { + const std::string value = gdb_api.get_value(symbol_id); + return from_c_boolean_value(value, type); + } + else if(expr.is_constant() && is_c_enum(expr.type())) + { + const std::string value = gdb_api.get_value(symbol_id); + return convert_memeber_name_to_enum_value( + value, to_c_enum_type(expr.type())); + } + else if(is_struct(type)) + { + return fill_struct_with_values(symbol_id, expr, type, location); + } + else if(is_array(type)) + { + expr = fill_array_with_values(symbol_id, expr, type, location); + } + else + { + throw symbol_analysis_exceptiont( + "unexpected thing: " + expr.pretty() + "\nexception end\n"); + } + return expr; +} + +exprt symbol_analyzert::fill_char_struct_member_with_values( + const symbol_exprt &field_access, + const exprt &default_expr, + const source_locationt &location) +{ + const std::string memory_location = + gdb_api.get_memory(id2string(field_access.get_identifier())); + if(memory_location != "0x0") + { + return declare_and_initalize_char_ptr( + field_access, memory_location, location); + } + return default_expr; +} + +exprt symbol_analyzert::fill_struct_with_values( + const std::string &symbol_id, + exprt &expr, + const typet &type, + const source_locationt &location) +{ + const struct_typet structt = to_struct_type(type); + for(size_t i = 0; i < expr.operands().size(); ++i) + { + exprt &operand = expr.operands()[i]; + const typet &resolved_type = ns.follow(operand.type()); + if(!structt.components()[i].get_is_padding()) + { + std::string field_access = + symbol_id + "." + id2string(structt.components()[i].get_name()); + symbol_exprt field_symbol(field_access, resolved_type); + + if(is_c_char_pointer(resolved_type)) + { + operand = + fill_char_struct_member_with_values(field_symbol, operand, location); + } + else if(is_void_pointer(resolved_type)) + { + operand = fill_void_pointer_with_values(field_symbol, location); + } + else if(is_struct(resolved_type)) + { + code_declt declaration = declare_instance(field_access, resolved_type); + + const exprt target_expr = fill_expr_with_values( + field_access, + zero_initializer(resolved_type, location, ns), + resolved_type, + location); + + declaration.operands().resize(2); + declaration.op1() = target_expr; + generated_code.add(declaration); + operand = declaration.symbol(); + } + else if(is_pointer(resolved_type)) + { + const std::string memory_location = gdb_api.get_memory(field_access); + + if(memory_location != "0x0") + { + const exprt target_sym = + process_any_pointer_target(field_symbol, memory_location, location); + + if(target_sym.id() == ID_skip_initialize) + { + outstanding_assigns[field_symbol] = + id2string(target_sym.get(ID_identifier)); + } + else + { + operand = address_of_exprt(target_sym); + } + } + } + else if(is_array(resolved_type)) + { + operand = fill_array_with_values( + field_access, operand, resolved_type, location); + } + else + { + const std::string value = gdb_api.get_value(field_access); + + if(is_c_int_derivate(resolved_type)) + { + const mp_integer int_rep = string2integer(value, DECIMAL_SYSTEM); + const constant_exprt constant = from_integer(int_rep, operand.type()); + expr.operands()[i] = constant; + } + } + } + } + return expr; +} + +exprt symbol_analyzert::fill_void_pointer_with_values( + const symbol_exprt &pointer_symbol, + const source_locationt &location) +{ + const std::string &pointer_name = id2string(pointer_symbol.get_identifier()); + const std::string &char_casted = "(char *)" + pointer_name; + exprt zero_ptr = zero_initializer(pointer_symbol.type(), location, ns); + + try + { + const typet &new_char_pointer = pointer_type(char_type()); + const symbol_exprt char_access_symbol(char_casted, new_char_pointer); + + const std::string memory_location = gdb_api.get_memory(pointer_name); + + if(memory_location == "0x0") + { + return zero_ptr; + } + return declare_and_initalize_char_ptr( + char_access_symbol, memory_location, location); + } + catch(gdb_inaccessible_memoryt) + { + // It is not directly what is supposed to happen, + // but doesn't corupt gdbs process state. So it's save to continue. + // Anyhow, you should inspect the result and value of field_access + // manually before using the state for verifcation. + // Field access stays zero initalized. + warning() << "Could not deal with void pointer: " << pointer_name + << "\nThe value is potential unsafe and need review\n" + << messaget::eom; + } + + return zero_ptr; +} + +void symbol_analyzert::process_outstanding_assignments() +{ + for(auto it = outstanding_assigns.begin(); it != outstanding_assigns.end(); + ++it) + { + const address_of_exprt reference_to_target(values[it->second]); + generated_code.add(code_assignt(it->first, reference_to_target)); + } +} +#endif diff --git a/src/memory-analyzer/analyze_symbol.h b/src/memory-analyzer/analyze_symbol.h new file mode 100644 index 00000000000..2874a114923 --- /dev/null +++ b/src/memory-analyzer/analyze_symbol.h @@ -0,0 +1,114 @@ +// Copyright 2018 Author: Malte Mues +#ifdef __linux__ +#ifndef CPROVER_MEMORY_ANALYZER_ANALYZE_SYMBOL_H +#define CPROVER_MEMORY_ANALYZER_ANALYZE_SYMBOL_H +#include +#include + +#include +#include +#include +#include + +class symbol_tablet; +class gdb_apit; +class exprt; +class source_locationt; + +class symbol_analyzert : public messaget +{ +public: + symbol_analyzert( + const symbol_tablet &st, + gdb_apit &gdb, + message_handlert &handler); + void analyse_symbol(const std::string &symbol); + std::string get_code(); + +private: + gdb_apit &gdb_api; + const namespacet ns; + expr2ct c_converter; + + code_blockt generated_code; + size_t id_counter; + std::map outstanding_assigns; + std::map values; + + void add_assignment(const symbol_exprt &symbol, const exprt &value); + std::map + get_value_for_struct(std::string variable, struct_typet structure); + exprt fill_array_with_values( + const std::string &symbol_id, + exprt &array, + const typet &type, + const source_locationt &location); + exprt fill_expr_with_values( + const std::string &symbol_id, + exprt expr, + const typet &type, + const source_locationt &location); + exprt fill_char_struct_member_with_values( + const symbol_exprt &field_acces, + const exprt &default_expr, + const source_locationt &location); + exprt fill_struct_with_values( + const std::string &symbol_id, + exprt &expr, + const typet &type, + const source_locationt &location); + exprt fill_void_pointer_with_values( + const symbol_exprt &symbol, + const source_locationt &location); + + exprt process_any_pointer_target( + const symbol_exprt &symbol, + const std::string memory_location, + const source_locationt &location); + code_declt get_pointer_target( + const std::string deref_pointer, + const typet &type, + const source_locationt &location); + + code_declt declare_instance(const std::string &prefix, const typet &type); + exprt declare_and_initalize_char_ptr( + const symbol_exprt &symbol, + const std::string &memory_location, + const source_locationt &location); + void process_char_pointer( + const symbol_exprt &symbol, + const source_locationt &location); + + void process_outstanding_assignments(); +}; + +class symbol_analysis_exceptiont : public std::exception +{ +public: + explicit symbol_analysis_exceptiont(std::string reason) : std::exception() + { + error = reason; + } + const char *what() const throw() + { + return error.c_str(); + } + +private: + std::string error; +}; + +class place_holder_exprt : public exprt +{ +public: + place_holder_exprt() : exprt(ID_skip_initialize) + { + } + explicit place_holder_exprt(const irep_idt &identifier) + : exprt(ID_skip_initialize) + { + set(ID_identifier, identifier); + } +}; +#endif // CPROVER_MEMORY_ANALYZER_ANALYZE_SYMBOL_H +#endif diff --git a/src/memory-analyzer/gdb_api.cpp b/src/memory-analyzer/gdb_api.cpp new file mode 100644 index 00000000000..a363c004a2f --- /dev/null +++ b/src/memory-analyzer/gdb_api.cpp @@ -0,0 +1,266 @@ +// Copyright 2018 Author: Malte Mues +#ifdef __linux__ +#include +#include +#include +#include +#include + +#include "gdb_api.h" +#include + +gdb_apit::gdb_apit(const char *arg) + : binary_name(arg), buffer_position(0), last_read_size(0) +{ + memset(buffer, 0, MAX_READ_SIZE_GDB_BUFFER); +} + +int gdb_apit::create_gdb_process() +{ + if(pipe(pipe_input) == -1) + { + throw gdb_interaction_exceptiont("could not create pipe for stdin!"); + } + if(pipe(pipe_output) == -1) + { + throw gdb_interaction_exceptiont("could not create pipe for stdout!"); + } + + gdb_process = fork(); + if(gdb_process == -1) + { + throw gdb_interaction_exceptiont("could not create gdb process."); + } + if(gdb_process == 0) + { + // child process + close(pipe_input[1]); + close(pipe_output[0]); + dup2(pipe_input[0], STDIN_FILENO); + dup2(pipe_output[1], STDOUT_FILENO); + dup2(pipe_output[1], STDERR_FILENO); + dprintf(pipe_output[1], "binary name: %s\n", binary_name); + char *arg[] = { + const_cast("gdb"), const_cast(binary_name), NULL}; + + dprintf(pipe_output[1], "Loading gdb...\n"); + execvp("gdb", arg); + + // Only reachable, if execvp failed + int errno_value = errno; + dprintf(pipe_output[1], "errno in child: %s\n", strerror(errno_value)); + } + else + { + // parent process + close(pipe_input[0]); + close(pipe_output[1]); + write_to_gdb("set max-value-size unlimited\n"); + } + return 0; +} + +void gdb_apit::terminate_gdb_process() +{ + close(pipe_input[0]); + close(pipe_input[1]); +} + +void gdb_apit::write_to_gdb(const std::string &command) +{ + if( + write(pipe_input[1], command.c_str(), command.length()) != + (ssize_t)command.length()) + { + throw gdb_interaction_exceptiont("Could not write a command to gdb"); + } +} + +std::string gdb_apit::read_next_line() +{ + char token; + std::string line; + do + { + while(buffer_position >= last_read_size) + { + read_next_buffer_chunc(); + } + token = buffer[buffer_position]; + line += token; + ++buffer_position; + } while(token != '\n'); + return line; +} + +void gdb_apit::read_next_buffer_chunc() +{ + memset(buffer, 0, last_read_size); + const auto read_size = + read(pipe_output[0], &buffer, MAX_READ_SIZE_GDB_BUFFER); + if(read_size < 0) + { + throw gdb_interaction_exceptiont("Error reading from gdb_process"); + } + last_read_size = read_size; + buffer_position = 0; +} + +void gdb_apit::run_gdb_from_core(const std::string &corefile) +{ + const std::string command = "core " + corefile + "\n"; + write_to_gdb(command); + std::string line; + while(!isdigit(line[0])) + { + line = read_next_line(); + if(check_for_gdb_core_error(line)) + { + terminate_gdb_process(); + throw gdb_interaction_exceptiont( + "This core file doesn't work with the binary."); + } + } +} + +bool gdb_apit::check_for_gdb_core_error(const std::string &line) +{ + const std::regex core_init_error_r("exec file is newer than core"); + return regex_search(line, core_init_error_r); +} + +void gdb_apit::run_gdb_to_breakpoint(const std::string &breakpoint) +{ + std::string command = "break " + breakpoint + "\n"; + write_to_gdb(command); + command = "run\n"; + write_to_gdb(command); + std::string line; + while(!isdigit(line[0])) + { + line = read_next_line(); + if(check_for_gdb_breakpoint_error(line)) + { + terminate_gdb_process(); + throw gdb_interaction_exceptiont("This is not a valid breakpoint\n"); + } + } +} + +bool gdb_apit::check_for_gdb_breakpoint_error(const std::string &line) +{ + const std::regex breakpoint_init_error_r("Make breakpoint pending on future"); + return regex_search(line, breakpoint_init_error_r); +} + +std::string gdb_apit::create_command(const std::string &variable) +{ + return "p " + variable + "\n"; +} + +std::string gdb_apit::get_memory(const std::string &variable) +{ + write_to_gdb(create_command(variable)); + const std::string &response = read_next_line(); + return extract_memory(response); +} + +// All lines in the output start with something like +// '$XX = ' where XX is a digit. => shared_prefix. +const char shared_prefix[] = R"(\$[0-9]+\s=\s)"; + +// Matching a hex encoded address. +const char memory_address[] = R"(0x[[:xdigit:]]+)"; + +std::string gdb_apit::extract_memory(const std::string &line) +{ + // The next patterns matches the type information, + // which be "(classifier struct name (*)[XX])" + // for pointer to struct arrayes. classsifier and struct is optional => {1,3} + // If it isn't an array, than the ending is " *)" + // => or expression in pointer_star_or_array_suffix. + const std::string struct_name = R"((?:\S+\s){1,3})"; + const std::string pointer_star_or_arary_suffix = + R"((?:\*|(?:\*)?\(\*\)\[[0-9]+\])?)"; + const std::string pointer_type_info = + R"(\()" + struct_name + pointer_star_or_arary_suffix + R"(\))"; + + // The pointer type info is followed by the memory value and eventually + // extended by the name of the pointer content, if gdb has an explicit symbol. + // See unit test cases for more examples of expected input. + const std::regex pointer_pattern( + std::string(shared_prefix) + pointer_type_info + R"(\s()" + memory_address + + R"()(\s\S+)?)"); + const std::regex null_pointer_pattern( + std::string(shared_prefix) + R"((0x0))"); + // Char pointer output the memory adress followed by the string in there. + const std::regex char_pointer_pattern( + std::string(shared_prefix) + R"(()" + memory_address + + R"()\s"[\S[:s:]]*")"); + + std::smatch result; + if(regex_search(line, result, char_pointer_pattern)) + { + return result[1]; + } + if(regex_search(line, result, pointer_pattern)) + { + return result[1]; + } + if(regex_search(line, result, null_pointer_pattern)) + { + return result[1]; + } + throw gdb_interaction_exceptiont("Cannot resolve memory_address: " + line); +} + +std::string gdb_apit::get_value(const std::string &variable) +{ + write_to_gdb(create_command(variable)); + const std::string &response = read_next_line(); + return extract_value(response); +} + +std::string gdb_apit::extract_value(const std::string &line) +{ + // This pattern matches primitive int values and bools. + const std::regex value_pattern( + std::string(shared_prefix) + R"(((?:-)?\d+|true|false))"); + // This pattern matches the char pointer content. + // It is printed behind the address. + const std::regex char_value_pattern( + std::string(shared_prefix) + memory_address + "\\s\"([\\S ]*)\""); + // An enum entry just prints the name of the value on the commandline. + const std::regex enum_value_pattern( + std::string(shared_prefix) + R"(([\S]+)(?:\n|$))"); + // A void pointer outputs it type first, followed by the memory address it + // is pointing to. This pattern should extract the address. + const std::regex void_pointer_pattern( + std::string(shared_prefix) + R"((?:[\S\s]+)\s()" + memory_address + ")"); + + std::smatch result; + if(regex_search(line, result, char_value_pattern)) + { + return result[1]; + } + if(regex_search(line, result, value_pattern)) + { + return result[1]; + } + if(regex_search(line, result, enum_value_pattern)) + { + return result[1]; + } + if(regex_search(line, result, void_pointer_pattern)) + { + return result[1]; + } + std::regex memmory_access_error("Cannot access memory"); + if(regex_search(line, memmory_access_error)) + { + throw gdb_inaccessible_memoryt("ERROR: " + line); + } + throw gdb_interaction_exceptiont("Cannot extract value from this: " + line); +} + +#endif diff --git a/src/memory-analyzer/gdb_api.h b/src/memory-analyzer/gdb_api.h new file mode 100644 index 00000000000..b9e3d915b9c --- /dev/null +++ b/src/memory-analyzer/gdb_api.h @@ -0,0 +1,76 @@ +// Copyright 2018 Author: Malte Mues +#ifdef __linux__ +#ifndef CPROVER_MEMORY_ANALYZER_GDB_API_H +#define CPROVER_MEMORY_ANALYZER_GDB_API_H +#include + +#include + +class namespacet; +class symbolt; +class irept; + +class gdb_apit +{ +public: + explicit gdb_apit(const char *binary); + + int create_gdb_process(); + void terminate_gdb_process(); + + void run_gdb_to_breakpoint(const std::string &breakpoint); + void run_gdb_from_core(const std::string &corefile); + + std::string get_value(const std::string &variable); + std::string get_memory(const std::string &variable); + +private: + static const int MAX_READ_SIZE_GDB_BUFFER = 600; + + const char *binary_name; + char buffer[MAX_READ_SIZE_GDB_BUFFER]; + int buffer_position; + pid_t gdb_process; + int last_read_size; + int pipe_input[2]; + int pipe_output[2]; + + static std::string create_command(const std::string &variable); + void write_to_gdb(const std::string &command); + + std::string read_next_line(); + void read_next_buffer_chunc(); + + static bool check_for_gdb_breakpoint_error(const std::string &line); + static bool check_for_gdb_core_error(const std::string &line); + + static std::string extract_value(const std::string &line); + static std::string extract_memory(const std::string &line); +}; + +class gdb_interaction_exceptiont : public std::exception +{ +public: + explicit gdb_interaction_exceptiont(std::string reason) : std::exception() + { + error = reason; + } + const char *what() const throw() + { + return error.c_str(); + } + +private: + std::string error; +}; + +class gdb_inaccessible_memoryt : public gdb_interaction_exceptiont +{ +public: + explicit gdb_inaccessible_memoryt(std::string reason) + : gdb_interaction_exceptiont(reason) + { + } +}; +#endif +#endif diff --git a/src/memory-analyzer/memory_analyzer_main.cpp b/src/memory-analyzer/memory_analyzer_main.cpp new file mode 100644 index 00000000000..ae57e4c9af2 --- /dev/null +++ b/src/memory-analyzer/memory_analyzer_main.cpp @@ -0,0 +1,16 @@ +// Copyright 2018 Author: Malte Mues +#ifdef __linux__ + +#include "memory_analyzer_parse_options.h" + +int main(int argc, const char **argv) +{ + memory_analyzer_parse_optionst parse_options(argc, argv); + return parse_options.main(); +} +#else +int main() +{ + throw "only supported on Linux platforms currently\n"; +} +#endif diff --git a/src/memory-analyzer/memory_analyzer_parse_options.cpp b/src/memory-analyzer/memory_analyzer_parse_options.cpp new file mode 100644 index 00000000000..ec5ea8c4f03 --- /dev/null +++ b/src/memory-analyzer/memory_analyzer_parse_options.cpp @@ -0,0 +1,102 @@ +// Copyright 2018 Author: Malte Mues + +/// \file +/// Commandline parser for the memory analyzer executing main work. + +#ifdef __linux__ + +#include "memory_analyzer_parse_options.h" +#include "analyze_symbol.h" +#include "gdb_api.h" + +#include +#include +#include +#include +#include +#include + +int memory_analyzer_parse_optionst::doit() +{ + // This script is the entry point and has to make sure + // that the config object is set to a valid architecture. + // config is later used to determine right size for types. + // If config is not set, you might see a bunch of types with + // size 0. + // It might be desierable to convert this into flags in the future. + config.ansi_c.mode = configt::ansi_ct::flavourt::GCC; + config.ansi_c.set_arch_spec_x86_64(); + + if(cmdline.args.size() != 1) + { + error() << "Please provide one binary with symbols to process" << eom; + return CPROVER_EXIT_USAGE_ERROR; + } + + std::string binary = cmdline.args[0]; + + gdb_apit gdb_api = gdb_apit(binary.c_str()); + gdb_api.create_gdb_process(); + + if(cmdline.isset("core-file")) + { + std::string core_file = cmdline.get_value("core-file"); + gdb_api.run_gdb_from_core(core_file); + } + else if(cmdline.isset("breakpoint")) + { + std::string breakpoint = cmdline.get_value("breakpoint"); + gdb_api.run_gdb_to_breakpoint(breakpoint); + } + else + { + error() << "Either the 'core-file' or 'breakpoint; flag must be provided\n" + << "Cannot execute memory-analyzer. Going to shut it down...\n" + << messaget::eom; + + gdb_api.terminate_gdb_process(); + + help(); + return CPROVER_EXIT_USAGE_ERROR; + } + + if(cmdline.isset("symbols")) + { + const std::string symbol_list(cmdline.get_value("symbols")); + std::vector result; + split_string(symbol_list, ',', result, false, false); + + goto_modelt goto_model; + read_goto_binary(binary, goto_model, ui_message_handler); + + symbol_analyzert analyzer( + goto_model.symbol_table, gdb_api, ui_message_handler); + + for(auto it = result.begin(); it != result.end(); it++) + { + messaget::result() << "analyzing symbol: " << (*it) << "\n"; + analyzer.analyse_symbol(*it); + } + + messaget::result() << "GENERATED CODE: \n" << messaget::eom; + messaget::result() << analyzer.get_code() << "\n" << messaget::eom; + + gdb_api.terminate_gdb_process(); + return CPROVER_EXIT_SUCCESS; + } + else + { + error() << "It is required to provide the symbols flag in order to make " + << "this tool work.\n" + << messaget::eom; + } + gdb_api.terminate_gdb_process(); + help(); + return CPROVER_EXIT_USAGE_ERROR; +} + +void memory_analyzer_parse_optionst::help() +{ +} + +#endif // __linux__ diff --git a/src/memory-analyzer/memory_analyzer_parse_options.h b/src/memory-analyzer/memory_analyzer_parse_options.h new file mode 100644 index 00000000000..7648a67069c --- /dev/null +++ b/src/memory-analyzer/memory_analyzer_parse_options.h @@ -0,0 +1,38 @@ +// Copyright 2018 Author: Malte Mues + +/// \file +/// This code does the command line parsing for the memory-analyzer tool + +#ifndef CPROVER_MEMORY_ANALYZER_MEMORY_ANALYZER_PARSE_OPTIONS_H +#define CPROVER_MEMORY_ANALYZER_MEMORY_ANALYZER_PARSE_OPTIONS_H +#ifdef __linux__ +#include +#include + +// clang-format off +#define MEMMORY_ANALYZER_OPTIONS \ + "(core-file):" \ + "(breakpoint):" \ + "(symbols):" + +// clang-format on + +class memory_analyzer_parse_optionst : public parse_options_baset, + public messaget +{ +public: + int doit() override; + void help() override; + + memory_analyzer_parse_optionst(int argc, const char **argv) + : parse_options_baset(MEMMORY_ANALYZER_OPTIONS, argc, argv), + messaget(ui_message_handler), + ui_message_handler(cmdline, "memory-analyzer") + { + } + +protected: + ui_message_handlert ui_message_handler; +}; +#endif // __linux__ +#endif // CPROVER_MEMORY_ANALYZER_MEMORY_ANALYZER_PARSE_OPTIONS_H diff --git a/src/memory-analyzer/module_dependencies.txt b/src/memory-analyzer/module_dependencies.txt new file mode 100644 index 00000000000..24a7a855d94 --- /dev/null +++ b/src/memory-analyzer/module_dependencies.txt @@ -0,0 +1,3 @@ +ansi-c +goto-programs +util diff --git a/src/util/c_types_util.h b/src/util/c_types_util.h new file mode 100644 index 00000000000..ee19f6111c0 --- /dev/null +++ b/src/util/c_types_util.h @@ -0,0 +1,109 @@ +// Copyright 2018 Author: Malte Mues + +/// \file +/// This file contains functions, that should support test for underlying +/// c types, in cases where this is required for anlysis purpose. +#ifndef CPROVER_UTIL_C_TYPES_UTIL_H +#define CPROVER_UTIL_C_TYPES_UTIL_H + +#include "arith_tools.h" +#include "invariant.h" +#include "std_types.h" +#include "type.h" + +#include +#include + +/// This function checks, whether this has been a char type in the c program. +inline bool is_c_char(const typet &type) +{ + const irep_idt &c_type = type.get(ID_C_c_type); + return is_signed_or_unsigned_bitvector(type) && + (c_type == ID_char || c_type == ID_unsigned_char || + c_type == ID_signed_char); +} + +/// This function checks, whether the type +/// has been a bool type in the c program. +inline bool is_c_bool(const typet &type) +{ + return type.id() == ID_c_bool; +} + +/// This function checks, whether the type is has been some kind of integer +/// type in the c program. +/// It considers the signed and unsigned verison of +/// int, short, long and long long as integer types in c. +inline bool is_c_int_derivate(const typet &type) +{ + const irep_idt &c_type = type.get(ID_C_c_type); + return is_signed_or_unsigned_bitvector(type) && + (c_type == ID_signed_int || c_type == ID_unsigned_int || + c_type == ID_signed_short_int || c_type == ID_unsigned_int || + c_type == ID_unsigned_short_int || c_type == ID_signed_long_int || + c_type == ID_signed_long_long_int || c_type == ID_unsigned_long_int || + c_type == ID_unsigned_long_long_int); +} + +/// This function checks, whether type is a pointer and the target type +/// of the pointer has been a char type in the c program. +inline bool is_c_char_pointer(const typet &type) +{ + return is_pointer(type) && is_c_char(type.subtype()); +} + +/// This function checks, whether type is a pointer and the target type +/// has been some kind of int type in the c program. +/// is_c_int_derivate answers is used for checking the int type. +inline bool is_c_int_derivate_pointer(const typet &type) +{ + return is_pointer(type) && is_c_int_derivate(type.subtype()); +} + +/// This function checks, whether the type +/// has been an enum type in the c program. +inline bool is_c_enum(const typet &type) +{ + return type.id() == ID_c_enum; +} + +/// This function creates a constant representing the +/// bitvector encoded integer value of a string in the enum. +/// \param member_name is a string that should be in the enum. +/// \param c_enum the enum type memeber_name is supposed to be part of. +/// \return value a constant, that could be assigned as value for an expression +/// with type c_enum. +constant_exprt convert_memeber_name_to_enum_value( + const std::string &member_name, + const c_enum_typet &c_enum) +{ + for(const auto &enum_value : c_enum.members()) + { + if(id2string(enum_value.get_identifier()) == member_name) + { + mp_integer int_value = string2integer(id2string(enum_value.get_value())); + return from_integer(int_value, c_enum); + } + } + INVARIANT(false, "member_name must be a valid value in the c_enum."); +} + +/// This function creates a constant representing either 0 or 1 as value of +/// type type. +/// \param bool_value: A string that is compared to "true" ignoring case. +/// \param type: The type, the resulting constant is supposed to have. +/// \return a constant of type \param type with either 0 or 1 as value. +constant_exprt from_c_boolean_value(std::string bool_value, const typet &type) +{ + std::transform( + bool_value.begin(), bool_value.end(), bool_value.begin(), ::tolower); + if(bool_value == "true") + { + return from_integer(mp_integer(1), type); + } + else + { + return from_integer(mp_integer(0), type); + } +} +#endif // CPROVER_UTIL_C_TYPES_UTIL_H diff --git a/src/util/mp_arith.h b/src/util/mp_arith.h index d20bb33f29a..ec88a7e3abd 100644 --- a/src/util/mp_arith.h +++ b/src/util/mp_arith.h @@ -18,6 +18,11 @@ Author: Daniel Kroening, kroening@kroening.com #include "optional.h" #include "deprecate.h" +const int DECIMAL_SYSTEM = 10; +const int OCTAL_SYSTEM = 8; +const int BINARY_SYSTEM = 2; +const int HEXADECIMAL_SYSTEM = 16; + // NOLINTNEXTLINE(readability/identifiers) typedef BigInt mp_integer; @@ -47,8 +52,11 @@ mp_integer rotate_right( mp_integer rotate_left( const mp_integer &, const mp_integer &, std::size_t true_size); -const std::string integer2string(const mp_integer &, unsigned base=10); -const mp_integer string2integer(const std::string &, unsigned base=10); +const std::string +integer2string(const mp_integer &, unsigned base = DECIMAL_SYSTEM); +const mp_integer +string2integer(const std::string &, unsigned base = DECIMAL_SYSTEM); + const std::string integer2binary(const mp_integer &, std::size_t width); const mp_integer binary2integer(const std::string &, bool is_signed); diff --git a/src/util/std_types.h b/src/util/std_types.h index 3cf53f56db1..700c5da6689 100644 --- a/src/util/std_types.h +++ b/src/util/std_types.h @@ -22,6 +22,13 @@ Author: Daniel Kroening, kroening@kroening.com class constant_exprt; +/// This method tests, +/// if the given typet is a constant +inline bool is_constant(const typet &type) +{ + return type.id() == ID_constant; +} + /// The Boolean type class bool_typet:public typet { @@ -284,6 +291,13 @@ class struct_typet:public struct_union_typet bool is_prefix_of(const struct_typet &other) const; }; +/// This method tests, +/// if the given typet is a struct +inline bool is_struct(const typet &type) +{ + return type.id() == ID_struct; +} + /// \brief Cast a typet to a \ref struct_typet /// /// This is an unchecked conversion. \a type must be known to be \ref @@ -978,6 +992,12 @@ class array_typet:public type_with_subtypet return size().is_nil(); } }; +/// This method tests, +/// if the given typet is an array_typet +inline bool is_array(const typet &type) +{ + return type.id() == ID_array; +} /// Check whether a reference to a typet is a \ref array_typet. /// \param type Source type. @@ -1087,6 +1107,13 @@ class bitvector_typet:public type_with_subtypet } }; +/// This method tests, +/// if the given typet is a signed or unsigned bitvector. +inline bool is_signed_or_unsigned_bitvector(const typet &type) +{ + return type.id() == ID_signedbv || type.id() == ID_unsignedbv; +} + /// \brief Cast a typet to a \ref bitvector_typet /// /// This is an unchecked conversion. \a type must be known to be \ref @@ -1395,6 +1422,20 @@ inline void validate_type(const pointer_typet &type) DATA_INVARIANT(type.get_width() > 0, "pointer must have non-zero width"); } +/// This method tests, +/// if the given typet is a pointer. +inline bool is_pointer(const typet &type) +{ + return type.id() == ID_pointer; +} + +/// This method tests, +/// if the given typet is a pointer of type void. +inline bool is_void_pointer(const typet &type) +{ + return is_pointer(type) && type.subtype().id() == ID_empty; +} + /// The reference type /// /// Intends to model C++ reference. Comparing to \ref pointer_typet should diff --git a/src/util/string2int.h b/src/util/string2int.h index 12c0260aaac..cd4f0459f09 100644 --- a/src/util/string2int.h +++ b/src/util/string2int.h @@ -6,28 +6,35 @@ Author: Michael Tautschnig, michael.tautschnig@cs.ox.ac.uk \*******************************************************************/ - #ifndef CPROVER_UTIL_STRING2INT_H #define CPROVER_UTIL_STRING2INT_H +#include "mp_arith.h" + #include // These check that the string is indeed a valid number, // and fail an assertion otherwise. // We use those for data types that C++11's std::stoi etc. do not // cover. -unsigned safe_string2unsigned(const std::string &str, int base=10); -std::size_t safe_string2size_t(const std::string &str, int base=10); +unsigned +safe_string2unsigned(const std::string &str, int base = DECIMAL_SYSTEM); +std::size_t +safe_string2size_t(const std::string &str, int base = DECIMAL_SYSTEM); // The below mimic C's atoi/atol: any errors are silently ignored. // They are meant to replace atoi/atol. -int unsafe_string2int(const std::string &str, int base=10); -unsigned unsafe_string2unsigned(const std::string &str, int base=10); -std::size_t unsafe_string2size_t(const std::string &str, int base=10); +int unsafe_string2int(const std::string &str, int base = DECIMAL_SYSTEM); +unsigned +unsafe_string2unsigned(const std::string &str, int base = DECIMAL_SYSTEM); +std::size_t +unsafe_string2size_t(const std::string &str, int base = DECIMAL_SYSTEM); // Same for atoll -long long int unsafe_string2signedlonglong(const std::string &str, int base=10); +long long int +unsafe_string2signedlonglong(const std::string &str, int base = DECIMAL_SYSTEM); long long unsigned int unsafe_string2unsignedlonglong( - const std::string &str, int base=10); + const std::string &str, + int base = DECIMAL_SYSTEM); #endif // CPROVER_UTIL_STRING2INT_H diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 036cc3ce4c4..e282f9cdfee 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -1,3 +1,14 @@ +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR + "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang" OR + "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" +) + # private is overwritten in the gdb_api test cases + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-keyword-macro") +elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") + # This would be the place to enable warnings for Windows builds, although + # config.inc doesn't seem to do that currently +endif() + file(GLOB_RECURSE sources "*.cpp" "*.h") file(GLOB_RECURSE testing_utils "testing-utils/*.cpp" "testing-utils/*.h") diff --git a/unit/Makefile b/unit/Makefile index eeff4e499fe..f076326086e 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -17,6 +17,7 @@ SRC += unit_tests.cpp \ analyses/does_remove_const/is_type_at_least_as_const_as.cpp \ goto-programs/goto_trace_output.cpp \ path_strategies.cpp \ + memory-analyzer/gdb_api.cpp \ solvers/floatbv/float_utils.cpp \ solvers/refinement/array_pool/array_pool.cpp \ solvers/refinement/string_constraint_generator_valueof/calculate_max_string_length.cpp \ @@ -100,7 +101,13 @@ TESTS = unit_tests$(EXEEXT) \ miniBDD$(EXEEXT) \ # Empty last line -CLEANFILES = $(TESTS) +CLEANFILES = $(TESTS) \ + miniBDD$(OBJEXT) \ + #Empty last line + +ifneq ($(CC),cl) + CXXFLAGS += -Wno-keyword-macro +endif all: cprover.dir testing-utils.dir $(MAKE) $(MAKEARGS) $(TESTS) diff --git a/unit/memory-analyzer/gdb_api.cpp b/unit/memory-analyzer/gdb_api.cpp new file mode 100644 index 00000000000..80547419040 --- /dev/null +++ b/unit/memory-analyzer/gdb_api.cpp @@ -0,0 +1,235 @@ +// Copyright 2018 Malte Mues +#include + +#ifdef __linux__ +// \file Test that the regex expression used work as expected. +#define private public +#include +#include + +SCENARIO( + "gdb_apit uses regex as expected for memory", + "[core][memory-analyzer]") +{ + gdb_apit gdb_api(""); + GIVEN("The result of a struct pointer") + { + const std::string line = "$2 = (struct cycle_buffer *) 0x601050 "; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x601050"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x601050"); + } + } + + GIVEN("The result of a typedef pointer") + { + const std::string line = "$4 = (cycle_buffer_entry_t *) 0x602010"; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x602010"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x602010"); + } + } + + GIVEN("The result of a char pointer") + { + const std::string line = "$5 = 0x400734 \"snow\""; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x400734"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x400734"); + } + } + + GIVEN("The result of a null pointer") + { + const std::string line = "$2 = 0x0"; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x0"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x0"); + } + } + + GIVEN("The result of a char pointer at very low address") + { + const std::string line = "$34 = 0x007456 \"snow\""; + THEN("the result matches the memory address and not nullpointer") + { + REQUIRE(gdb_api.extract_memory(line) == "0x007456"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x007456"); + } + } + + GIVEN("The result of a char pointer with some more whitespaces") + { + const std::string line = "$12 = 0x400752 \"thunder storm\"\n"; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x400752"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x400752"); + } + } + + GIVEN("The result of an array pointer") + { + const std::string line = "$5 = (struct a_sub_type_t (*)[4]) 0x602080"; + THEN("the result matches the memory address in the output") + { + REQUIRE(gdb_api.extract_memory(line) == "0x602080"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x602080"); + } + } + + GIVEN("A constant struct pointer pointing to 0x0") + { + const std::string line = "$3 = (const struct name *) 0x0"; + THEN("the returned memory address should be 0x0") + { + REQUIRE(gdb_api.extract_memory(line) == "0x0"); + } + } + + GIVEN("An enum address") + { + const std::string line = "$2 = (char *(*)[5]) 0x7e5500 "; + THEN("the returned address is the address of the enum") + { + REQUIRE(gdb_api.extract_memory(line) == "0x7e5500"); + } + } + + GIVEN("The result of an int pointer") + { + const std::string line = "$1 = (int *) 0x601088 \n"; + THEN("the result is the value of memory address of the int pointer") + { + REQUIRE(gdb_api.extract_memory(line) == "0x601088"); + } + THEN("adding '(gdb) ' to the line doesn't have an influence") + { + REQUIRE(gdb_api.extract_memory("(gdb) " + line) == "0x601088"); + } + } + + GIVEN("Non matching result") + { + const std::string line = "Something that must not match 0x605940"; + THEN("an exception is thrown") + { + REQUIRE_THROWS_AS( + gdb_api.extract_memory(line), gdb_interaction_exceptiont); + } + } +} + +SCENARIO( + "gdb_apit uses regex as expected for value extraction", + "[core][memory-analyzer]") +{ + gdb_apit gdb_api(""); + GIVEN("An integer value") + { + const std::string line = "$90 = 100"; + THEN("the result schould be '100'") + { + REQUIRE(gdb_api.extract_value(line) == "100"); + } + } + + GIVEN("A string value") + { + const std::string line = "$5 = 0x602010 \"snow\""; + THEN("the result should be 'snow'") + { + REQUIRE(gdb_api.extract_value(line) == "snow"); + } + } + + GIVEN("A string with withe spaces") + { + const std::string line = "$1323 = 0x1243253 \"thunder storm\"\n"; + THEN("the result should be 'thunder storm'") + { + REQUIRE(gdb_api.extract_value(line) == "thunder storm"); + } + } + + GIVEN("A byte value") + { + const std::string line = "$2 = 243 '\363'"; + THEN("the result should be 243") + { + REQUIRE(gdb_api.extract_value(line) == "243"); + } + } + + GIVEN("A negative int value") + { + const std::string line = "$8 = -32"; + THEN("the result should be -32") + { + REQUIRE(gdb_api.extract_value(line) == "-32"); + } + } + + GIVEN("An enum value") + { + const std::string line = "$1 = INFO"; + THEN("the result should be INFO") + { + REQUIRE(gdb_api.extract_value(line) == "INFO"); + } + } + + GIVEN("A void pointer value") + { + const std::string line = "$6 = (const void *) 0x71"; + THEN("the requried result should be 0x71") + { + REQUIRE(gdb_api.extract_value(line) == "0x71"); + } + } + + GIVEN("A gdb response that contains 'cannot access memory'") + { + const std::string line = "Cannot access memory at address 0x71"; + THEN("a gdb_inaccesible_memoryt excepition must be raised") + { + REQUIRE_THROWS_AS(gdb_api.extract_value(line), gdb_inaccessible_memoryt); + } + } + + GIVEN("A value that must not match") + { + const std::string line = "$90 = must not match 20"; + THEN("an exception is raised") + { + REQUIRE_THROWS_AS( + gdb_api.extract_value(line), gdb_interaction_exceptiont); + } + } +} +#endif diff --git a/unit/memory-analyzer/module_dependencies.txt b/unit/memory-analyzer/module_dependencies.txt new file mode 100644 index 00000000000..aa1bc7f84f8 --- /dev/null +++ b/unit/memory-analyzer/module_dependencies.txt @@ -0,0 +1,2 @@ +memory-analyzer +testing-utils diff --git a/unit/module_dependencies.txt b/unit/module_dependencies.txt index 1640922e72d..9b9375c73bf 100644 --- a/unit/module_dependencies.txt +++ b/unit/module_dependencies.txt @@ -5,6 +5,7 @@ goto-programs goto-symex json langapi # should go away +memory-analyzer solvers/flattening solvers/floatbv solvers/miniBDD