Skip to content

Synthesise definitions of linker script symbols #1173

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions regression/ansi-c/linker_script_start+end/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
extern char src_start[];
extern char src_end[];
extern char dst_start[];

void *memcpy(void *dest, void *src, unsigned n){
return (void *)0;
}

int main(){
memcpy(dst_start, src_start, (unsigned)src_end - (unsigned)src_start);
return 0;
}
22 changes: 22 additions & 0 deletions regression/ansi-c/linker_script_start+end/script.ld
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
MEMORY {
RAM : ORIGIN = 0x0, LENGTH = 25M
}

SECTIONS
{
/* GCC insists on having these */
.note.gnu.build-id : { } > RAM
.text : { } > RAM

.src_section : {
src_start = .;
*(.text*)
src_end = .;
} > RAM

.dst_section : {
dst_start = .;
*(.text*)
dst_end = .;
} > RAM
}
17 changes: 17 additions & 0 deletions regression/ansi-c/linker_script_start+end/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
CORE
main.c
-o out.gb -T script.ld -nostdlib
^EXIT=0$
^SIGNAL=0$
--
^warning: ignoring
^CONVERSION ERROR$
--
Tesing the functionality of goto-cc's linker script parsing
functionality, ensuring that it can get the values of symbols that are
defined in linker scripts.

This test ensures that goto-cc and ls-parse can:

- get the value of a symbol whose value indicates the start of a section;
- get the value of a symbol whose value indicates the end of a section.
12 changes: 12 additions & 0 deletions regression/ansi-c/linker_script_start+size/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
extern char src_start[];
extern char src_size[];
extern char dst_start[];

void *memcpy(void *dest, void *src, unsigned n){
return (void *)0;
}

int main(){
memcpy(dst_start, src_start, (unsigned)src_size);
return 0;
}
25 changes: 25 additions & 0 deletions regression/ansi-c/linker_script_start+size/script.ld
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

MEMORY {
RAM : ORIGIN = 0x0, LENGTH = 25M
}

SECTIONS
{
/* GCC insists on having these */
.note.gnu.build-id : { } > RAM
.text : { } > RAM

.src_section : {
src_start = .;
*(.text*)
src_end = .;
} > RAM

src_size = src_end - src_start;

.dst_section : {
dst_start = .;
*(.text*)
dst_end = .;
} > RAM
}
19 changes: 19 additions & 0 deletions regression/ansi-c/linker_script_start+size/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
CORE
main.c
-o out.gb -T script.ld -nostdlib
^EXIT=0$
^SIGNAL=0$
--
^warning: ignoring
^CONVERSION ERROR$
--
Tesing the functionality of goto-cc's linker script parsing
functionality, ensuring that it can get the values of symbols that are
defined in linker scripts.

This test ensures that goto-cc and ls-parse can:

- get the value of a symbol whose value indicates the start of a section;
- get the value of a symbol whose value indicates the size of a section,
and whose value has been generated through a basic arithmetic
expression in the linker script.
6 changes: 6 additions & 0 deletions regression/ansi-c/linker_script_symbol-only/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
extern char sym[];

int main(){
int foo = (int)sym;
return 0;
}
15 changes: 15 additions & 0 deletions regression/ansi-c/linker_script_symbol-only/script.ld
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
MEMORY {
RAM : ORIGIN = 0x0, LENGTH = 25M
}

SECTIONS
{
/* GCC insists on having these */
.note.gnu.build-id : { } > RAM
.text : { } > RAM

.src_section : {
sym = .;
*(.text*)
} > RAM
}
15 changes: 15 additions & 0 deletions regression/ansi-c/linker_script_symbol-only/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
CORE
main.c
-o out.gb -T script.ld -nostdlib
^EXIT=0$
^SIGNAL=0$
--
^warning: ignoring
^CONVERSION ERROR$
--
Tesing the functionality of goto-cc's linker script parsing
functionality, ensuring that it can get the values of symbols that are
defined in linker scripts.

This test ensures that goto-cc and ls-parse can get the value of a
symbol whose value indicates the start of a section.
22 changes: 17 additions & 5 deletions scripts/ls_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ def get_linker_script_data(script):
# script is a whitespace.
text = " %s" % text

close_brace = re.compile(r"\s}")
# Lex out comments
text = re.sub(r"/\*.*?\*/", " ", text)

close_brace = re.compile(r"\s}(\s*>\s*\w+)?")
memory_cmd = re.compile(r"\sMEMORY\s*{")
sections_cmd = re.compile(r"\sSECTIONS\s*{")
assign_current = re.compile(r"\s(?P<sym>\w+)\s*=\s*\.\s*;")
Expand Down Expand Up @@ -256,6 +259,7 @@ def close_brace_fun(state, _, buf):
sym = state["end-valid"].group("sym")
info("'%s' marks the end of section '%s'", sym, sec)
state["sections"][sec]["end"] = sym
state["end-valid"] = None
else:
# Linker script assigned end-of-section to a symbol, but not
# the start. this is useless to us.
Expand Down Expand Up @@ -388,7 +392,7 @@ def symbols_from(object_file):

def match_up_addresses(script_data, symbol_table):
ret = []
for data in script_data["sections"].values():
for name, data in script_data["sections"].items():
ok = False
if "size" in data and "start" in data:
ok = True
Expand All @@ -404,6 +408,7 @@ def match_up_addresses(script_data, symbol_table):
region["start"] = {"sym": sym, "val": value}
if "end" in data and sym == data["end"]:
region["end"] = {"sym": sym, "val": value}
region["section"] = name
append = False
if "size" in region and "start" in region:
append = True
Expand All @@ -424,6 +429,9 @@ def get_region_range(region):
e_var = region["end"]["sym"]
ret["start"] = start
ret["size"] = size
ret["start-symbol"] = s_var
ret["end-symbol"] = e_var
ret["has-end-symbol"] = True
ret["annot"] = "__CPROVER_allocated_memory(%s, %d);" % (hex(start), size)
ret["commt"] = "from %s to %s" % (s_var, e_var)
elif "size" in region:
Expand All @@ -433,10 +441,14 @@ def get_region_range(region):
z_var = region["size"]["sym"]
ret["start"] = start
ret["size"] = size
ret["start-symbol"] = s_var
ret["size-symbol"] = z_var
ret["has-end-symbol"] = False
ret["annot"] = "__CPROVER_allocated_memory(%s, %d);" % (hex(start), size)
ret["commt"] = "from %s for %s bytes" % (s_var, z_var)
else:
raise "Malformatted region\n%s" % str(region)
ret["section"] = region["section"]
return ret


Expand Down Expand Up @@ -502,9 +514,9 @@ def main():

regions = match_up_addresses(script_data, symbol_table)

info(json.dumps(symbol_table, indent=2))
info(json.dumps(script_data, indent=2))
info(json.dumps(regions, indent=2))
info("symbol table %s" % json.dumps(symbol_table, indent=2))
info("script data %s" % json.dumps(script_data, indent=2))
info("regions %s" % json.dumps(regions, indent=2))

final = json.dumps(final_json_output(regions, symbol_table),
indent=2)
Expand Down
1 change: 1 addition & 0 deletions src/goto-cc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ SRC = armcc_cmdline.cpp \
goto_cc_main.cpp \
goto_cc_mode.cpp \
ld_cmdline.cpp \
linker_script_merge.cpp \
ms_cl_cmdline.cpp \
ms_cl_mode.cpp \
# Empty last line
Expand Down
35 changes: 31 additions & 4 deletions src/goto-cc/gcc_mode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,37 @@ Author: CM Wintersteiger, 2006
#include <sysexits.h>
#endif

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <iterator>
#include <fstream>
#include <cstring>
#include <numeric>
#include <sstream>

#include <json/json_parser.h>

#include <util/expr.h>
#include <util/c_types.h>
#include <util/arith_tools.h>
#include <util/string2int.h>
#include <util/invariant.h>
#include <util/tempdir.h>
#include <util/tempfile.h>
#include <util/config.h>
#include <util/prefix.h>
#include <util/suffix.h>
#include <util/get_base_name.h>
#include <util/run.h>
#include <util/replace_symbol.h>

#include <goto-programs/read_goto_binary.h>

#include <cbmc/version.h>

#include "compile.h"
#include "linker_script_merge.h"

static std::string compiler_name(
const cmdlinet &cmdline,
Expand Down Expand Up @@ -98,6 +109,7 @@ gcc_modet::gcc_modet(
produce_hybrid_binary(_produce_hybrid_binary),
act_as_ld(base_name=="ld" ||
base_name.find("goto-ld")!=std::string::npos),
goto_binary_tmp_suffix(".goto-cc-saved"),

// Keys are architectures specified in configt::set_arch().
// Values are lists of GCC architectures that can be supplied as
Expand Down Expand Up @@ -837,7 +849,7 @@ int gcc_modet::run_gcc(const compilet &compiler)
return run(new_argv[0], new_argv, cmdline.stdin_file, "");
}

int gcc_modet::gcc_hybrid_binary(const compilet &compiler)
int gcc_modet::gcc_hybrid_binary(compilet &compiler)
{
{
bool have_files=false;
Expand Down Expand Up @@ -891,17 +903,20 @@ int gcc_modet::gcc_hybrid_binary(const compilet &compiler)
<< " to generate hybrid binary" << eom;

// save the goto-cc output files
std::list<std::string> goto_binaries;
for(std::list<std::string>::const_iterator
it=output_files.begin();
it!=output_files.end();
it++)
{
int result=rename(it->c_str(), (*it+".goto-cc-saved").c_str());
std::string bin_name=*it+goto_binary_tmp_suffix;
int result=rename(it->c_str(), bin_name.c_str());
if(result!=0)
{
error() << "Rename failed: " << std::strerror(errno) << eom;
return result;
}
goto_binaries.push_back(bin_name);
}

std::string objcopy_cmd;
Expand All @@ -919,6 +934,18 @@ int gcc_modet::gcc_hybrid_binary(const compilet &compiler)

int result=run_gcc(compiler);

if(result==0)
{
linker_script_merget ls_merge(
compiler, output_files, goto_binaries, cmdline, gcc_message_handler);
const int fail=ls_merge.add_linker_script_definitions();
if(fail!=0)
{
error() << "Unable to merge linker script symbols" << eom;
return fail;
}
}

// merge output from gcc with goto-binaries
// using objcopy, or do cleanup if an earlier call failed
for(std::list<std::string>::const_iterator
Expand All @@ -927,7 +954,7 @@ int gcc_modet::gcc_hybrid_binary(const compilet &compiler)
it++)
{
debug() << "merging " << *it << eom;
std::string saved=*it+".goto-cc-saved";
std::string saved=*it+goto_binary_tmp_suffix;

#ifdef __linux__
if(result==0 && !cmdline.isset('c'))
Expand Down
6 changes: 5 additions & 1 deletion src/goto-cc/gcc_mode.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Date: June 2006

#include <util/cout_message.h>

#include "compile.h"
#include "goto_cc_mode.h"

class compilet;
Expand All @@ -39,6 +40,9 @@ class gcc_modet:public goto_cc_modet
const bool act_as_ld;
std::string native_tool_name;

const std::string goto_binary_tmp_suffix;

/// \brief Associate CBMC architectures with processor names
const std::map<std::string, std::set<std::string>> arch_map;

int preprocess(
Expand All @@ -50,7 +54,7 @@ class gcc_modet:public goto_cc_modet
/// \brief call gcc with original command line
int run_gcc(const compilet &compiler);

int gcc_hybrid_binary(const compilet &compiler);
int gcc_hybrid_binary(compilet &compiler);

int asm_output(
bool act_as_bcc,
Expand Down
Loading