Skip to content

Commit cb467c9

Browse files
authored
Merge pull request #1173 from karkhaz/kk-synthesise-linker-symbols
Synthesise definitions of linker script symbols
2 parents c8f69b5 + 85521b0 commit cb467c9

File tree

18 files changed

+1189
-11
lines changed

18 files changed

+1189
-11
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
extern char src_start[];
2+
extern char src_end[];
3+
extern char dst_start[];
4+
5+
void *memcpy(void *dest, void *src, unsigned n){
6+
return (void *)0;
7+
}
8+
9+
int main(){
10+
memcpy(dst_start, src_start, (unsigned)src_end - (unsigned)src_start);
11+
return 0;
12+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
MEMORY {
2+
RAM : ORIGIN = 0x0, LENGTH = 25M
3+
}
4+
5+
SECTIONS
6+
{
7+
/* GCC insists on having these */
8+
.note.gnu.build-id : { } > RAM
9+
.text : { } > RAM
10+
11+
.src_section : {
12+
src_start = .;
13+
*(.text*)
14+
src_end = .;
15+
} > RAM
16+
17+
.dst_section : {
18+
dst_start = .;
19+
*(.text*)
20+
dst_end = .;
21+
} > RAM
22+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
CORE
2+
main.c
3+
-o out.gb -T script.ld -nostdlib
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
--
7+
^warning: ignoring
8+
^CONVERSION ERROR$
9+
--
10+
Tesing the functionality of goto-cc's linker script parsing
11+
functionality, ensuring that it can get the values of symbols that are
12+
defined in linker scripts.
13+
14+
This test ensures that goto-cc and ls-parse can:
15+
16+
- get the value of a symbol whose value indicates the start of a section;
17+
- get the value of a symbol whose value indicates the end of a section.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
extern char src_start[];
2+
extern char src_size[];
3+
extern char dst_start[];
4+
5+
void *memcpy(void *dest, void *src, unsigned n){
6+
return (void *)0;
7+
}
8+
9+
int main(){
10+
memcpy(dst_start, src_start, (unsigned)src_size);
11+
return 0;
12+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
MEMORY {
3+
RAM : ORIGIN = 0x0, LENGTH = 25M
4+
}
5+
6+
SECTIONS
7+
{
8+
/* GCC insists on having these */
9+
.note.gnu.build-id : { } > RAM
10+
.text : { } > RAM
11+
12+
.src_section : {
13+
src_start = .;
14+
*(.text*)
15+
src_end = .;
16+
} > RAM
17+
18+
src_size = src_end - src_start;
19+
20+
.dst_section : {
21+
dst_start = .;
22+
*(.text*)
23+
dst_end = .;
24+
} > RAM
25+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
CORE
2+
main.c
3+
-o out.gb -T script.ld -nostdlib
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
--
7+
^warning: ignoring
8+
^CONVERSION ERROR$
9+
--
10+
Tesing the functionality of goto-cc's linker script parsing
11+
functionality, ensuring that it can get the values of symbols that are
12+
defined in linker scripts.
13+
14+
This test ensures that goto-cc and ls-parse can:
15+
16+
- get the value of a symbol whose value indicates the start of a section;
17+
- get the value of a symbol whose value indicates the size of a section,
18+
and whose value has been generated through a basic arithmetic
19+
expression in the linker script.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
extern char sym[];
2+
3+
int main(){
4+
int foo = (int)sym;
5+
return 0;
6+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
MEMORY {
2+
RAM : ORIGIN = 0x0, LENGTH = 25M
3+
}
4+
5+
SECTIONS
6+
{
7+
/* GCC insists on having these */
8+
.note.gnu.build-id : { } > RAM
9+
.text : { } > RAM
10+
11+
.src_section : {
12+
sym = .;
13+
*(.text*)
14+
} > RAM
15+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
CORE
2+
main.c
3+
-o out.gb -T script.ld -nostdlib
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
--
7+
^warning: ignoring
8+
^CONVERSION ERROR$
9+
--
10+
Tesing the functionality of goto-cc's linker script parsing
11+
functionality, ensuring that it can get the values of symbols that are
12+
defined in linker scripts.
13+
14+
This test ensures that goto-cc and ls-parse can get the value of a
15+
symbol whose value indicates the start of a section.

scripts/ls_parse.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,10 @@ def get_linker_script_data(script):
7777
# script is a whitespace.
7878
text = " %s" % text
7979

80-
close_brace = re.compile(r"\s}")
80+
# Lex out comments
81+
text = re.sub(r"/\*.*?\*/", " ", text)
82+
83+
close_brace = re.compile(r"\s}(\s*>\s*\w+)?")
8184
memory_cmd = re.compile(r"\sMEMORY\s*{")
8285
sections_cmd = re.compile(r"\sSECTIONS\s*{")
8386
assign_current = re.compile(r"\s(?P<sym>\w+)\s*=\s*\.\s*;")
@@ -256,6 +259,7 @@ def close_brace_fun(state, _, buf):
256259
sym = state["end-valid"].group("sym")
257260
info("'%s' marks the end of section '%s'", sym, sec)
258261
state["sections"][sec]["end"] = sym
262+
state["end-valid"] = None
259263
else:
260264
# Linker script assigned end-of-section to a symbol, but not
261265
# the start. this is useless to us.
@@ -388,7 +392,7 @@ def symbols_from(object_file):
388392

389393
def match_up_addresses(script_data, symbol_table):
390394
ret = []
391-
for data in script_data["sections"].values():
395+
for name, data in script_data["sections"].items():
392396
ok = False
393397
if "size" in data and "start" in data:
394398
ok = True
@@ -404,6 +408,7 @@ def match_up_addresses(script_data, symbol_table):
404408
region["start"] = {"sym": sym, "val": value}
405409
if "end" in data and sym == data["end"]:
406410
region["end"] = {"sym": sym, "val": value}
411+
region["section"] = name
407412
append = False
408413
if "size" in region and "start" in region:
409414
append = True
@@ -424,6 +429,9 @@ def get_region_range(region):
424429
e_var = region["end"]["sym"]
425430
ret["start"] = start
426431
ret["size"] = size
432+
ret["start-symbol"] = s_var
433+
ret["end-symbol"] = e_var
434+
ret["has-end-symbol"] = True
427435
ret["annot"] = "__CPROVER_allocated_memory(%s, %d);" % (hex(start), size)
428436
ret["commt"] = "from %s to %s" % (s_var, e_var)
429437
elif "size" in region:
@@ -433,10 +441,14 @@ def get_region_range(region):
433441
z_var = region["size"]["sym"]
434442
ret["start"] = start
435443
ret["size"] = size
444+
ret["start-symbol"] = s_var
445+
ret["size-symbol"] = z_var
446+
ret["has-end-symbol"] = False
436447
ret["annot"] = "__CPROVER_allocated_memory(%s, %d);" % (hex(start), size)
437448
ret["commt"] = "from %s for %s bytes" % (s_var, z_var)
438449
else:
439450
raise "Malformatted region\n%s" % str(region)
451+
ret["section"] = region["section"]
440452
return ret
441453

442454

@@ -502,9 +514,9 @@ def main():
502514

503515
regions = match_up_addresses(script_data, symbol_table)
504516

505-
info(json.dumps(symbol_table, indent=2))
506-
info(json.dumps(script_data, indent=2))
507-
info(json.dumps(regions, indent=2))
517+
info("symbol table %s" % json.dumps(symbol_table, indent=2))
518+
info("script data %s" % json.dumps(script_data, indent=2))
519+
info("regions %s" % json.dumps(regions, indent=2))
508520

509521
final = json.dumps(final_json_output(regions, symbol_table),
510522
indent=2)

src/goto-cc/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ SRC = armcc_cmdline.cpp \
1313
goto_cc_main.cpp \
1414
goto_cc_mode.cpp \
1515
ld_cmdline.cpp \
16+
linker_script_merge.cpp \
1617
ms_cl_cmdline.cpp \
1718
ms_cl_mode.cpp \
1819
# Empty last line

src/goto-cc/gcc_mode.cpp

+31-4
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,37 @@ Author: CM Wintersteiger, 2006
1919
#include <sysexits.h>
2020
#endif
2121

22+
#include <algorithm>
2223
#include <cstddef>
2324
#include <cstdio>
2425
#include <iostream>
26+
#include <iterator>
2527
#include <fstream>
2628
#include <cstring>
2729
#include <numeric>
2830
#include <sstream>
2931

32+
#include <json/json_parser.h>
33+
34+
#include <util/expr.h>
35+
#include <util/c_types.h>
36+
#include <util/arith_tools.h>
3037
#include <util/string2int.h>
3138
#include <util/invariant.h>
3239
#include <util/tempdir.h>
40+
#include <util/tempfile.h>
3341
#include <util/config.h>
3442
#include <util/prefix.h>
3543
#include <util/suffix.h>
3644
#include <util/get_base_name.h>
3745
#include <util/run.h>
46+
#include <util/replace_symbol.h>
47+
48+
#include <goto-programs/read_goto_binary.h>
3849

3950
#include <cbmc/version.h>
4051

41-
#include "compile.h"
52+
#include "linker_script_merge.h"
4253

4354
static std::string compiler_name(
4455
const cmdlinet &cmdline,
@@ -98,6 +109,7 @@ gcc_modet::gcc_modet(
98109
produce_hybrid_binary(_produce_hybrid_binary),
99110
act_as_ld(base_name=="ld" ||
100111
base_name.find("goto-ld")!=std::string::npos),
112+
goto_binary_tmp_suffix(".goto-cc-saved"),
101113

102114
// Keys are architectures specified in configt::set_arch().
103115
// Values are lists of GCC architectures that can be supplied as
@@ -837,7 +849,7 @@ int gcc_modet::run_gcc(const compilet &compiler)
837849
return run(new_argv[0], new_argv, cmdline.stdin_file, "");
838850
}
839851

840-
int gcc_modet::gcc_hybrid_binary(const compilet &compiler)
852+
int gcc_modet::gcc_hybrid_binary(compilet &compiler)
841853
{
842854
{
843855
bool have_files=false;
@@ -891,17 +903,20 @@ int gcc_modet::gcc_hybrid_binary(const compilet &compiler)
891903
<< " to generate hybrid binary" << eom;
892904

893905
// save the goto-cc output files
906+
std::list<std::string> goto_binaries;
894907
for(std::list<std::string>::const_iterator
895908
it=output_files.begin();
896909
it!=output_files.end();
897910
it++)
898911
{
899-
int result=rename(it->c_str(), (*it+".goto-cc-saved").c_str());
912+
std::string bin_name=*it+goto_binary_tmp_suffix;
913+
int result=rename(it->c_str(), bin_name.c_str());
900914
if(result!=0)
901915
{
902916
error() << "Rename failed: " << std::strerror(errno) << eom;
903917
return result;
904918
}
919+
goto_binaries.push_back(bin_name);
905920
}
906921

907922
std::string objcopy_cmd;
@@ -919,6 +934,18 @@ int gcc_modet::gcc_hybrid_binary(const compilet &compiler)
919934

920935
int result=run_gcc(compiler);
921936

937+
if(result==0)
938+
{
939+
linker_script_merget ls_merge(
940+
compiler, output_files, goto_binaries, cmdline, gcc_message_handler);
941+
const int fail=ls_merge.add_linker_script_definitions();
942+
if(fail!=0)
943+
{
944+
error() << "Unable to merge linker script symbols" << eom;
945+
return fail;
946+
}
947+
}
948+
922949
// merge output from gcc with goto-binaries
923950
// using objcopy, or do cleanup if an earlier call failed
924951
for(std::list<std::string>::const_iterator
@@ -927,7 +954,7 @@ int gcc_modet::gcc_hybrid_binary(const compilet &compiler)
927954
it++)
928955
{
929956
debug() << "merging " << *it << eom;
930-
std::string saved=*it+".goto-cc-saved";
957+
std::string saved=*it+goto_binary_tmp_suffix;
931958

932959
#ifdef __linux__
933960
if(result==0 && !cmdline.isset('c'))

src/goto-cc/gcc_mode.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Date: June 2006
1616

1717
#include <util/cout_message.h>
1818

19+
#include "compile.h"
1920
#include "goto_cc_mode.h"
2021

2122
class compilet;
@@ -39,6 +40,9 @@ class gcc_modet:public goto_cc_modet
3940
const bool act_as_ld;
4041
std::string native_tool_name;
4142

43+
const std::string goto_binary_tmp_suffix;
44+
45+
/// \brief Associate CBMC architectures with processor names
4246
const std::map<std::string, std::set<std::string>> arch_map;
4347

4448
int preprocess(
@@ -50,7 +54,7 @@ class gcc_modet:public goto_cc_modet
5054
/// \brief call gcc with original command line
5155
int run_gcc(const compilet &compiler);
5256

53-
int gcc_hybrid_binary(const compilet &compiler);
57+
int gcc_hybrid_binary(compilet &compiler);
5458

5559
int asm_output(
5660
bool act_as_bcc,

0 commit comments

Comments
 (0)