Skip to content

Commit ed76af8

Browse files
authored
Merge pull request diffblue#133 from diffblue/marek/instr_props_PR
Extension of instrumentation props for support of shadow variables.
2 parents ca7f29c + 372aecf commit ed76af8

File tree

4 files changed

+256
-22
lines changed

4 files changed

+256
-22
lines changed

security-scanner/presentation.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,45 @@ def build_HTML_interface_to_slicer_instrumentation_props(props,fname):
139139
with open(fname,"w") as ofile:
140140
ofile.write(get_html_prefix("Instrumentation props"))
141141

142-
ofile.write("<h2>Definition of states of the instrumentation automaton</h2>\n")
142+
ofile.write("<h1>Instrumentation properties</h1>\n")
143+
144+
ofile.write("<p>\n")
145+
ofile.write("These data represent a complete information for instrumentation\n")
146+
ofile.write("of shadow variables and related program statements into the GOTO program.\n")
147+
ofile.write("</p>\n")
148+
149+
ofile.write("<h2>Definition of data types for instrumentation</h2>\n")
150+
151+
ofile.write("<p>\n")
152+
ofile.write("This is a list of program types which will be instrumented by shadow variables\n")
153+
ofile.write("identified here by the corresponding names of tokens.\n")
154+
ofile.write("</p>\n")
155+
156+
ofile.write("<table>\n")
157+
ofile.write(" <tr>\n")
158+
ofile.write(" <th>Type name</th>\n")
159+
ofile.write(" <th>Shadow variables</th>\n")
160+
ofile.write(" <th>Create sub-class?</th>\n")
161+
ofile.write(" </tr>\n")
162+
for dtype in props["datatypes"]:
163+
ofile.write(" <tr>\n")
164+
ofile.write(" <td>" + escape_text_to_HTML(dtype["type_name"]) + "</td>\n")
165+
ofile.write(" <td>\n")
166+
ofile.write(" <ul>\n")
167+
for var in dtype["shadow_vars"]:
168+
ofile.write(" <li>" + escape_text_to_HTML(var) + "</li>\n")
169+
ofile.write(" </ul>\n")
170+
ofile.write(" </td>\n")
171+
ofile.write(" <td align=\"center\">" + str(dtype["make_subclass"]) + "</td>\n")
172+
ofile.write(" </tr>\n")
173+
ofile.write("</table>\n")
174+
175+
ofile.write("<h2>Definition of instrumentation statements</h2>\n")
176+
177+
ofile.write("<p>\n")
178+
ofile.write("This is a list of program locations and descriptions of program statemenets\n")
179+
ofile.write("to be instrumented (at those locations).\n")
180+
ofile.write("</p>\n")
143181

144182
state_index = 0
145183
for loc in props["location_props"]:

src/taint-slicer/instrumentation_props.cpp

Lines changed: 167 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,60 @@ instrumentation_props data structure.
2222
\*******************************************************************/
2323

2424
#include <taint-slicer/instrumentation_props.h>
25+
#include <goto-programs/remove_returns.h>
26+
#include <java_bytecode/java_types.h>
27+
#include <java_bytecode/expr2java.h>
28+
#include <util/prefix.h>
2529
#include <util/msgstream.h>
2630
#include <deque>
2731

32+
bool is_primitive_type(const typet &type)
33+
{
34+
return type==java_boolean_type() ||
35+
type==java_byte_type() ||
36+
type==java_char_type() ||
37+
type==java_short_type() ||
38+
type==java_int_type() ||
39+
type==java_long_type() ||
40+
type==java_float_type() ||
41+
type==java_double_type();
42+
}
43+
44+
std::string unwrap_type_name(const typet &type, const namespacet &ns)
45+
{
46+
if(type.id()==ID_pointer)
47+
return unwrap_type_name(to_pointer_type(type).subtype(), ns);
48+
if(type.id()==ID_array)
49+
return unwrap_type_name(to_array_type(type).subtype(), ns);
50+
if(type.id()==ID_symbol)
51+
return as_string(to_symbol_type(type).get_identifier());
52+
const std::string result=type2java(type, ns);
53+
if(!is_primitive_type(type))
54+
throw std::logic_error(
55+
"ERROR: expecting a primitive data type, but received '" + result + "'.");
56+
return result;
57+
}
58+
59+
bool is_java_array_type_name(const std::string &datatype)
60+
{
61+
return has_prefix(datatype, "java::array[") && *datatype.rbegin()==']';
62+
}
63+
64+
bool does_instrumentation_of_type_require_subclass(
65+
const std::string &datatype, const typet &type)
66+
{
67+
return is_primitive_type(type) ||
68+
is_java_array_type_name(datatype) ||
69+
datatype=="java::java.lang.Object";
70+
}
71+
72+
static typet get_return_type_from_function_name(
73+
const std::string &fname, const symbol_tablet &symbol_table)
74+
{
75+
assert(symbol_table.has_symbol(fname+RETURN_VALUE_SUFFIX));
76+
return symbol_table.lookup(fname+RETURN_VALUE_SUFFIX).type;
77+
}
78+
2879
/*******************************************************************\
2980
3081
Function: perform_BFS
@@ -72,6 +123,7 @@ static void perform_BFS(
72123

73124
taint_instrumentation_propst::taint_instrumentation_propst(
74125
const taint_propagation_chainst &chains,
126+
const taint_programt &program,
75127
const taint_function_idt &_root,
76128
const std::set<taint_function_idt> &in_functions,
77129
const std::set<taint_function_idt> &in_suppressed)
@@ -107,14 +159,96 @@ taint_instrumentation_propst::taint_instrumentation_propst(
107159
sinks.insert(location_props.size());
108160
location_props.push_back(chains.get_nodes().at(nid));
109161
}
162+
163+
build_map_from_typenames_to_tokennames(chains, program);
164+
}
165+
166+
167+
/*******************************************************************\
168+
169+
Function: build_map_from_typenames_to_tokennames
170+
171+
Inputs:
172+
173+
Outputs:
174+
175+
Purpose:
176+
The function fills in the member map "datatypes" so that for each
177+
data type (identified by its type name, see "unwrap_type_name") to be
178+
instrumented by a shadow variable there is computed an instance of
179+
"taint_instrumentation_propst::datatype_infot" type holding details
180+
about the instrumentation of the shadow variable into that type.
181+
182+
\*******************************************************************/
183+
void taint_instrumentation_propst::build_map_from_typenames_to_tokennames(
184+
const taint_propagation_chainst &chains,
185+
const taint_programt &program)
186+
{
187+
for(const auto &loc : get_location_props())
188+
{
189+
const goto_programt::instructiont &I=*loc.get_instruction_id();
190+
assert(I.type==FUNCTION_CALL);
191+
const code_function_callt &fn_call = to_code_function_call(I.code);
192+
const exprt &callee_expr = fn_call.function();
193+
assert(callee_expr.id()==ID_symbol);
194+
irep_idt callee_id = to_symbol_expr(callee_expr).get_identifier();
195+
const std::string callee_ident = as_string(callee_id);
196+
const code_typet &fn_type =
197+
program.get_functions().function_map.at(callee_id).type;
198+
199+
std::set<argidx_and_tokennamet> to_process;
200+
for(const auto &elem : loc.get_assumption())
201+
to_process.insert(elem);
202+
for(const auto &elem : loc.get_turn_on())
203+
to_process.insert(elem);
204+
for(const auto &elem : loc.get_turn_off())
205+
to_process.insert(elem);
206+
207+
for(const auto& arg_token : to_process)
208+
{
209+
assert(
210+
arg_token.get_argidx()!=
211+
taint_tokens_propagation_grapht::get_void_loc());
212+
std::string datatype;
213+
typet type;
214+
if(arg_token.get_argidx()==-1) // Return value?
215+
type=get_return_type_from_function_name(
216+
callee_ident,
217+
program.get_symbol_table());
218+
else
219+
type=fn_type.parameters().at(arg_token.get_argidx()).type();
220+
datatype=unwrap_type_name(type, program.get_namespace());
221+
auto it=datatypes.find(datatype);
222+
if(it!=datatypes.end())
223+
{
224+
assert(
225+
does_instrumentation_of_type_require_subclass(datatype, type)==
226+
it->second.subclass_required() &&
227+
is_primitive_type(type)==it->second.is_primitive());
228+
it->second.add_token(arg_token.get_token_name());
229+
}
230+
else
231+
{
232+
datatypes.insert(
233+
{
234+
datatype,
235+
{
236+
datatype,
237+
type,
238+
does_instrumentation_of_type_require_subclass(datatype, type),
239+
is_primitive_type(type),
240+
{ arg_token.get_token_name() }
241+
}
242+
});
243+
}
244+
}
245+
}
110246
}
111247

248+
112249
void taint_build_instrumentation_props(
113250
const taint_propagation_chainst &chains,
114-
const taint_tokens_propagation_grapht &tokens_propagation_graph,
115-
const goto_functionst &program_functions,
116-
const call_grapht &call_graph,
117-
const call_grapht &inverted_call_graph,
251+
const taint_programt &program,
118252
std::vector<taint_instrumentation_propst> &output)
119253
{
120254
// First we collect all functions mentioned in the graph of chains.
@@ -137,7 +271,8 @@ void taint_build_instrumentation_props(
137271
// all of them in the call graph; a nearest common caller has no
138272
// callees which are themselves common callers.
139273
std::set<irep_idt> roots;
140-
find_nearest_common_callees(inverted_call_graph, functions, roots);
274+
find_nearest_common_callees(
275+
program.get_inverted_call_graph(), functions, roots);
141276

142277
for(const auto &root : roots)
143278
{
@@ -150,7 +285,8 @@ void taint_build_instrumentation_props(
150285
std::set<taint_function_idt> suppressed;
151286
{
152287
// First we called all callees including those which should be suppressed.
153-
find_direct_or_indirect_callees_of_function(call_graph, root, callees);
288+
find_direct_or_indirect_callees_of_function(
289+
program.get_call_graph(), root, callees);
154290
// Now we compute suppressed functions and erase them from the callees
155291
// computed above. We do so in 3 steps.
156292
// Step 1: We collect functions which definitelly should be suppressed.
@@ -165,8 +301,8 @@ void taint_build_instrumentation_props(
165301
const std::string full_function_name=as_string(to_symbol_expr(
166302
to_code_function_call(I.code).function()).get_identifier());
167303
if(callees.count(full_function_name)!=0UL &&
168-
program_functions.function_map.at(full_function_name)
169-
.body_available())
304+
program.get_functions().function_map.at(full_function_name)
305+
.body_available())
170306
{
171307
suppressed.insert(full_function_name);
172308
}
@@ -178,7 +314,7 @@ void taint_build_instrumentation_props(
178314
std::unordered_set<irep_idt, dstring_hash> suppressions;
179315
for(const auto &fn : suppressed)
180316
find_direct_or_indirect_callees_of_function(
181-
call_graph, fn, suppressions);
317+
program.get_call_graph(), fn, suppressions);
182318
// Step 3: We copy from "suppressions" to "suppressed" each function
183319
// reachable from the root without passing through any function
184320
// collected in the step 1.
@@ -187,7 +323,8 @@ void taint_build_instrumentation_props(
187323
std::unordered_set<irep_idt, dstring_hash> ignored_functions(
188324
suppressed.cbegin(), suppressed.cend());
189325
if(!exists_direct_or_indirect_call(
190-
call_graph, root, *suppressions.cbegin(), ignored_functions))
326+
program.get_call_graph(), root, *suppressions.cbegin(),
327+
ignored_functions))
191328
{
192329
suppressed.insert(as_string(*suppressions.cbegin()));
193330
callees.erase(*suppressions.cbegin());
@@ -202,13 +339,14 @@ void taint_build_instrumentation_props(
202339
// table.)
203340
std::set<taint_function_idt> available_functions;
204341
for(const auto &fn : callees)
205-
if(program_functions.function_map.at(fn).body_available())
342+
if(program.get_functions().function_map.at(fn).body_available())
206343
available_functions.insert(as_string(fn));
207344
// We are ready to create the instrumentation props for the root function.
208345
// This may fail, if the set of the corresponding chains is empty.
209346
const taint_instrumentation_propst props
210347
{
211348
chains,
349+
program,
212350
as_string(root),
213351
available_functions,
214352
suppressed
@@ -220,6 +358,24 @@ void taint_build_instrumentation_props(
220358

221359
void dump_as_json(const taint_instrumentation_propst &props, json_objectt &out)
222360
{
361+
{
362+
json_arrayt out_types;
363+
for(const auto &elem : props.get_datatypes())
364+
{
365+
json_arrayt out_tokens;
366+
for(const auto &name : elem.second.get_tokens())
367+
out_tokens.push_back(json_stringt(msgstream() << name));
368+
json_objectt out_type_props;
369+
out_type_props["type_name"]=json_stringt(elem.first);
370+
out_type_props["shadow_vars"]=out_tokens;
371+
out_type_props["make_subclass"]=jsont::json_boolean(
372+
elem.second.subclass_required());
373+
out_type_props["is_primitive"]=jsont::json_boolean(
374+
elem.second.is_primitive());
375+
out_types.push_back(out_type_props);
376+
}
377+
out["datatypes"]=out_types;
378+
}
223379
{
224380
json_arrayt out_locations;
225381
for(const auto &loc : props.get_location_props())

0 commit comments

Comments
 (0)