Skip to content

Commit cedd7c8

Browse files
authored
Merge pull request diffblue#179 from diffblue/feature/refined_separation_of_instrumentation_pipelines
SEC-45: Separating instrumentation pipelines sooner - in instrumentation_props.
2 parents e7d0feb + 7a92a1b commit cedd7c8

File tree

6 files changed

+116
-71
lines changed

6 files changed

+116
-71
lines changed

security-scanner/presentation.py

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -149,28 +149,38 @@ def build_HTML_interface_to_slicer_instrumentation_props(props,fname):
149149
ofile.write("<h2>Definition of data types for instrumentation</h2>\n")
150150

151151
ofile.write("<p>\n")
152-
ofile.write("This is a list of program types which will be instrumented by shadow variables\n")
153-
ofile.write("identified here by the corresponding names of tokens.\n")
152+
if props["data_flow_insensitive_instrumentation_applied"]:
153+
ofile.write("Control-flow (but not data-flow) sensitive propagation of taint information was used.\n")
154+
else:
155+
ofile.write("Control- and data-flow sensitive propagation of taint information was used.\n")
154156
ofile.write("</p>\n")
155157

156-
ofile.write("<table>\n")
157-
ofile.write(" <tr>\n")
158-
ofile.write(" <th>Type name</th>\n")
159-
ofile.write(" <th>Shadow variables</th>\n")
160-
ofile.write(" <th>Create sub-class?</th>\n")
161-
ofile.write(" </tr>\n")
162-
for dtype in props["datatypes"]:
158+
if not props["data_flow_insensitive_instrumentation_applied"]:
159+
ofile.write("<h2>Definition of data types for instrumentation</h2>\n")
160+
161+
ofile.write("<p>\n")
162+
ofile.write("This is a list of program types which will be instrumented by shadow variables\n")
163+
ofile.write("identified here by the corresponding names of tokens.\n")
164+
ofile.write("</p>\n")
165+
166+
ofile.write("<table>\n")
163167
ofile.write(" <tr>\n")
164-
ofile.write(" <td>" + escape_text_to_HTML(dtype["type_name"]) + "</td>\n")
165-
ofile.write(" <td>\n")
166-
ofile.write(" <ul>\n")
167-
for var in dtype["shadow_vars"]:
168-
ofile.write(" <li>" + escape_text_to_HTML(var) + "</li>\n")
169-
ofile.write(" </ul>\n")
170-
ofile.write(" </td>\n")
171-
ofile.write(" <td align=\"center\">" + str(dtype["make_subclass"]) + "</td>\n")
168+
ofile.write(" <th>Type name</th>\n")
169+
ofile.write(" <th>Shadow variables</th>\n")
170+
ofile.write(" <th>Create sub-class?</th>\n")
172171
ofile.write(" </tr>\n")
173-
ofile.write("</table>\n")
172+
for dtype in props["datatypes"]:
173+
ofile.write(" <tr>\n")
174+
ofile.write(" <td>" + escape_text_to_HTML(dtype["type_name"]) + "</td>\n")
175+
ofile.write(" <td>\n")
176+
ofile.write(" <ul>\n")
177+
for var in dtype["shadow_vars"]:
178+
ofile.write(" <li>" + escape_text_to_HTML(var) + "</li>\n")
179+
ofile.write(" </ul>\n")
180+
ofile.write(" </td>\n")
181+
ofile.write(" <td align=\"center\">" + str(dtype["make_subclass"]) + "</td>\n")
182+
ofile.write(" </tr>\n")
183+
ofile.write("</table>\n")
174184

175185
ofile.write("<h2>Definition of instrumentation statements</h2>\n")
176186

@@ -468,7 +478,11 @@ def build_HTML_interface_to_results_and_statistics(
468478
ofile.write(" <td>" + str(cmdline.timeout) + "</td>\n")
469479
ofile.write(" </tr>\n")
470480
ofile.write(" <tr>\n")
471-
ofile.write(" <td>Verbosity level fors logging</td>\n")
481+
ofile.write(" <td>Use data-flow insensitive propagation of taint information?</td>\n")
482+
ofile.write(" <td>" + str(cmdline.data_flow_insensitive_instrumentation) + "</td>\n")
483+
ofile.write(" </tr>\n")
484+
ofile.write(" <tr>\n")
485+
ofile.write(" <td>Verbosity level for logging</td>\n")
472486
ofile.write(" <td>" + str(cmdline.verbosity) + "</td>\n")
473487
ofile.write(" </tr>\n")
474488
ofile.write(" <tr>\n")
@@ -580,6 +594,19 @@ def build_HTML_interface_to_results_and_statistics(
580594
ofile.write(" </tr>\n")
581595
ofile.write("</table>\n")
582596

597+
if cmdline.dump_html_program:
598+
original_program_root_html_filename = os.path.join(cmdline.results_dir,"goto-program","HTML","index.html")
599+
ofile.write("<p>\n")
600+
if os.path.isfile(original_program_root_html_filename):
601+
ofile.write("The listing of the original GOTO program can be found "
602+
"<a href=\"" + os.path.relpath(original_program_root_html_filename, cmdline.results_dir) +
603+
"\">here</a>.\n")
604+
else:
605+
ofile.write("ERROR: Cannot find root HTML file of the saved original GOTO program '\n")
606+
ofile.write(original_program_root_html_filename)
607+
ofile.write("'.\n")
608+
ofile.write("</p>\n")
609+
583610

584611
#######################################################################################################
585612
# PHASE 2

src/taint-slicer/instrumentation_props.cpp

Lines changed: 53 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,13 @@ taint_instrumentation_propst::taint_instrumentation_propst(
107107
const taint_programt &program,
108108
const taint_function_idt &_root,
109109
const std::set<taint_function_idt> &in_functions,
110-
const std::set<taint_function_idt> &in_suppressed)
110+
const std::set<taint_function_idt> &in_suppressed,
111+
const bool use_data_flow_insensitive_instrumentation)
111112
: root(_root)
112113
, functions(in_functions)
113114
, suppressed(in_suppressed)
115+
, use_data_flow_insensitive_version(
116+
use_data_flow_insensitive_instrumentation)
114117
{
115118
// First we compute "valid" nodes of the propagation chains w.r.t. the root
116119
// function. We find these nodes by 2 BFSs: one from sources and one from
@@ -141,7 +144,8 @@ taint_instrumentation_propst::taint_instrumentation_propst(
141144
location_props.push_back(chains.get_nodes().at(nid));
142145
}
143146

144-
build_map_from_typenames_to_tokennames(chains, program);
147+
if(!use_data_flow_insensitive_instrumentation)
148+
build_map_from_typenames_to_tokennames(chains, program);
145149
}
146150

147151

@@ -219,7 +223,8 @@ void taint_instrumentation_propst::build_map_from_typenames_to_tokennames(
219223
void taint_build_instrumentation_props(
220224
const taint_propagation_chainst &chains,
221225
const taint_programt &program,
222-
std::vector<taint_instrumentation_propst> &output)
226+
std::vector<taint_instrumentation_propst> &output,
227+
const bool use_data_flow_insensitive_instrumentation)
223228
{
224229
// First we collect all functions mentioned in the graph of chains.
225230
std::set<irep_idt> functions;
@@ -257,49 +262,54 @@ void taint_build_instrumentation_props(
257262
// First we collect all callees including those which should be suppressed.
258263
find_direct_or_indirect_callees_of_function(
259264
program.get_call_graph(), root, callees);
260-
// Now we compute suppressed functions and erase them from the callees
261-
// computed above. We do so in 3 steps.
262-
// Step 1: We collect functions which definitelly should be suppressed.
263-
// I.e. those corrensponding to applications of transition rules.
264-
for(const auto &node : chains.get_nodes())
265+
266+
if(use_data_flow_insensitive_instrumentation)
265267
{
266-
if(functions.count(node.get_function_id())!=0UL)
268+
// Now we compute suppressed functions and erase them from the callees
269+
// computed above. We do so in 3 steps.
270+
// Step 1: We collect functions which definitely should be suppressed
271+
// (those corresponding to applications of transition rules).
272+
for(const auto &node : chains.get_nodes())
267273
{
268-
goto_programt::instructiont const& I=*node.get_instruction_id();
269-
assert(I.type==FUNCTION_CALL);
270-
assert(to_code_function_call(I.code).function().id()==ID_symbol);
271-
const std::string full_function_name=as_string(to_symbol_expr(
272-
to_code_function_call(I.code).function()).get_identifier());
273-
if(callees.count(full_function_name)!=0UL &&
274-
program.get_functions().function_map.at(full_function_name)
275-
.body_available())
274+
if(functions.count(node.get_function_id())!=0UL)
276275
{
277-
suppressed.insert(full_function_name);
276+
goto_programt::instructiont const& I=*node.get_instruction_id();
277+
INVARIANT(I.type==FUNCTION_CALL, "");
278+
INVARIANT(to_code_function_call(I.code).function().id()==ID_symbol,
279+
"It must be a call via function identifier.");
280+
const std::string full_function_name=as_string(to_symbol_expr(
281+
to_code_function_call(I.code).function()).get_identifier());
282+
if(callees.count(full_function_name)!=0UL &&
283+
program.get_functions().function_map.at(full_function_name)
284+
.body_available())
285+
{
286+
suppressed.insert(full_function_name);
287+
}
278288
}
279289
}
280-
}
281-
// Step 2: We collect a potentially suppressed function into a temporary
282-
// collection "suppressions". These function are all those
283-
// call-graph reachable from functions collected in the step 1.
284-
std::unordered_set<irep_idt, dstring_hash> suppressions;
285-
for(const auto &fn : suppressed)
286-
find_direct_or_indirect_callees_of_function(
287-
program.get_call_graph(), fn, suppressions);
288-
// Step 3: We copy from "suppressions" to "suppressed" each function
289-
// reachable from the root without passing through any function
290-
// collected in the step 1.
291-
while(!suppressions.empty())
292-
{
293-
std::unordered_set<irep_idt, dstring_hash> ignored_functions(
294-
suppressed.cbegin(), suppressed.cend());
295-
if(!exists_direct_or_indirect_call(
296-
program.get_call_graph(), root, *suppressions.cbegin(),
297-
ignored_functions))
290+
// Step 2: We collect potentially suppressed functions into a temporary
291+
// collection "suppressions". These functions are all those
292+
// call-graph reachable from functions collected in the step 1.
293+
std::unordered_set<irep_idt, dstring_hash> suppressions;
294+
for(const auto &fn : suppressed)
295+
find_direct_or_indirect_callees_of_function(
296+
program.get_call_graph(), fn, suppressions);
297+
// Step 3: We copy from "suppressions" to "suppressed" each function
298+
// not reachable from the root without passing through any function
299+
// collected in the step 1.
300+
while(!suppressions.empty())
298301
{
299-
suppressed.insert(as_string(*suppressions.cbegin()));
300-
callees.erase(*suppressions.cbegin());
302+
std::unordered_set<irep_idt, dstring_hash> ignored_functions(
303+
suppressed.cbegin(), suppressed.cend());
304+
if(!exists_direct_or_indirect_call(
305+
program.get_call_graph(), root, *suppressions.cbegin(),
306+
ignored_functions))
307+
{
308+
suppressed.insert(as_string(*suppressions.cbegin()));
309+
callees.erase(*suppressions.cbegin());
310+
}
311+
suppressions.erase(suppressions.cbegin());
301312
}
302-
suppressions.erase(suppressions.cbegin());
303313
}
304314
}
305315
// We are only interested in callees with body defined (it makes no sense
@@ -319,7 +329,8 @@ void taint_build_instrumentation_props(
319329
program,
320330
as_string(root),
321331
available_functions,
322-
suppressed
332+
suppressed,
333+
use_data_flow_insensitive_instrumentation
323334
};
324335
if(!props.get_sources().empty() && !props.get_sinks().empty())
325336
output.push_back(props);
@@ -328,6 +339,8 @@ void taint_build_instrumentation_props(
328339

329340
void dump_as_json(const taint_instrumentation_propst &props, json_objectt &out)
330341
{
342+
out["data_flow_insensitive_instrumentation_applied"]=jsont::json_boolean(
343+
props.data_flow_insensitive_version_applied());
331344
{
332345
json_arrayt out_types;
333346
for(const auto &elem : props.get_datatypes())

src/taint-slicer/instrumentation_props.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ class taint_instrumentation_propst
8888
const taint_programt &program,
8989
const taint_function_idt &_root,
9090
const std::set<taint_function_idt> &in_functions,
91-
const std::set<taint_function_idt> &in_suppressed);
91+
const std::set<taint_function_idt> &in_suppressed,
92+
const bool use_data_flow_insensitive_instrumentation);
9293

9394
const std::vector<location_propst> &get_location_props() const
9495
{ return location_props; }
@@ -103,6 +104,8 @@ class taint_instrumentation_propst
103104
{ return suppressed; }
104105
const from_typenames_to_tokennames_mapt &get_datatypes() const
105106
{ return datatypes; }
107+
bool data_flow_insensitive_version_applied() const
108+
{ return use_data_flow_insensitive_version; }
106109

107110
private:
108111
void build_map_from_typenames_to_tokennames(
@@ -116,6 +119,7 @@ class taint_instrumentation_propst
116119
std::set<taint_function_idt> functions;
117120
std::set<taint_function_idt> suppressed;
118121
from_typenames_to_tokennames_mapt datatypes;
122+
bool use_data_flow_insensitive_version;
119123
};
120124

121125
typedef taint_instrumentation_propst::datatype_infot taint_datatype_infot;
@@ -127,7 +131,8 @@ typedef taint_instrumentation_propst::datatype_infot taint_datatype_infot;
127131
void taint_build_instrumentation_props(
128132
const taint_propagation_chainst &chains,
129133
const taint_programt &program,
130-
std::vector<taint_instrumentation_propst> &output);
134+
std::vector<taint_instrumentation_propst> &output,
135+
const bool use_data_flow_insensitive_instrumentation);
131136

132137
bool is_primitive_type(const typet &type);
133138
std::string unwrap_type_name(const typet &type, const namespacet &ns);

src/taint-slicer/instrumenter.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,12 @@ static irept add_shadow_variables_to_type(
7676
taint_instrumentert::taint_instrumentert(
7777
const taint_instrumentation_propst &in_props,
7878
const taint_programt *const in_program,
79-
taint_statisticst *const in_statistics,
80-
const bool use_data_flow_insensitive_instrumentation)
79+
taint_statisticst *const in_statistics)
8180
: props(in_props)
8281
, program(in_program)
8382
, statistics(in_statistics)
84-
, use_data_flow_insensitive_version(use_data_flow_insensitive_instrumentation)
83+
, use_data_flow_insensitive_version(
84+
in_props.data_flow_insensitive_version_applied())
8585
{
8686
}
8787

src/taint-slicer/instrumenter.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,7 @@ class taint_instrumentert
4646
taint_instrumentert(
4747
const taint_instrumentation_propst &props,
4848
const taint_programt * const in_program,
49-
taint_statisticst * const in_statistics,
50-
const bool use_data_flow_insensitive_instrumentation);
49+
taint_statisticst * const in_statistics);
5150

5251
void run();
5352

@@ -60,6 +59,9 @@ class taint_instrumentert
6059
const std::map<taint_tokent::namet, automaton_variable_idt> &
6160
get_from_tokens_to_vars() const { return from_tokens_to_vars; }
6261

62+
bool data_flow_insensitive_version_applied() const
63+
{ return use_data_flow_insensitive_version; }
64+
6365
private:
6466

6567
void instrument_data_types(const taint_instrumentation_propst &props);

src/taint-slicer/slicer.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ void taint_slicert::compute_slice(
8080
taint_build_instrumentation_props(
8181
propagation_chains,
8282
*program,
83-
instrumentation_props);
83+
instrumentation_props,
84+
use_data_flow_insensitive_instrumentation);
8485

8586
// Dump all computed instrumentation props into JSON files.
8687
for(std::size_t i=0U; i!=instrumentation_props.size(); ++i)
@@ -98,10 +99,7 @@ void taint_slicert::compute_slice(
9899
for(std::size_t i=0UL, n=instrumentation_props.size(); i!=n; ++i)
99100
{
100101
taint_instrumentert instrumenter(
101-
instrumentation_props.at(i),
102-
program,
103-
statistics,
104-
use_data_flow_insensitive_instrumentation);
102+
instrumentation_props.at(i), program, statistics);
105103

106104
instrumenter.run();
107105

0 commit comments

Comments
 (0)