Skip to content

Commit e7e43da

Browse files
authored
Merge pull request diffblue#538 from diffblue/smowton/cleanup/goto-binary-save-routine
Cleanup: separate GOTO binary postprocessing from saving
2 parents 1cd1eeb + 01a59f9 commit e7e43da

7 files changed

+411
-500
lines changed

src/taint-analysis/taint_security_scanner.cpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,7 @@ bool taint_do_security_scan(
180180
"INSTRUMENTER")
181181
.native(),
182182
statistics,
183-
logger,
184-
config.is_html_dump_of_instrumented_program_enabled(),
185-
config.is_string_refinement_enabled());
183+
logger);
186184
instrumentation_driver.run(
187185
config.use_data_flow_insensitive_instrumentation());
188186

src/taint-instrumenter/instrumentation_driver.cpp

+123-31
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,13 @@
66

77
#include <algorithm>
88
#include <fstream>
9+
10+
#include <goto-programs/goto_statistics.h>
11+
#include <goto-programs/write_goto_binary.h>
912
#include <summaries/summary_dump.h>
1013
#include <taint-instrumenter/instrumentation_driver.h>
1114
#include <taint-instrumenter/instrumentation_props.h>
12-
#include <taint-instrumenter/instrumented_goto_binary_builder.h>
15+
#include <taint-instrumenter/postprocess_instrumented_goto_model.h>
1316
#include <taint-instrumenter/instrumenter.h>
1417
#include <taint-instrumenter/search_for_rule_applications.h>
1518
#include <util/file_util.h>
@@ -24,24 +27,117 @@ taint_instrumentation_drivert::taint_instrumentation_drivert(
2427
const boost::filesystem::path &results_dir,
2528
const boost::filesystem::path &temp_dir,
2629
taint_statisticst &statistics,
27-
messaget &logger,
28-
const bool perform_html_dump,
29-
const bool string_refinement_enabled)
30+
messaget &logger)
3031
: program(program),
3132
summaries(summaries),
3233
tokens_propagation_graph(tokens_propagation_graph),
3334
// , timeout(timeout)
3435
results_dir(results_dir),
3536
temp_dir(temp_dir),
3637
statistics(statistics),
37-
logger(logger),
38-
perform_html_dump(perform_html_dump),
39-
string_refinement_enabled(string_refinement_enabled)
38+
logger(logger)
4039
{
4140
boost::filesystem::create_directory(this->results_dir);
4241
boost::filesystem::create_directory(this->temp_dir);
4342
}
4443

44+
static void save_goto_binary(
45+
goto_modelt &instrumented_goto_model,
46+
const boost::filesystem::path &binary_path,
47+
messaget &logger)
48+
{
49+
logger.status() << " Saving the goto model as goto binary to disk."
50+
<< messaget::eom;
51+
52+
// Finally save the constructed goto_model on the disc as GOTO binary.
53+
const bool fail = write_goto_binary(
54+
binary_path.native(), instrumented_goto_model, logger.get_message_handler());
55+
INVARIANT(!fail, "write_goto_binary should always succeed");
56+
57+
logger.status() << " Saving statistics of the saved the goto program."
58+
<< messaget::eom;
59+
60+
// We compute and save basic statistics of the saved
61+
// GOTO program (i.e. the GOTO binary). These statistics will later
62+
// be used by the Python driver script to compare them with similar
63+
// statistics from the full slicer (implemented in goto-instrument)
64+
// in order to get the efficiency of the slicing.
65+
66+
auto out_json_file_pathname = binary_path;
67+
out_json_file_pathname.replace_extension("stats.json");
68+
69+
// We start with safety check related to the file to be written.
70+
bool can_write_stats = true;
71+
if(boost::filesystem::is_directory(out_json_file_pathname))
72+
{
73+
logger.error()
74+
<< "The path-name '" << out_json_file_pathname.native()
75+
<< "' for the statistics JSON output is actually an existing "
76+
<< "directory." << messaget::eom;
77+
can_write_stats = false;
78+
}
79+
if(can_write_stats)
80+
{
81+
// And let's compute and save the statistics.
82+
goto_statisticst stats;
83+
stats.extend(instrumented_goto_model);
84+
std::ofstream ofile(out_json_file_pathname.native());
85+
if(!ofile)
86+
{
87+
logger.error()
88+
<< "Failed to open the JSON file '"
89+
<< out_json_file_pathname.native()
90+
<< "' for writing. The statistic of the GOTO program won't be "
91+
<< "available." << messaget::eom;
92+
}
93+
else
94+
ofile << to_json(stats);
95+
}
96+
}
97+
98+
static void save_goto_binary_description(
99+
const taint_instrumentation_propst &props,
100+
const boost::filesystem::path &output_path,
101+
const boost::filesystem::path &goto_binary_path)
102+
{
103+
// Now we collect sink locations. This is necessary in subsequent phases
104+
// of the whole analysis. Namely, for recognition which error traces should
105+
// be considered and which discarded.
106+
107+
json_objectt binary_description;
108+
binary_description["goto_binary_file"] =
109+
json_stringt(goto_binary_path.native());
110+
111+
json_arrayt jsinks;
112+
typedef std::tuple<irep_idt, irep_idt, irep_idt> sink_loct;
113+
std::set<sink_loct> seen_sinks;
114+
115+
for(const auto loc_id : props.get_sinks())
116+
{
117+
const auto &loc = props.get_location_props()
118+
.at(loc_id)
119+
.get_instruction_id()
120+
->source_location;
121+
122+
if(seen_sinks.insert(
123+
sink_loct{loc.get_file(), loc.get_function(), loc.get_line()}).second)
124+
{
125+
json_objectt jloc;
126+
jloc["file"] = json_stringt(id2string(loc.get_file()));
127+
jloc["function"] = json_stringt(id2string(loc.get_function()));
128+
jloc["line"] = json_numbert(id2string(loc.get_line()));
129+
130+
jsinks.push_back(jloc);
131+
}
132+
}
133+
134+
binary_description["sinks"] = jsinks;
135+
136+
std::ofstream ostr(output_path.native());
137+
ostr << binary_description;
138+
}
139+
140+
45141
void taint_instrumentation_drivert::run(
46142
const bool use_data_flow_insensitive_instrumentation)
47143
{
@@ -108,33 +204,29 @@ void taint_instrumentation_drivert::run(
108204
statistics.end_instrumenter();
109205
statistics.begin_instrumented_goto_binary_build();
110206

111-
logger.status() << " Starting the builder of the instrumented GOTO binary."
207+
logger.status() << " Post-processing the instrumented model"
112208
<< messaget::eom;
113209

114-
const std::pair<taint_instrumented_goto_binary_infot, std::string>
115-
task_valid = build_instrumented_goto_binary(
116-
instrumentation_props,
117-
instrumenter,
118-
results_dir,
119-
instrumented_goto_model,
120-
program,
121-
statistics,
122-
logger,
123-
perform_html_dump,
124-
string_refinement_enabled);
125-
if(task_valid.second.empty())
126-
{
127-
logger.status() << " Saving info JSON file about the saved GOTO binary."
128-
<< messaget::eom;
210+
postprocess_instrumented_goto_model(
211+
instrumentation_props,
212+
instrumenter,
213+
instrumented_goto_model,
214+
program,
215+
logger);
129216

130-
json_objectt jtask;
131-
dump_as_json(task_valid.first, jtask);
132-
std::ofstream ostr(
133-
(results_dir / "instrumented_goto_program.json").native());
134-
ostr << jtask;
135-
}
136-
else
137-
logger.status() << task_valid.second << messaget::eom;
217+
logger.status() << " Saving the instrumented model"
218+
<< messaget::eom;
219+
220+
const auto binary_path = results_dir / "instrumented_goto_program.gbf";
221+
save_goto_binary(instrumented_goto_model, binary_path, logger);
222+
223+
logger.status() << " Saving info JSON file about the saved GOTO binary."
224+
<< messaget::eom;
225+
226+
const auto binary_description_path =
227+
results_dir / "instrumented_goto_program.json";
228+
save_goto_binary_description(
229+
instrumentation_props, binary_description_path, binary_path);
138230

139231
statistics.end_instrumented_goto_binary_build();
140232
}

src/taint-instrumenter/instrumentation_driver.h

+1-5
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,7 @@ class taint_instrumentation_drivert
2929
const boost::filesystem::path &results_dir,
3030
const boost::filesystem::path &temp_dir,
3131
taint_statisticst &statistics,
32-
messaget &logger,
33-
const bool perform_html_dump,
34-
const bool string_refinement_enabled);
32+
messaget &logger);
3533

3634
/// The method computes an instrumented GOTO program (referenced in the member
3735
/// `program`) The original GOTO program is NOT modified.
@@ -60,8 +58,6 @@ class taint_instrumentation_drivert
6058
boost::filesystem::path temp_dir;
6159
taint_statisticst &statistics;
6260
messaget &logger;
63-
bool perform_html_dump;
64-
bool string_refinement_enabled;
6561
};
6662

6763
#endif

0 commit comments

Comments
 (0)