Skip to content

Commit dc0ce77

Browse files
authored
Merge pull request diffblue#465 from diffblue/smowton/feature/csvsa-efficiency-improvement
SEC-495 CSVSA: don't store all domains or all values
2 parents 21a1954 + 746c580 commit dc0ce77

File tree

8 files changed

+214
-29
lines changed

8 files changed

+214
-29
lines changed

driver/analyser.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ def run_security_analyser(
5656
data_flow_insensitive_instrumentation,
5757
results_dir,
5858
temp_root_dir,
59-
lazy_methods_context_sensitive
59+
lazy_methods_context_sensitive,
60+
verify_csvsa_sparse_domains
6061
):
6162
prof = {}
6263
prof_start_time = time.time()
@@ -80,7 +81,8 @@ def run_security_analyser(
8081
"output-dir": "./",
8182
"temp-dir": temp_root_dir,
8283
"data-flow-insensitive-instrumentation": data_flow_insensitive_instrumentation,
83-
"lazy-methods-context-sensitive": lazy_methods_context_sensitive
84+
"lazy-methods-context-sensitive": lazy_methods_context_sensitive,
85+
"verify-csvsa-sparse-domains": verify_csvsa_sparse_domains
8486
}
8587
with open(root_config_json_fname, "w") as root_config_json_file:
8688
root_config_json_file.write(json.dumps(root_config_json, sort_keys=True, indent=4))

driver/run.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ def __parse_cmd_line():
150150
"paths.")
151151
parser.add_argument("--lazy-methods-context-sensitive", action="store_true",
152152
help="Use context-sensitive method loading in the first stage")
153+
parser.add_argument("--verify-csvsa-sparse-domains", action="store_true",
154+
help="Verify CSVSA's sparse domains optimisation does not alter its output")
153155
return parser.parse_args()
154156

155157

@@ -200,7 +202,8 @@ def evaluate(cmdline, common_libraries):
200202
cmdline.data_flow_insensitive_instrumentation,
201203
cmdline.results_dir,
202204
os.path.abspath(os.path.join(cmdline.temp_dir,"GOTO-ANALYSER-TEMP")),
203-
cmdline.lazy_methods_context_sensitive
205+
cmdline.lazy_methods_context_sensitive,
206+
cmdline.verify_csvsa_sparse_domains
204207
)
205208

206209
instrumented_programs_json_path = \

regression/end_to_end/driver.py

+3
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,9 @@ def run_security_analyser_pipeline(
123123

124124
if "SECURITY_REGRESSION_TESTS_USE_CSVSA" in os.environ and os.environ["SECURITY_REGRESSION_TESTS_USE_CSVSA"] != "0":
125125
cmdline.append("--lazy-methods-context-sensitive")
126+
# Check that CSVSA's behaviour with and without sparse domains
127+
# optimisation matches:
128+
cmdline.append("--verify-csvsa-sparse-domains")
126129

127130
pretty_cmdline = " ".join(quote_pathnames_in_command_line(cmdline))
128131

src/driver/sec_driver_parse_options.cpp

+88
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,72 @@ static irep_idt get_cprover_start_main_callee(const goto_modelt &goto_model)
135135
.get_identifier();
136136
}
137137

138+
class csvsa_graph_mismatcht : public std::logic_error
139+
{
140+
public:
141+
explicit csvsa_graph_mismatcht(const std::string &s) : logic_error(s)
142+
{
143+
}
144+
};
145+
146+
static void check_csvsa_graphs_equal(
147+
const context_sensitive_value_set_analysis_drivert &analysis_a,
148+
const context_sensitive_value_set_analysis_drivert &analysis_b)
149+
{
150+
const auto &graph_a = analysis_a.get_context_graph();
151+
const auto &graph_b = analysis_b.get_context_graph();
152+
153+
if(graph_a.size() != graph_b.size())
154+
{
155+
throw csvsa_graph_mismatcht(
156+
"Different function context count: " + std::to_string(graph_a.size()) +
157+
" vs. " + std::to_string(graph_b.size()));
158+
}
159+
160+
for(std::size_t node_idx = 0; node_idx < graph_a.size(); ++node_idx)
161+
{
162+
const csvsa_context_graph_nodet &node_a = graph_a[node_idx];
163+
const csvsa_context_graph_nodet &node_b = graph_b[node_idx];
164+
const irep_idt &function_a = node_a.context->function();
165+
const irep_idt &function_b = node_b.context->function();
166+
167+
if(function_a != function_b)
168+
{
169+
throw csvsa_graph_mismatcht(
170+
"Node " + std::to_string(node_idx) + " refers to functions " +
171+
id2string(function_a) + " vs. " + id2string(function_b));
172+
}
173+
174+
if(node_a.out.size() != node_b.out.size())
175+
{
176+
throw csvsa_graph_mismatcht(
177+
"Node " + std::to_string(node_idx) + " has " +
178+
std::to_string(node_a.out.size()) + " vs. " +
179+
std::to_string(node_b.out.size()) + " successors");
180+
}
181+
182+
for(auto out_iter_a = node_a.out.begin(), out_iter_b = node_b.out.begin();
183+
out_iter_a != node_a.out.end();
184+
++out_iter_a, ++out_iter_b)
185+
{
186+
if(out_iter_a->first != out_iter_b->first)
187+
{
188+
throw csvsa_graph_mismatcht(
189+
"Node " + std::to_string(node_idx) + " has differing children: " +
190+
"at least node " + std::to_string(out_iter_a->first) + " vs. " +
191+
std::to_string(out_iter_b->first));
192+
}
193+
194+
if(out_iter_a->second.is_recursive() != out_iter_b->second.is_recursive())
195+
{
196+
throw csvsa_graph_mismatcht(
197+
"Node " + std::to_string(node_idx) + " has an outgoing edge that "
198+
"disagrees regarding whether the edge is recursive");
199+
}
200+
}
201+
}
202+
}
203+
138204
/// invoke main modules
139205
int sec_driver_parse_optionst::doit()
140206
{
@@ -189,6 +255,25 @@ int sec_driver_parse_optionst::doit()
189255

190256
return CPROVER_EXIT_SUCCESS;
191257
}
258+
259+
if(cmdline.isset("verify-csvsa-sparse-domains"))
260+
{
261+
auto csvsa_without_sparse_domains =
262+
csvsa_load_and_analyze_functions_without_sparse_domains(
263+
lazy_goto_model, get_message_handler());
264+
265+
try
266+
{
267+
check_csvsa_graphs_equal(
268+
*csvsa_analysis, *csvsa_without_sparse_domains);
269+
}
270+
catch(const csvsa_graph_mismatcht &difference)
271+
{
272+
error() << "CSVSA results with and without sparse domains mismatch: "
273+
<< difference.what() << messaget::eom;
274+
return CPROVER_EXIT_INTERNAL_ERROR;
275+
}
276+
}
192277
}
193278
else
194279
lazy_goto_model.load_all_functions();
@@ -630,6 +715,9 @@ void sec_driver_parse_optionst::help()
630715
" their virtual callsites\n"
631716
" --show-csvsa-value-sets print CSVSA's value sets (possible pointer\n" // NOLINT (*)
632717
" targets) and exit.\n"
718+
" --verify-csvsa-sparse-domains after CSVSA completes, re-run it with\n"
719+
" sparse domains disabled and verify the\n"
720+
" results are the same.\n"
633721
"\n"
634722
"Java Bytecode frontend options:\n"
635723
" --classpath dir/jar set the classpath\n"

src/driver/sec_driver_parse_options.h

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ class optionst;
4949
"(rebuild-taint-cache)" \
5050
"(lazy-methods-context-sensitive)" \
5151
"(show-csvsa-value-sets)" \
52+
"(verify-csvsa-sparse-domains)" \
5253
"(java-assume-inputs-non-null)" \
5354
"(java-max-input-array-length):" \
5455
"(string-max-input-length):" \

src/pointer-analysis/context_sensitive_value_set_analysis.cpp

+36-12
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,13 @@ void csvsa_function_contextt::csvsa_visit(locationt l)
6262
{
6363
// Function returns are special-- if anything changed,
6464
// queue all of this context's callsites' successors:
65-
const auto &end_state = (*this)[l];
66-
std::unique_ptr<const value_set_domaint> end_state_without_locals;
65+
auto &end_state = (*this)[l];
66+
end_state.set_seen();
67+
68+
std::unique_ptr<const domaint> end_state_without_locals;
6769
for(const auto &caller : callers)
6870
{
69-
const value_set_domaint *export_state;
71+
const domaint *export_state;
7072
// Remove locals only on an edge that leaves a recursive part
7173
// of the program:
7274
if(recursive_status == RECURSIVE_CHILD)
@@ -120,7 +122,7 @@ void csvsa_function_contextt::csvsa_visit(locationt l)
120122

121123
void csvsa_function_contextt::merge_call_return_state(
122124
locationt call_loc,
123-
const value_set_domaint &callee_state)
125+
const domaint &callee_state)
124126
{
125127
value_sett callee_values = callee_state.value_set;
126128

@@ -203,9 +205,12 @@ bool csvsa_function_contextt::assign_formal_parameters(
203205
if(param_id.empty())
204206
continue;
205207
auto findit = entry_state_vs.values.find(param_id);
208+
bool should_track_parameter = csvsa_should_track_value(param);
206209
INVARIANT(
207-
findit != entry_state_vs.values.end(),
208-
"all parameters should have been assigned to by now");
210+
(findit != entry_state_vs.values.end()) == should_track_parameter,
211+
"(only) pointer-typed parameters should have been assigned to by now");
212+
if(!should_track_parameter)
213+
continue;
209214
if(findit->second.object_map.read().size() != *sizes_before_iter)
210215
return true;
211216
++sizes_before_iter;
@@ -266,14 +271,14 @@ void csvsa_function_contextt::read_function_arguments(
266271
}
267272
}
268273

269-
std::unique_ptr<const value_set_domaint>
270-
csvsa_function_contextt::cleanup_locals(const value_set_domaint &last_state)
274+
std::unique_ptr<const csvsa_function_contextt::domaint>
275+
csvsa_function_contextt::cleanup_locals(const domaint &last_state)
271276
{
272277
// Remove local variables, formal parameters etc from this function's
273278
// final state.
274279
// Shouldn't do this if this is a recursive function, as our locals
275280
// will inherit values via a (mutually) recursive call:
276-
std::unique_ptr<value_set_domaint> tmp_state(static_cast<value_set_domaint *>(
281+
std::unique_ptr<domaint> tmp_state(static_cast<domaint *>(
277282
make_temporary_state(last_state).release()));
278283
std::vector<irep_idt> to_erase;
279284
for(const auto &kv : last_state.value_set.values)
@@ -689,10 +694,11 @@ void csvsa_function_contextt::print_tree(
689694
}
690695
}
691696

692-
std::unique_ptr<context_sensitive_value_set_analysis_drivert>
697+
static std::unique_ptr<context_sensitive_value_set_analysis_drivert>
693698
csvsa_load_and_analyze_functions(
694699
lazy_goto_modelt &lazy_model,
695-
message_handlert &msg)
700+
message_handlert &msg,
701+
bool use_sparse_domains)
696702
{
697703
get_goto_programt get_function_from_lazy_model =
698704
[&lazy_model](const irep_idt &name) -> const goto_programt & {
@@ -701,14 +707,32 @@ csvsa_load_and_analyze_functions(
701707

702708
std::unique_ptr<context_sensitive_value_set_analysis_drivert> driver =
703709
util_make_unique<context_sensitive_value_set_analysis_drivert>(
704-
lazy_model.symbol_table, get_function_from_lazy_model);
710+
lazy_model.symbol_table,
711+
get_function_from_lazy_model,
712+
use_sparse_domains);
705713
driver->set_message_handler(msg);
706714

707715
(*driver)(goto_functionst::entry_point());
708716

709717
return driver;
710718
}
711719

720+
std::unique_ptr<context_sensitive_value_set_analysis_drivert>
721+
csvsa_load_and_analyze_functions(
722+
lazy_goto_modelt &lazy_model,
723+
message_handlert &msg)
724+
{
725+
return csvsa_load_and_analyze_functions(lazy_model, msg, true);
726+
}
727+
728+
std::unique_ptr<context_sensitive_value_set_analysis_drivert>
729+
csvsa_load_and_analyze_functions_without_sparse_domains(
730+
lazy_goto_modelt &lazy_model,
731+
message_handlert &msg)
732+
{
733+
return csvsa_load_and_analyze_functions(lazy_model, msg, false);
734+
}
735+
712736
bool context_sensitive_value_set_analysis_drivert
713737
::compare_working_set_entriest::operator()(
714738
const working_set_entryt &a, const working_set_entryt &b)

0 commit comments

Comments
 (0)