Skip to content

Commit 4258f86

Browse files
authored
Merge pull request diffblue#451 from diffblue/smowton/feature/csvsa-tolerate-unknown-callsites
SEC-465: CSVSA: Improve algorithm for determining possible callees:
2 parents 5e11686 + ab15e08 commit 4258f86

17 files changed

+208
-6
lines changed
250 Bytes
Binary file not shown.
263 Bytes
Binary file not shown.
263 Bytes
Binary file not shown.
481 Bytes
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package CSVSA.TestIncompatibleTypes;
2+
3+
public class Test {
4+
5+
public static void test(int unknown) {
6+
A a = unknown == 0 ? new B() : new C();
7+
8+
if(a instanceof C) {
9+
C c = (C)a;
10+
// CSVSA should spot that the `new B()` above is not a possible callee.
11+
c.f();
12+
}
13+
}
14+
15+
}
16+
17+
class A {
18+
19+
public void f() { }
20+
21+
}
22+
23+
class B extends A {
24+
25+
public void f() { }
26+
27+
}
28+
29+
class C extends A {
30+
31+
public void f() { }
32+
33+
}

regression/CSVSA/TestIncompatibleTypes/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from regression.CSVSA.csvsa_driver import CsvsaDriver
2+
3+
4+
folder_name = 'TestIncompatibleTypes'
5+
6+
7+
def test_tolerate_unreachable_functions():
8+
csvsa_driver = \
9+
CsvsaDriver(folder_name).with_test_function('test').with_show_goto_functions(False)
10+
with csvsa_driver.run() as csvsa_expectation:
11+
csvsa_expectation.check_successful_run()
12+
csvsa_expectation.check_matches("Callsite at 44 may call: java::CSVSA.TestIncompatibleTypes.C.f")
13+
csvsa_expectation.check_does_not_match("Callsite at 44 may call: java::CSVSA.TestIncompatibleTypes.B.f")
Binary file not shown.
Binary file not shown.
543 Bytes
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package CSVSA.TestOpaqueTypePromotion;
2+
3+
public class Test {
4+
5+
public static void test() {
6+
7+
AbstractTest at = Opaque.getAbstractTest();
8+
// CSVSA should notice that the opaque object returned must have been
9+
// a ConcreteTest if the `f()` call is reachable:
10+
((ConcreteTest)at).f();
11+
12+
// In this case CSVSA won't know what the call might target, and should stub
13+
// the callsite (introduce a synthetic stub function)
14+
at.f();
15+
16+
// In this case CSVSA shouldn't guess that we actually target ConcreteTest,
17+
// as it doesn't know the parent-child relationship between the opaque type
18+
// and ConcreteTest.
19+
(new Opaque()).f();
20+
21+
}
22+
23+
}
24+
25+
abstract class AbstractTest {
26+
27+
abstract void f();
28+
29+
}
30+
31+
class ConcreteTest extends AbstractTest {
32+
33+
void f() { }
34+
35+
}
36+
37+
class Opaque extends ConcreteTest {
38+
39+
// For testing, this class is deleted, so the method
40+
// definition is irrelevant.
41+
public static AbstractTest getAbstractTest() { return new ConcreteTest(); }
42+
43+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from regression.CSVSA.csvsa_driver import CsvsaDriver
2+
3+
4+
folder_name = 'TestOpaqueTypePromotion'
5+
6+
7+
def test_opaque_type_promotion():
8+
csvsa_driver = \
9+
CsvsaDriver(folder_name).with_test_function('test').with_show_goto_functions(False)
10+
with csvsa_driver.run() as csvsa_expectation:
11+
csvsa_expectation.check_successful_run()
12+
csvsa_expectation.check_matches("Callsite at 17 may call: java::CSVSA.TestOpaqueTypePromotion.ConcreteTest.f:()V")
13+
csvsa_expectation.check_matches("Callsite at 25 (java::CSVSA.TestOpaqueTypePromotion.AbstractTest.f:()V) stubbed")
14+
csvsa_expectation.check_matches("Callsite at 43 (java::CSVSA.TestOpaqueTypePromotion.Opaque.f:()V) stubbed")

regression/CSVSA/csvsa_driver.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def __init__(self, folder_name):
2222
self.folder_name = folder_name
2323
self.with_test_class_name('Test')
2424
self.with_test_function('f')
25+
self.show_goto_functions = True
2526

2627
def with_test_class_name(self, test_class_name):
2728
self.class_name = test_class_name
@@ -32,14 +33,21 @@ def with_test_function(self, test_function_name):
3233
self.function_name = test_function_name
3334
return self
3435

36+
def with_show_goto_functions(self, show_goto_functions):
37+
self.show_goto_functions = show_goto_functions
38+
return self
39+
3540
def run(self):
3641
"""Run CSVSA and parse the output before returning it"""
3742
fq_class_name = '.'.join(['CSVSA', self.folder_name, self.class_name])
3843
fq_function_name = fq_class_name + '.' + self.function_name
3944
executable_path = os.path.join(os.environ['SECURITY_SCANNER_HOME'], 'bin', 'security-analyzer')
4045
class_path = os.path.join('CSVSA', self.folder_name, self.class_filename)
41-
cmd = [executable_path, '--function', fq_function_name, '--lazy-methods-context-sensitive',
42-
'--show-goto-functions', class_path]
46+
cmd = [executable_path, '--function', fq_function_name, '--lazy-methods-context-sensitive', class_path]
47+
if self.show_goto_functions:
48+
cmd.append('--show-goto-functions')
49+
else:
50+
cmd.append('--show-csvsa-value-sets')
4351

4452
executable_runner = ExecutableRunner(cmd)
4553
(stdout, _, return_code) = executable_runner.run(pipe_stderr_to_stdout=True)

src/driver/sec_driver_parse_options.cpp

+13
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,14 @@ int sec_driver_parse_optionst::doit()
179179
}
180180
csvsa_analysis=
181181
csvsa_load_and_analyze_functions(lazy_goto_model, get_message_handler());
182+
183+
if(cmdline.isset("show-csvsa-value-sets"))
184+
{
185+
csvsa_analysis->get_context(0).print_tree(lazy_goto_model, status());
186+
status() << messaget::eom;
187+
188+
return CPROVER_EXIT_SUCCESS;
189+
}
182190
}
183191
else
184192
lazy_goto_model.load_all_functions();
@@ -572,6 +580,11 @@ void sec_driver_parse_optionst::help()
572580
" --xml file_name output results in XML format to given file\n"
573581
" --function set main function name\n"
574582
" --lvsa-summary-directory directory to store LVSA summaries\n"
583+
" --lazy-methods-context-sensitive use CSVSA to determine which methods to\n" // NOLINT (*)
584+
" load and specialize functions to resolve\n" // NOLINT (*)
585+
" their virtual callsites\n"
586+
" --show-csvsa-value-sets print CSVSA's value sets (possible pointer\n" // NOLINT (*)
587+
" targets) and exit.\n"
575588
"\n"
576589
"Java Bytecode frontend options:\n"
577590
" --classpath dir/jar set the classpath\n"

src/driver/sec_driver_parse_options.h

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ class optionst;
4747
"(do-not-use-precise-access-paths)" \
4848
"(rebuild-taint-cache)" \
4949
"(lazy-methods-context-sensitive)" \
50+
"(show-csvsa-value-sets)" \
5051
UI_MESSAGE_OPTIONS \
5152
JAVA_BYTECODE_LANGUAGE_OPTIONS \
5253
// End of options

src/pointer-analysis/context_sensitive_value_set_analysis.cpp

+68-4
Original file line numberDiff line numberDiff line change
@@ -419,12 +419,31 @@ void csvsa_function_contextt::transform_function_stub(
419419
transform_child_callsite(l_call, {fname});
420420
}
421421

422+
// TODO: Replace this with a graph visitor that can terminate the graph walk
423+
// when it finds a node of interest.
424+
static bool is_ancestor(
425+
const irep_idt &ancestor,
426+
irep_idt candidate,
427+
const class_hierarchyt &class_hierarchy)
428+
{
429+
if(ancestor == candidate)
430+
return true;
431+
auto ancestors = class_hierarchy.get_parents_trans(candidate);
432+
return
433+
std::find(ancestors.begin(), ancestors.end(), ancestor) != ancestors.end();
434+
}
435+
422436
dispatch_table_entriest
423437
csvsa_function_contextt::get_virtual_callees(locationt l_call) const
424438
{
425439
const auto &call = to_code_function_call(l_call->code);
426440
INVARIANT(!call.arguments().empty(), "virtual calls must have a 'this' arg");
427441
const auto &this_expr = call.arguments()[0];
442+
irep_idt namespaced_this_tag =
443+
"java::" +
444+
id2string(
445+
to_struct_type(
446+
ns.follow(to_pointer_type(this_expr.type()).subtype())).get_tag());
428447

429448
dispatch_table_entriest callees;
430449
std::unordered_set<irep_idt, irep_id_hash> seen_class_ids;
@@ -440,6 +459,8 @@ csvsa_function_contextt::get_virtual_callees(locationt l_call) const
440459
const_cast<csvsa_function_contextt *>(this)->get_values(
441460
l_call, this_expr, raw_values);
442461

462+
const auto &class_hierarchy = driver.get_class_hierarchy();
463+
443464
for(const auto &raw_value : raw_values)
444465
{
445466
// Ignore invalid / failed objects, which represent uninitialized pointers,
@@ -456,7 +477,23 @@ csvsa_function_contextt::get_virtual_callees(locationt l_call) const
456477
INVARIANT(
457478
raw_value.type().id() == ID_pointer,
458479
"an unknown expression used as a `this` arg should have pointer type");
480+
459481
val = exprt(ID_unknown, raw_value.type().subtype());
482+
483+
// If the unknown is a parent of the expected this-argument type, or is of
484+
// opaque type and so unknown position in the class hierarchy, lift
485+
// it to at least that type. Presumably this is something like an opaque
486+
// method that returns an Object, which is later cast to a derived type.
487+
const auto &struct_type = to_struct_type(ns.follow(val.type()));
488+
const auto &struct_tag = struct_type.get_tag();
489+
irep_idt namespaced_tag = "java::"+id2string(struct_tag);
490+
491+
if(
492+
!struct_type.get_bool(ID_incomplete_class) &&
493+
is_ancestor(namespaced_tag, namespaced_this_tag, class_hierarchy))
494+
{
495+
val.type() = symbol_typet(namespaced_this_tag);
496+
}
460497
}
461498
else
462499
val = to_object_descriptor_expr(raw_value).object();
@@ -499,11 +536,21 @@ csvsa_function_contextt::get_virtual_callees(locationt l_call) const
499536
if(!seen_class_ids.insert(struct_tag).second)
500537
continue;
501538

502-
irep_idt namespaced_tag = "java::" + id2string(struct_tag);
539+
irep_idt namespaced_tag = "java::"+id2string(struct_tag);
503540
const auto &function_name = call.function().get(ID_component_name);
541+
resolve_inherited_componentt resolver(
542+
ns.get_symbol_table(), class_hierarchy);
504543

505544
const auto actual_callee = resolver(namespaced_tag, function_name, false);
506-
if(actual_callee.is_valid())
545+
if(!real_type.get_bool(ID_incomplete_class) &&
546+
!is_ancestor(namespaced_this_tag, namespaced_tag, class_hierarchy))
547+
{
548+
// Skip values that can't be the callee because they are not derived
549+
// from the `this` type of the callee (e.g. an `Object` instance cannot
550+
// possibly be the callee of `A.f()`)
551+
continue;
552+
}
553+
else if(actual_callee.is_valid())
507554
{
508555
dispatch_table_entryt new_callee;
509556
new_callee.class_id = namespaced_tag;
@@ -516,10 +563,27 @@ csvsa_function_contextt::get_virtual_callees(locationt l_call) const
516563
{
517564
// Stub call -- guess it exists on the first opaque parent type of the
518565
// given type:
519-
const auto &hierarchy = driver.get_class_hierarchy();
520566
irep_idt callee_class = call.function().get(ID_C_class);
521567
while(!ns.lookup(callee_class).type.get_bool(ID_incomplete_class))
522-
callee_class = hierarchy.class_map.at(callee_class).parents.at(0);
568+
{
569+
const auto &parents =
570+
class_hierarchy.class_map.at(callee_class).parents;
571+
if(parents.size() == 0)
572+
{
573+
warning() << "Can't find a possible callee for a value of type "
574+
<< struct_tag << " calling "
575+
<< call.function().get(ID_identifier) << messaget::eom;
576+
callee_class = irep_idt();
577+
break;
578+
}
579+
else
580+
{
581+
callee_class = parents.at(0);
582+
}
583+
}
584+
585+
if(callee_class.empty())
586+
continue;
523587

524588
irep_idt callee_function =
525589
id2string(callee_class) + "." + id2string(function_name);

src/pointer-analysis/context_sensitive_value_set_analysis.h

+13
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,19 @@ class csvsa_function_contextt : private value_set_analysist, public messaget
464464
std::ostream &out,
465465
bool brief = false) const;
466466

467+
/// Print a plain-text dump starting from the entry point:
468+
void print_tree(
469+
abstract_goto_modelt &goto_model,
470+
std::ostream &out,
471+
bool brief = false) const
472+
{
473+
print_tree(
474+
goto_model.get_goto_function(goto_functionst::entry_point()).body,
475+
0,
476+
out,
477+
brief);
478+
}
479+
467480
/// Print a brief plain-text dump of this context and its children
468481
void print_tree_brief(
469482
const goto_programt &prog,

0 commit comments

Comments
 (0)