Skip to content

Commit ca17b87

Browse files
author
owen-jones-diffblue
authored
Merge pull request diffblue#444 from diffblue/alfresco-fix-taint-lvsa-interface
SEC-445: Fix Alfresco failure to generate GOTO binary
2 parents 2ce7e85 + 3cd0d5e commit ca17b87

File tree

5 files changed

+89
-96
lines changed

5 files changed

+89
-96
lines changed

benchmarks/TRAINING/OWASP/extract_benchmarks.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def extract_benchmark(benchmark_name, java_bench_dir, html_files):
7979

8080
def extract_benchmarks():
8181
if not os.path.exists(get_app_target_dir()):
82-
print("ERROR: The directory '" + get_app_target_dir() + "' does not exists.")
82+
print("ERROR: The directory '" + get_app_target_dir() + "' does not exist.")
8383
return
8484
html_files = collect_html_files()
8585
java_bench_dir = os.path.join(get_java_dir(), get_packages_relative_dir(), "testcode")
@@ -123,7 +123,7 @@ def main(cmdline):
123123
benchmark_pathname =\
124124
os.path.join(get_class_dir(), get_packages_relative_dir(), "testcode", cmdline.single + ".class")
125125
if not os.path.exists(benchmark_pathname):
126-
print("ERROR: The benchmark file '" + benchmark_pathname + "' does not exists.")
126+
print("ERROR: The benchmark file '" + benchmark_pathname + "' does not exist.")
127127
return
128128

129129
extract_benchmark(

regression/december_demo_sprint/goto-analyzer/run.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,19 +89,19 @@ def evaluate_one_directory(cmdline):
8989
print("ERROR: Directory containing specification files was not specified.")
9090
return
9191
if not os.path.exists(cmdline.spec_dir) or os.path.isfile(cmdline.spec_dir):
92-
print("ERROR: Directory containing specification files does not exists: " + cmdline.spec_dir)
92+
print("ERROR: Directory containing specification files does not exist: " + cmdline.spec_dir)
9393
return
9494
if cmdline.sources_dir is None:
9595
print("ERROR: Sources dir of the analysed Java web application was not specified.")
9696
return
9797
if not os.path.exists(cmdline.sources_dir) or os.path.isfile(cmdline.sources_dir):
98-
print("ERROR: Sources dir of the analysed Java web application does not exists: " + cmdline.sources_dir)
98+
print("ERROR: Sources dir of the analysed Java web application does not exist: " + cmdline.sources_dir)
9999
return
100100
if cmdline.binaries_dir is None:
101101
print("ERROR: Binaries dir of the analysed Java web application was not specified.")
102102
return
103103
if not os.path.exists(cmdline.binaries_dir) or os.path.isfile(cmdline.binaries_dir):
104-
print("ERROR: Binaries dir of the analysed Java web application does not exists: " + cmdline.binaries_dir)
104+
print("ERROR: Binaries dir of the analysed Java web application does not exist: " + cmdline.binaries_dir)
105105
return
106106
if cmdline.temp_dir is None:
107107
print("ERROR: Temp dir of the analyser was not specified.")

src/pointer-analysis/local_value_set.cpp

Lines changed: 50 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,30 @@ upcast_to_type(exprt expr, const typet &intended_type, const namespacet &ns)
8585
return expr;
8686
}
8787

88-
/// Remove all superclass accesses from `expr` and return the result
89-
static exprt remove_all_typecasts_and_superclass_accesses(exprt expr)
88+
/// Check whether the passed expression represents an upcast to a parent.
89+
/// \param expr: The examined expression
90+
/// \param ns: A namespace; the 'follow' method is needed
91+
/// \return True, if 'expr' is an upcast, and false otherwise.
92+
bool is_parent_member_access(const member_exprt &expr, const namespacet &ns)
93+
{
94+
const typet &parent_type = ns.follow(expr.type());
95+
if(parent_type.id() != ID_struct)
96+
return false;
97+
const typet &child_type = ns.follow(expr.compound().type());
98+
INVARIANT(
99+
child_type.id() == ID_struct,
100+
"The compound of a member_exprt should be a struct");
101+
optionalt<typet> base_type = get_base_type(child_type, ns);
102+
return base_type && *base_type == parent_type;
103+
}
104+
105+
/// Remove all superclass accesses from `expr`
106+
/// \param expr: input expression
107+
/// \param ns: namespace
108+
/// \return input expression with all typecasts and superclass accesses removed
109+
exprt remove_all_typecasts_and_superclass_accesses(
110+
exprt expr,
111+
const namespacet &ns)
90112
{
91113
while(expr.id() == ID_member || expr.id() == ID_typecast)
92114
{
@@ -96,20 +118,36 @@ static exprt remove_all_typecasts_and_superclass_accesses(exprt expr)
96118
}
97119
else
98120
{
99-
const member_exprt &member = to_member_expr(expr);
100-
const std::string &component_name =
101-
id2string(member.get_component_name());
102-
const bool is_superclass_access = has_prefix(component_name, "@") &&
103-
component_name != "@lock" &&
104-
component_name != "@class_identifier";
105-
if(!is_superclass_access)
121+
const member_exprt &member = expr_checked_cast<member_exprt>(expr);
122+
if(!is_parent_member_access(member, ns))
106123
break;
107124
expr = member.compound();
108125
}
109126
}
110127
return expr;
111128
}
112129

130+
/// Remove any typecasts or soft casts from `input` and return the result, where
131+
/// a "soft cast" means dereferencing a pointer to a class, accessing the field
132+
/// corresponding to its base class, and taking the address of that field -
133+
/// `&input->parent_class`
134+
/// \param input: input expression
135+
/// \param ns: namespace
136+
/// \return input expression with all typecasts and soft casts removed
137+
exprt remove_casts(exprt input, const namespacet &ns)
138+
{
139+
while(input.id() == ID_typecast)
140+
input = input.op0();
141+
if(input.id() == ID_address_of)
142+
{
143+
const exprt &input_target = to_address_of_expr(input).object();
144+
exprt expr = remove_all_typecasts_and_superclass_accesses(input_target, ns);
145+
if(expr != input_target && expr.id() == ID_dereference)
146+
return to_dereference_expr(expr).pointer();
147+
}
148+
return input;
149+
}
150+
113151
void local_value_sett::make_union_adjusting_types(
114152
object_mapt &dest,
115153
const object_mapt &src,
@@ -120,7 +158,8 @@ void local_value_sett::make_union_adjusting_types(
120158
{
121159
// Remove soft-casts (and hard casts?) from the rhs value
122160
const exprt underlying_object =
123-
remove_all_typecasts_and_superclass_accesses(object_numbering[num.first]);
161+
remove_all_typecasts_and_superclass_accesses(
162+
object_numbering[num.first], ns);
124163

125164
if(underlying_object.id() == ID_external_value_set)
126165
{
@@ -273,24 +312,6 @@ static const typet &type_from_suffix(
273312
return *ret;
274313
}
275314

276-
/// Remove any soft casts from `input` and return the result, where a "soft
277-
/// cast" means dereferencing a pointer to a class, accessing the field
278-
/// corresponding to its base class, and taking the address of that field -
279-
/// `&input->parent_class`
280-
static exprt remove_casts(exprt input)
281-
{
282-
while(input.id() == ID_typecast)
283-
input = input.op0();
284-
if(input.id() == ID_address_of)
285-
{
286-
const exprt &input_target = to_address_of_expr(input).object();
287-
exprt expr = remove_all_typecasts_and_superclass_accesses(input_target);
288-
if(expr != input_target && expr.id() == ID_dereference)
289-
return to_dereference_expr(expr).pointer();
290-
}
291-
return input;
292-
}
293-
294315
void local_value_sett::get_value_set_rec(
295316
const exprt &expr,
296317
object_mapt &dest,
@@ -310,7 +331,7 @@ void local_value_sett::get_value_set_rec(
310331
object_mapt tmp;
311332
// Hard casts have been removed by the simplifier but soft casts need
312333
// to be removed too (maybe?)
313-
exprt expr_to_cast = remove_casts(typecast_expr.op());
334+
exprt expr_to_cast = remove_casts(typecast_expr.op(), ns);
314335
get_value_set_rec(expr_to_cast, tmp, suffix, original_type, ns);
315336
make_union_adjusting_types(dest, tmp, typecast_expr.type().subtype(), ns);
316337
}

src/pointer-analysis/local_value_set.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,5 +169,9 @@ class local_value_sett : public value_sett
169169
/// \return the name of the base type
170170
const irep_idt &
171171
update_expr_with_soft_cast_to_base_type(exprt &expr, const namespacet &ns);
172-
172+
bool is_parent_member_access(const member_exprt &expr, const namespacet &ns);
173+
exprt remove_all_typecasts_and_superclass_accesses(
174+
exprt expr,
175+
const namespacet &ns);
176+
exprt remove_casts(exprt input, const namespacet &ns);
173177
#endif

src/taint-analysis/taint_summary.cpp

Lines changed: 29 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -162,38 +162,39 @@ static exprt transform_external_objects(const exprt& e)
162162
return e;
163163
}
164164

165+
/// Use the output of LVSA to create a set of expressions which `query` might
166+
/// point to at a given point in the program. String literals, null objects and
167+
/// precise access path external values sets are not included in the output.
168+
/// \param query: input expression
169+
/// \param ns: namespace
170+
/// \param lvsa: the output of LVSA, to work out what `query` might point to
171+
/// \param instit: the point in the program to do the query
172+
/// \param [out] result: output
165173
static void collect_lvsa_access_paths(
166-
exprt const& query_in,
167-
namespacet const& ns,
168-
local_value_set_analysist& lvsa,
169-
instruction_iteratort const& instit,
174+
exprt const &query,
175+
namespacet const &ns,
176+
local_value_set_analysist &lvsa,
177+
instruction_iteratort const &instit,
170178
std::set<exprt> &result)
171179
{
172180
TMPROF_BLOCK();
173181

174-
if(query_in.id()==ID_side_effect)
182+
if(query.id() == ID_side_effect)
175183
{
176-
side_effect_exprt const see=to_side_effect_expr(query_in);
184+
side_effect_exprt const &see = to_side_effect_expr(query);
177185
if(see.get_statement()==ID_nondet)
178186
{
179187
// TODO(marek-trtik): Add computation of a proper points-to set for NONDET
180188
// in a side-effect expression
181-
assert(false);
182189
}
183190
}
184191

185-
const exprt* query=&query_in;
186-
while(query->id()==ID_typecast)
187-
query=&query->op0();
188-
const exprt& e = *query;
189-
190-
if(e.id()==ID_symbol ||
191-
e.id()==ID_index ||
192-
e.id()==ID_member ||
193-
e.id()==ID_dereference)
192+
if(
193+
query.id() == ID_symbol || query.id() == ID_index ||
194+
query.id() == ID_member || query.id() == ID_dereference)
194195
{
195196
value_setst::valuest referees;
196-
lvsa.get_values_reconstructed(instit,address_of_exprt(e),referees);
197+
lvsa.get_values_reconstructed(instit, address_of_exprt(query), referees);
197198
for(const auto& target : referees)
198199
{
199200
if(target.id()==ID_unknown)
@@ -230,7 +231,7 @@ static void collect_lvsa_access_paths(
230231
}
231232
else
232233
{
233-
forall_operands(it,e)
234+
forall_operands(it, query)
234235
collect_lvsa_access_paths(
235236
*it,
236237
ns,
@@ -240,6 +241,8 @@ static void collect_lvsa_access_paths(
240241
}
241242
}
242243

244+
/// Call `collect_lvsa_access_paths` and use taint_object_numbering to convert
245+
/// the results from `exprt`s to the format required for `lvalue_numbers_sett`
243246
static void collect_lvsa_access_paths(
244247
exprt const& query_in,
245248
namespacet const& ns,
@@ -361,52 +364,20 @@ exprt dereference_if_pointer(exprt expr)
361364
return expr;
362365
}
363366

364-
/// \brief Checks whether the passed expression represents an up cast to a
365-
/// parent.
366-
/// \param expr: The examined expression.
367-
/// \param hierarchy: The class heierarchy used for checking parent-child
368-
/// relation.
369-
/// \param ns: A namespace; the 'follow' method is needed.
370-
/// \return True, if 'expr' is an up cast, and false otherwise.
371-
bool is_parent_member_access(
372-
const member_exprt &expr,
373-
const class_hierarchyt &hierarchy,
374-
const namespacet &ns)
375-
{
376-
if(expr.get_component_number() != 0UL)
377-
return false;
378-
const typet &parent_type = ns.follow(expr.type());
379-
if(parent_type.id() != ID_struct)
380-
return false;
381-
const typet &child_type = ns.follow(expr.compound().type());
382-
INVARIANT(child_type.id() == ID_struct, "Compound of member_exprt is struct");
383-
const std::string parent_name =
384-
"java::" + as_string(to_struct_type(parent_type).get_tag());
385-
const std::string child_name =
386-
"java::" + as_string(to_struct_type(child_type).get_tag());
387-
const auto it = hierarchy.class_map.find(parent_name);
388-
if(it == hierarchy.class_map.cend())
389-
return false;
390-
for(const auto &child : it->second.children)
391-
if(child == child_name)
392-
return true;
393-
return false;
394-
}
395-
396367
/// \brief An extended version of the original LVSA utility function
397368
/// 'collect_lvsa_access_paths'. Namely, when the original function
398369
/// returns an empty set, then the extended one introduces a fresh
399-
/// EVS object for the passed (and dereferenced) expression. For non-empy
370+
/// EVS object for the passed (and dereferenced) expression. For non-empty
400371
/// output from the original function it iterates over all retrieved
401372
/// numbers and for each number representing an upcast (member_exprt) it calls
402373
/// itself in order to retrieve numbers also to parent accesses. This is because
403374
/// an upcast is represented in LVSA by one number and we also need LVSA
404375
/// number(s) of the actual parent object.
405-
/// \param expr: The expression whose dereference is passed to LVSA.
406-
/// \param it: The current program location (instruction iteratior)
376+
/// \param expr: The expression to look up in LVSA.
377+
/// \param it: The current program location (instruction iterator)
407378
/// \param lvsa: The LVSA analysis.
408379
/// \param result: The resulting points-to set.
409-
/// \return True, if the resulting points-to set is definitelly singular, and
380+
/// \return True, if the resulting points-to set is definitely singular, and
410381
/// false otherwise.
411382
bool taint_algorithm_computing_summary_of_functiont::
412383
collect_lvsa_access_paths_extended(
@@ -419,7 +390,8 @@ bool taint_algorithm_computing_summary_of_functiont::
419390
TMPROF_BLOCK();
420391

421392
bool singular = false;
422-
const exprt query_expr = dereference_if_pointer(expr);
393+
const exprt query_expr =
394+
dereference_if_pointer(remove_casts(expr, program->get_namespace()));
423395
lvalue_numbers_sett raw_result;
424396
collect_lvsa_access_paths(
425397
query_expr,
@@ -454,11 +426,7 @@ bool taint_algorithm_computing_summary_of_functiont::
454426
const exprt &raw_expr = numbering->at(raw_number);
455427
if(auto member_expr = expr_try_dynamic_cast<member_exprt>(raw_expr))
456428
{
457-
if(
458-
is_parent_member_access(
459-
*member_expr,
460-
program->get_class_hierarchy(),
461-
program->get_namespace()))
429+
if(is_parent_member_access(*member_expr, program->get_namespace()))
462430
{
463431
if(
464432
!collect_lvsa_access_paths_extended(
@@ -1202,7 +1170,7 @@ numbered_lvalue_to_taint_mapt taint_algorithm_computing_summary_of_functiont::
12021170
std::to_string(propagation_rule.get_id()),
12031171
"Although a propagation rule " +
12041172
std::to_string(propagation_rule.get_id()) +
1205-
"was found, it was NOT applied, because the symbolic value "
1173+
" was found, it was NOT applied, because the symbolic value "
12061174
"to be assigned is actually BOTTOM."});
12071175
}
12081176
else

0 commit comments

Comments
 (0)