Skip to content

Commit 97e6cc4

Browse files
committed
goto-symex: optionally support updates across member boundaries
If using the new option --symex-no-member-bounds, goto-symex will rewrite all indexed writes that constant propagation does not show to be within bounds to writes to the member object.
1 parent ccf8f57 commit 97e6cc4

File tree

7 files changed

+179
-13
lines changed

7 files changed

+179
-13
lines changed
Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
KNOWNBUG
1+
CORE
22
main.c
3-
--no-simplify
3+
--no-simplify --symex-no-member-bounds
44
^VERIFICATION SUCCESSFUL$
55
^EXIT=0$
66
^SIGNAL=0$
@@ -9,3 +9,5 @@ main.c
99
--
1010
This test passes when using the simplifier, but results in "WITH" expressions
1111
that update elements outside their bounds without the simplifier.
12+
--symex-no-member-bounds ensures that such expressions are rewritten by
13+
goto-symex.

scripts/expected_doxygen_warnings.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
warning: Include graph for 'goto_instrument_parse_options.cpp' not generated, too many nodes (93), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
22
warning: Included by graph for 'goto_model.h' not generated, too many nodes (95), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
3-
warning: Included by graph for 'arith_tools.h' not generated, too many nodes (167), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
3+
warning: Included by graph for 'arith_tools.h' not generated, too many nodes (168), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
44
warning: Included by graph for 'c_types.h' not generated, too many nodes (128), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
55
warning: Included by graph for 'config.h' not generated, too many nodes (78), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
66
warning: Included by graph for 'invariant.h' not generated, too many nodes (172), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.

src/cbmc/cbmc_parse_options.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,9 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options)
345345
options.set_option(
346346
"symex-cache-dereferences", cmdline.isset("symex-cache-dereferences"));
347347

348+
options.set_option(
349+
"symex-no-member-bounds", cmdline.isset("symex-no-member-bounds"));
350+
348351
if(cmdline.isset("incremental-loop"))
349352
{
350353
options.set_option(

src/goto-checker/bmc_util.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ void run_property_decider(
197197
"(unwind-max):" \
198198
"(ignore-properties-before-unwind-min)" \
199199
"(symex-cache-dereferences)" \
200+
"(symex-no-member-bounds)" \
200201

201202
#define HELP_BMC \
202203
" --paths [strategy] explore paths one at a time\n" \
@@ -252,6 +253,7 @@ void run_property_decider(
252253
" gets blacklisted\n" \
253254
" --graphml-witness filename write the witness in GraphML format to filename\n" /* NOLINT(*) */ \
254255
" --symex-cache-dereferences enable caching of repeated dereferences" \
256+
" --symex-no-member-bounds support updates beyond bounds of a compound member" /* NOLINT(*) */ \
255257
// clang-format on
256258

257259
#endif // CPROVER_GOTO_CHECKER_BMC_UTIL_H

src/goto-symex/symex_assign.cpp

Lines changed: 162 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@ Author: Daniel Kroening, [email protected]
1111

1212
#include "symex_assign.h"
1313

14-
#include "expr_skeleton.h"
15-
#include "goto_symex_state.h"
14+
#include <util/arith_tools.h>
1615
#include <util/byte_operators.h>
1716
#include <util/expr_util.h>
17+
#include <util/pointer_offset_size.h>
1818
#include <util/range.h>
1919

20+
#include "expr_skeleton.h"
21+
#include "goto_symex_state.h"
2022
#include "symex_config.h"
2123

2224
// We can either use with_exprt or update_exprt when building expressions that
@@ -253,11 +255,100 @@ void symex_assignt::assign_array(
253255
{
254256
const exprt &lhs_array=lhs.array();
255257
const exprt &lhs_index=lhs.index();
256-
const typet &lhs_index_type = lhs_array.type();
258+
const array_typet &lhs_array_type = to_array_type(lhs_array.type());
259+
260+
const bool may_be_out_of_bounds =
261+
symex_config.updates_across_member_bounds &&
262+
[&lhs_index, &lhs_array_type]() {
263+
if(
264+
lhs_index.id() != ID_constant ||
265+
lhs_array_type.size().id() != ID_constant)
266+
{
267+
return true;
268+
}
269+
270+
const auto lhs_index_int =
271+
numeric_cast_v<mp_integer>(to_constant_expr(lhs_index));
272+
273+
return lhs_index_int < 0 ||
274+
lhs_index_int >= numeric_cast_v<mp_integer>(
275+
to_constant_expr(lhs_array_type.size()));
276+
}();
277+
278+
if(
279+
may_be_out_of_bounds &&
280+
(lhs_array.id() == ID_member || lhs_array.id() == ID_index))
281+
{
282+
const auto subtype_bytes_opt = size_of_expr(lhs_array.type().subtype(), ns);
283+
CHECK_RETURN(subtype_bytes_opt.has_value());
257284

258-
PRECONDITION(lhs_index_type.id() == ID_array);
285+
exprt new_offset = mult_exprt{
286+
lhs_index,
287+
typecast_exprt::conditional_cast(*subtype_bytes_opt, lhs_index.type())};
259288

260-
if(use_update)
289+
exprt parent;
290+
if(lhs_array.id() == ID_member)
291+
{
292+
const member_exprt &member = to_member_expr(lhs_array);
293+
const auto member_offset = member_offset_expr(member, ns);
294+
CHECK_RETURN(member_offset.has_value());
295+
296+
parent = member.compound();
297+
new_offset = plus_exprt{
298+
typecast_exprt::conditional_cast(*member_offset, new_offset.type()),
299+
new_offset};
300+
}
301+
else
302+
{
303+
const index_exprt &index = to_index_expr(lhs_array);
304+
305+
const auto element_size_opt =
306+
size_of_expr(to_array_type(index.array().type()).subtype(), ns);
307+
CHECK_RETURN(element_size_opt.has_value());
308+
309+
parent = index.array();
310+
new_offset =
311+
plus_exprt{mult_exprt{typecast_exprt::conditional_cast(
312+
*element_size_opt, new_offset.type()),
313+
typecast_exprt::conditional_cast(
314+
index.index(), new_offset.type())},
315+
new_offset};
316+
}
317+
318+
if(symex_config.simplify_opt)
319+
simplify(new_offset, ns);
320+
321+
const byte_update_exprt new_rhs = make_byte_update(parent, new_offset, rhs);
322+
const expr_skeletont new_skeleton =
323+
full_lhs.compose(expr_skeletont::remove_op0(lhs_array));
324+
assign_rec(parent, new_skeleton, new_rhs, guard);
325+
}
326+
else if(
327+
may_be_out_of_bounds && (lhs_array.id() == ID_byte_extract_big_endian ||
328+
lhs_array.id() == ID_byte_extract_little_endian))
329+
{
330+
const byte_extract_exprt &byte_extract = to_byte_extract_expr(lhs_array);
331+
332+
const auto subtype_bytes_opt = size_of_expr(lhs_array.type().subtype(), ns);
333+
CHECK_RETURN(subtype_bytes_opt.has_value());
334+
335+
exprt new_offset =
336+
plus_exprt{mult_exprt{lhs_index,
337+
typecast_exprt::conditional_cast(
338+
*subtype_bytes_opt, lhs_index.type())},
339+
typecast_exprt::conditional_cast(
340+
byte_extract.offset(), lhs_index.type())};
341+
342+
if(symex_config.simplify_opt)
343+
simplify(new_offset, ns);
344+
345+
byte_extract_exprt new_lhs = byte_extract;
346+
new_lhs.offset() = new_offset;
347+
new_lhs.type() = rhs.type();
348+
349+
assign_rec(new_lhs, full_lhs, rhs, guard);
350+
}
351+
else if(use_update)
261352
{
262353
// turn
263354
// a[i]=e
@@ -378,8 +469,70 @@ void symex_assignt::assign_byte_extract(
378469
else
379470
UNREACHABLE;
380471

381-
const byte_update_exprt new_rhs{byte_update_id, lhs.op(), lhs.offset(), rhs};
382-
const expr_skeletont new_skeleton =
383-
full_lhs.compose(expr_skeletont::remove_op0(lhs));
384-
assign_rec(lhs.op(), new_skeleton, new_rhs, guard);
472+
const bool may_be_out_of_bounds =
473+
symex_config.updates_across_member_bounds && [&lhs, this]() {
474+
if(lhs.offset().id() != ID_constant)
475+
return true;
476+
const auto extract_size_opt = pointer_offset_size(lhs.type(), ns);
477+
if(!extract_size_opt.has_value())
478+
return true;
479+
const auto object_size_opt = pointer_offset_size(lhs.op().type(), ns);
480+
if(!object_size_opt.has_value())
481+
return true;
482+
const auto lhs_offset_int =
483+
numeric_cast_v<mp_integer>(to_constant_expr(lhs.offset()));
484+
return lhs_offset_int < 0 ||
485+
lhs_offset_int + *extract_size_opt > *object_size_opt;
486+
}();
487+
488+
if(
489+
may_be_out_of_bounds &&
490+
(lhs.op().id() == ID_member || lhs.op().id() == ID_index))
491+
{
492+
exprt new_offset = lhs.offset();
493+
exprt parent;
494+
if(lhs.op().id() == ID_member)
495+
{
496+
const member_exprt &member = to_member_expr(lhs.op());
497+
const auto member_offset = member_offset_expr(member, ns);
498+
CHECK_RETURN(member_offset.has_value());
499+
500+
parent = member.compound();
501+
new_offset = plus_exprt{
502+
typecast_exprt::conditional_cast(*member_offset, new_offset.type()),
503+
new_offset};
504+
}
505+
else
506+
{
507+
const index_exprt &index = to_index_expr(lhs.op());
508+
509+
const auto element_size_opt =
510+
size_of_expr(to_array_type(index.array().type()).subtype(), ns);
511+
CHECK_RETURN(element_size_opt.has_value());
512+
513+
parent = index.array();
514+
new_offset =
515+
plus_exprt{mult_exprt{typecast_exprt::conditional_cast(
516+
*element_size_opt, new_offset.type()),
517+
typecast_exprt::conditional_cast(
518+
index.index(), new_offset.type())},
519+
new_offset};
520+
}
521+
522+
if(symex_config.simplify_opt)
523+
simplify(new_offset, ns);
524+
525+
const byte_update_exprt new_rhs{byte_update_id, parent, new_offset, rhs};
526+
const expr_skeletont new_skeleton =
527+
full_lhs.compose(expr_skeletont::remove_op0(lhs.op()));
528+
assign_rec(parent, new_skeleton, new_rhs, guard);
529+
}
530+
else
531+
{
532+
const byte_update_exprt new_rhs{
533+
byte_update_id, lhs.op(), lhs.offset(), rhs};
534+
const expr_skeletont new_skeleton =
535+
full_lhs.compose(expr_skeletont::remove_op0(lhs));
536+
assign_rec(lhs.op(), new_skeleton, new_rhs, guard);
537+
}
385538
}

src/goto-symex/symex_config.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ struct symex_configt final
6363
/// Used in goto_symext::dereference_rec
6464
bool cache_dereferences;
6565

66+
/// Handle assignments beyond the bounds prescribed by a struct or array
67+
/// access path. See tests in regression/cbmc/member_bounds* for examples.
68+
bool updates_across_member_bounds;
69+
6670
/// \brief Construct a symex_configt using options specified in an
6771
/// \ref optionst
6872
explicit symex_configt(const optionst &options);

src/goto-symex/symex_main.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ symex_configt::symex_configt(const optionst &options)
5656
: DEFAULT_MAX_FIELD_SENSITIVITY_ARRAY_SIZE),
5757
complexity_limits_active(
5858
options.get_signed_int_option("symex-complexity-limit") > 0),
59-
cache_dereferences{options.get_bool_option("symex-cache-dereferences")}
59+
cache_dereferences{options.get_bool_option("symex-cache-dereferences")},
60+
updates_across_member_bounds{
61+
options.get_bool_option("symex-no-member-bounds")}
6062
{
6163
}
6264

0 commit comments

Comments
 (0)