Skip to content

Commit 2ebcd47

Browse files
committed
Encode integer addresses together with pointer object/offset
Instead of encoding pointers with the same bit width that a pointer has on a given platform, widen the bit-blasted encoding to include both the previous object/offset encoding and an (integer) address. The encoding is thus also trivially extended to handle larger numbers of objects, and offsets of the same width as the address. Furthermore, clean up the code to encapsulate the encoding properly, and make the in-code layout of the pointer encoding more natural (it is now object, offset, integer-address). Fixes: diffblue#436 Fixes: diffblue#311 Fixes: diffblue#94
1 parent 75797d3 commit 2ebcd47

File tree

2 files changed

+238
-50
lines changed

2 files changed

+238
-50
lines changed

src/solvers/flattening/bv_pointers.cpp

Lines changed: 186 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1160,6 +1160,179 @@ void bv_pointerst::do_postponed(
11601160
UNREACHABLE;
11611161
}
11621162

1163+
/// Compute an integer address range for every object whose size is known and
/// constrain the NULL and INVALID encodings to lie outside each such range.
///
/// Objects are numbered by their position in `pointer_logic.objects`. Objects
/// for which `size_of_expr` yields no value are skipped (but still counted).
/// For every remaining object the pair (lower bound, upper bound) is stored in
/// \p dest under the object's number, and two constraints are collected:
/// NULL is below the lower bound or at/above the upper bound, and likewise
/// for INVALID. The conjunction of all collected constraints is asserted on
/// the propositional solver at the end.
///
/// \param [out] dest: receives one entry per sized object, mapping the object
///   number to its (lower bound, upper bound) bit vectors
void bv_pointerst::encode_object_bounds(bounds_mapt &dest)
{
  const auto &objects = pointer_logic.objects;
  std::size_t number = 0;

  // the object numbers of NULL and INVALID do not change while iterating
  const auto null_object = pointer_logic.get_null_object();
  const auto invalid_object_number = pointer_logic.get_invalid_object();

  const bvt null_pointer = pointer_bits.at(null_object);
  const bvt &invalid_object = pointer_bits.at(invalid_object_number);

  bvt conj;
  // two constraints (one for NULL, one for INVALID) are added per sized object
  conj.reserve(2 * objects.size());

  for(const exprt &expr : objects)
  {
    const auto size_expr = size_of_expr(expr.type(), ns);

    // objects without a computable size get no bounds, but keep their number
    if(!size_expr.has_value())
    {
      ++number;
      continue;
    }

    bvt size_bv = convert_bv(*size_expr);

    // NULL, INVALID have no size
    DATA_INVARIANT(
      number != null_object, "NULL object cannot have a size");
    DATA_INVARIANT(
      number != invalid_object_number, "INVALID object cannot have a size");

    bvt object_bv;
    encode(number, object_bv);

    // prepare comparison over integers
    // NOTE(review): address_bv presumably reduces the encoding to its
    // integer-address part -- confirm against its definition
    bvt bv = object_bv;
    address_bv(bv, pointer_type(expr.type()));
    DATA_INVARIANT(
      bv.size() == null_pointer.size(),
      "NULL pointer encoding does not have matching width");
    DATA_INVARIANT(
      bv.size() == invalid_object.size(),
      "INVALID pointer encoding does not have matching width");
    DATA_INVARIANT(
      size_bv.size() == bv.size(),
      "pointer encoding does not have matching width");

    // NULL, INVALID must not be within object bounds
    literalt null_lower_bound = bv_utils.rel(
      null_pointer, ID_lt, bv, bv_utilst::representationt::UNSIGNED);

    literalt inv_obj_lower_bound = bv_utils.rel(
      invalid_object, ID_lt, bv, bv_utilst::representationt::UNSIGNED);

    // compute the upper bound with the side effect of enforcing the
    // object addresses not to wrap around/overflow
    bvt obj_upper_bound = bv_utils.add_sub_no_overflow(
      bv, size_bv, false, bv_utilst::representationt::UNSIGNED);

    literalt null_upper_bound = bv_utils.rel(
      null_pointer,
      ID_ge,
      obj_upper_bound,
      bv_utilst::representationt::UNSIGNED);

    literalt inv_obj_upper_bound = bv_utils.rel(
      invalid_object,
      ID_ge,
      obj_upper_bound,
      bv_utilst::representationt::UNSIGNED);

    // store bounds for re-use
    dest.insert({number, {bv, obj_upper_bound}});

    // "strictly below the object" or "at or beyond its end"
    conj.push_back(prop.lor(null_lower_bound, null_upper_bound));
    conj.push_back(prop.lor(inv_obj_lower_bound, inv_obj_upper_bound));

    ++number;
  }

  // nothing to assert when no object had a known size
  if(!conj.empty())
    prop.l_set_to_true(prop.land(conj));
}
1247+
1248+
void bv_pointerst::do_postponed_typecast(
1249+
const postponedt &postponed,
1250+
const bounds_mapt &bounds)
1251+
{
1252+
if(postponed.expr.id() != ID_typecast)
1253+
return;
1254+
1255+
const pointer_typet &type = to_pointer_type(postponed.expr.type());
1256+
const std::size_t bits = boolbv_width.get_offset_width(type) +
1257+
boolbv_width.get_object_width(type) +
1258+
boolbv_width.get_address_width(type);
1259+
1260+
// given an integer (possibly representing an address) postponed.op,
1261+
// compute the object and offset that it may refer to
1262+
bvt saved_bv = postponed.op;
1263+
1264+
bvt conj, oob_conj;
1265+
conj.reserve(bounds.size() + 3);
1266+
oob_conj.reserve(bounds.size());
1267+
1268+
for(const auto &bounds_entry : bounds)
1269+
{
1270+
std::size_t number = bounds_entry.first;
1271+
1272+
// pointer must be within object bounds
1273+
const bvt &lb = bounds_entry.second.first;
1274+
const bvt &ub = bounds_entry.second.second;
1275+
1276+
literalt lower_bound =
1277+
bv_utils.rel(saved_bv, ID_ge, lb, bv_utilst::representationt::UNSIGNED);
1278+
1279+
literalt upper_bound =
1280+
bv_utils.rel(saved_bv, ID_lt, ub, bv_utilst::representationt::UNSIGNED);
1281+
1282+
// compute the offset within the object, and the corresponding
1283+
// pointer bv
1284+
bvt offset = bv_utils.sub(saved_bv, lb);
1285+
1286+
bvt bv;
1287+
encode(number, bv);
1288+
object_bv(bv, type);
1289+
DATA_INVARIANT(
1290+
offset.size() == boolbv_width.get_offset_width(type),
1291+
"pointer encoding does not have matching width");
1292+
bv.insert(bv.end(), offset.begin(), offset.end());
1293+
bv.insert(bv.end(), saved_bv.begin(), saved_bv.end());
1294+
DATA_INVARIANT(
1295+
bv.size() == bits, "pointer encoding does not have matching width");
1296+
1297+
// if the integer address is within the object bounds, return an
1298+
// adjusted offset
1299+
literalt in_bounds = prop.land(lower_bound, upper_bound);
1300+
conj.push_back(prop.limplies(in_bounds, bv_utils.equal(bv, postponed.bv)));
1301+
oob_conj.push_back(!in_bounds);
1302+
}
1303+
1304+
// append integer address as both offset and address
1305+
bvt invalid_bv, null_bv;
1306+
encode(pointer_logic.get_invalid_object(), invalid_bv);
1307+
object_bv(invalid_bv, type);
1308+
invalid_bv.insert(invalid_bv.end(), saved_bv.begin(), saved_bv.end());
1309+
invalid_bv.insert(invalid_bv.end(), saved_bv.begin(), saved_bv.end());
1310+
encode(pointer_logic.get_null_object(), null_bv);
1311+
object_bv(null_bv, type);
1312+
null_bv.insert(null_bv.end(), saved_bv.begin(), saved_bv.end());
1313+
null_bv.insert(null_bv.end(), saved_bv.begin(), saved_bv.end());
1314+
1315+
// NULL is always NULL
1316+
conj.push_back(prop.limplies(
1317+
bv_utils.equal(saved_bv, pointer_bits.at(pointer_logic.get_null_object())),
1318+
bv_utils.equal(null_bv, postponed.bv)));
1319+
1320+
// INVALID is always INVALID
1321+
conj.push_back(prop.limplies(
1322+
bv_utils.equal(
1323+
saved_bv, pointer_bits.at(pointer_logic.get_invalid_object())),
1324+
bv_utils.equal(invalid_bv, postponed.bv)));
1325+
1326+
// one of the objects or NULL or INVALID with an offset
1327+
conj.push_back(prop.limplies(
1328+
prop.land(oob_conj),
1329+
prop.lor(
1330+
bv_utils.equal(null_bv, postponed.bv),
1331+
bv_utils.equal(invalid_bv, postponed.bv))));
1332+
1333+
prop.l_set_to_true(prop.land(conj));
1334+
}
1335+
11631336
void bv_pointerst::post_process()
11641337
{
11651338
// post-processing arrays may yield further objects, do this first
@@ -1169,7 +1342,19 @@ void bv_pointerst::post_process()
11691342
it=postponed_list.begin();
11701343
it!=postponed_list.end();
11711344
it++)
1172-
do_postponed(*it);
1345+
do_postponed_non_typecast(*it);
1346+
1347+
if(need_address_bounds)
1348+
{
1349+
// make sure NULL and INVALID are unique addresses
1350+
bounds_mapt bounds;
1351+
encode_object_bounds(bounds);
1352+
1353+
for(postponed_listt::const_iterator it = postponed_list.begin();
1354+
it != postponed_list.end();
1355+
it++)
1356+
do_postponed_typecast(*it, bounds);
1357+
}
11731358

11741359
// Clear the list to avoid re-doing in case of incremental usage.
11751360
postponed_list.clear();

src/solvers/smt2/smt2_conv.cpp

Lines changed: 52 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -235,10 +235,9 @@ void smt2_convt::define_object_size(
235235
PRECONDITION(expr.id() == ID_object_size);
236236
const exprt &ptr = to_unary_expr(expr).op();
237237
std::size_t size_width = boolbv_width(expr.type());
238-
std::size_t pointer_width = boolbv_width(ptr.type());
239238
std::size_t number = 0;
240-
std::size_t h=pointer_width-1;
241-
std::size_t l=pointer_width-config.bv_encoding.object_bits;
239+
std::size_t object_bits =
240+
boolbv_width.get_object_width(to_pointer_type(ptr.type()));
242241

243242
for(const auto &o : pointer_logic.objects)
244243
{
@@ -256,11 +255,11 @@ void smt2_convt::define_object_size(
256255
}
257256

258257
out << "(assert (=> (= "
259-
<< "((_ extract " << h << " " << l << ") ";
258+
<< "((_ extract " << object_bits - 1 << " 0) ";
260259
convert_expr(ptr);
261-
out << ") (_ bv" << number << " " << config.bv_encoding.object_bits << "))"
262-
<< "(= " << id << " (_ bv" << *object_size << " " << size_width
263-
<< "))))\n";
260+
out << ") (_ bv" << number << " " << object_bits << "))"
261+
<< "(= " << id << " (_ bv" << object_size->to_ulong() << " "
262+
<< size_width << "))))\n";
264263

265264
++number;
266265
}
@@ -593,10 +592,14 @@ exprt smt2_convt::parse_rec(const irept &src, const typet &type)
593592
mp_integer v = numeric_cast_v<mp_integer>(bv_expr);
594593

595594
// split into object and offset
596-
mp_integer pow=power(2, width-config.bv_encoding.object_bits);
595+
std::size_t object_bits =
596+
boolbv_width.get_object_width(to_pointer_type(type));
597+
std::size_t offset_bits =
598+
boolbv_width.get_offset_width(to_pointer_type(type));
599+
mp_integer pow = power(2, object_bits);
597600
pointer_logict::pointert ptr;
598-
ptr.object = numeric_cast_v<std::size_t>(v / pow);
599-
ptr.offset=v%pow;
601+
ptr.object = numeric_cast_v<std::size_t>(v % pow);
602+
ptr.offset = (v % power(2, object_bits + offset_bits)) / pow;
600603
return pointer_logic.pointer_expr(ptr, to_pointer_type(type));
601604
}
602605
else if(type.id()==ID_struct)
@@ -639,12 +642,18 @@ void smt2_convt::convert_address_of_rec(
639642
expr.id()==ID_string_constant ||
640643
expr.id()==ID_label)
641644
{
642-
out
643-
<< "(concat (_ bv"
644-
<< pointer_logic.add_object(expr) << " "
645-
<< config.bv_encoding.object_bits << ")"
646-
<< " (_ bv0 "
647-
<< boolbv_width(result_type)-config.bv_encoding.object_bits << "))";
645+
std::string addr =
646+
expr.id() == ID_symbol
647+
? expr.get_string(ID_identifier) + "$address"
648+
: "(_ bv0 " +
649+
std::to_string(boolbv_width.get_address_width(result_type)) + ")";
650+
651+
out << "(concat "
652+
<< "(concat "
653+
<< "(_ bv" << pointer_logic.add_object(expr) << " "
654+
<< boolbv_width.get_object_width(result_type) << ") "
655+
<< "(_ bv0 " << boolbv_width.get_offset_width(result_type) << ")) "
656+
<< addr << ")";
648657
}
649658
else if(expr.id()==ID_index)
650659
{
@@ -1485,23 +1494,21 @@ void smt2_convt::convert_expr(const exprt &expr)
14851494
op.type().id() == ID_pointer,
14861495
"operand of pointer offset expression shall be of pointer type");
14871496

1497+
std::size_t object_bits =
1498+
boolbv_width.get_object_width(to_pointer_type(op.type()));
14881499
std::size_t offset_bits =
1489-
boolbv_width(op.type()) - config.bv_encoding.object_bits;
1490-
std::size_t result_width=boolbv_width(expr.type());
1491-
1492-
// max extract width
1493-
if(offset_bits>result_width)
1494-
offset_bits=result_width;
1500+
boolbv_width.get_offset_width(to_pointer_type(op.type()));
1501+
std::size_t ext = boolbv_width(expr.type()) - offset_bits;
14951502

1496-
// too few bits?
1497-
if(result_width>offset_bits)
1498-
out << "((_ zero_extend " << result_width-offset_bits << ") ";
1503+
if(ext > 0)
1504+
out << "((_ zero_extend " << ext << ") ";
14991505

1500-
out << "((_ extract " << offset_bits-1 << " 0) ";
1506+
out << "((_ extract " << object_bits + offset_bits - 1 << " " << object_bits
1507+
<< ") ";
15011508
convert_expr(op);
15021509
out << ")";
15031510

1504-
if(result_width>offset_bits)
1511+
if(ext > 0)
15051512
out << ")"; // zero_extend
15061513
}
15071514
else if(expr.id()==ID_pointer_object)
@@ -1512,15 +1519,14 @@ void smt2_convt::convert_expr(const exprt &expr)
15121519
op.type().id() == ID_pointer,
15131520
"pointer object expressions should be of pointer type");
15141521

1515-
std::size_t ext=boolbv_width(expr.type())-config.bv_encoding.object_bits;
1516-
std::size_t pointer_width = boolbv_width(op.type());
1522+
std::size_t object_bits =
1523+
boolbv_width.get_object_width(to_pointer_type(op.type()));
1524+
std::size_t ext = boolbv_width(expr.type()) - object_bits;
15171525

15181526
if(ext>0)
15191527
out << "((_ zero_extend " << ext << ") ";
15201528

1521-
out << "((_ extract "
1522-
<< pointer_width-1 << " "
1523-
<< pointer_width-config.bv_encoding.object_bits << ") ";
1529+
out << "((_ extract " << object_bits - 1 << " 0) ";
15241530
convert_expr(op);
15251531
out << ")";
15261532

@@ -1533,14 +1539,13 @@ void smt2_convt::convert_expr(const exprt &expr)
15331539
}
15341540
else if(expr.id() == ID_is_invalid_pointer)
15351541
{
1536-
const auto &op = to_unary_expr(expr).op();
1537-
std::size_t pointer_width = boolbv_width(op.type());
1538-
out << "(= ((_ extract "
1539-
<< pointer_width-1 << " "
1540-
<< pointer_width-config.bv_encoding.object_bits << ") ";
1541-
convert_expr(op);
1542-
out << ") (_ bv" << pointer_logic.get_invalid_object()
1543-
<< " " << config.bv_encoding.object_bits << "))";
1542+
std::size_t object_bits = boolbv_width.get_object_width(
1543+
to_pointer_type(to_unary_expr(expr).op().type()));
1544+
1545+
out << "(= ((_ extract " << object_bits - 1 << " 0) ";
1546+
convert_expr(to_unary_expr(expr).op());
1547+
out << ") (_ bv" << pointer_logic.get_invalid_object() << " " << object_bits
1548+
<< "))";
15441549
}
15451550
else if(expr.id()==ID_string_constant)
15461551
{
@@ -3013,30 +3018,28 @@ void smt2_convt::convert_is_dynamic_object(const unary_exprt &expr)
30133018
std::vector<std::size_t> dynamic_objects;
30143019
pointer_logic.get_dynamic_objects(dynamic_objects);
30153020

3021+
std::size_t object_bits =
3022+
boolbv_width.get_object_width(to_pointer_type(expr.op().type()));
3023+
30163024
if(dynamic_objects.empty())
30173025
out << "false";
30183026
else
30193027
{
3020-
std::size_t pointer_width = boolbv_width(expr.op().type());
3021-
3022-
out << "(let ((?obj ((_ extract "
3023-
<< pointer_width-1 << " "
3024-
<< pointer_width-config.bv_encoding.object_bits << ") ";
3028+
// SMT-LIB extract indices are inclusive: the object number occupies bits
// object_bits-1..0, so that ?obj has exactly object_bits bits and matches
// the (_ bvN object_bits) constants it is compared against below
out << "(let ((?obj ((_ extract " << object_bits - 1 << " 0) ";
30253029
convert_expr(expr.op());
30263030
out << "))) ";
30273031

30283032
if(dynamic_objects.size()==1)
30293033
{
3030-
out << "(= (_ bv" << dynamic_objects.front()
3031-
<< " " << config.bv_encoding.object_bits << ") ?obj)";
3034+
out << "(= (_ bv" << dynamic_objects.front() << " " << object_bits
3035+
<< ") ?obj)";
30323036
}
30333037
else
30343038
{
30353039
out << "(or";
30363040

30373041
for(const auto &object : dynamic_objects)
3038-
out << " (= (_ bv" << object
3039-
<< " " << config.bv_encoding.object_bits << ") ?obj)";
3042+
out << " (= (_ bv" << object << " " << object_bits << ") ?obj)";
30403043

30413044
out << ")"; // or
30423045
}

0 commit comments

Comments
 (0)