Skip to content

Commit 237d8be

Browse files
committed
Fix and extend byte extract flattening
Properly handle byte_extract from arrays or structs, and also support the case where at least one of source or target size are known. This is a best-effort attempt: there could be out-of-bounds accesses in the program under scrutiny, which will be ignored. Finally ensure that flatten_byte_extract never returns another byte_extract.
1 parent 91aef54 commit 237d8be

File tree

1 file changed

+238
-98
lines changed

1 file changed

+238
-98
lines changed

src/solvers/flattening/flatten_byte_operators.cpp

+238-98
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,160 @@ Author: Daniel Kroening, [email protected]
66
77
\*******************************************************************/
88

9+
#include <util/c_types.h>
910
#include <util/expr.h>
1011
#include <util/std_types.h>
1112
#include <util/std_expr.h>
1213
#include <util/arith_tools.h>
1314
#include <util/pointer_offset_size.h>
1415
#include <util/byte_operators.h>
1516
#include <util/namespace.h>
17+
#include <util/replace_symbol.h>
18+
#include <util/simplify_expr.h>
1619

1720
#include "flatten_byte_operators.h"
1821

1922
/*******************************************************************\
2023
24+
Function: unpack_rec
25+
26+
Inputs:
27+
src object to unpack
28+
little_endian true, iff assumed endianness is little-endian
29+
max_bytes if not nil, use as upper bound of the number of bytes
30+
to unpack
31+
ns namespace for type lookups
32+
33+
Outputs: array of bytes in the sequence found in memory
34+
35+
Purpose: rewrite an object into its individual bytes
36+
37+
\*******************************************************************/
38+
39+
static exprt unpack_rec(
40+
const exprt &src,
41+
bool little_endian,
42+
const exprt &max_bytes,
43+
const namespacet &ns,
44+
bool unpack_byte_array=false)
45+
{
46+
array_exprt array(
47+
array_typet(unsignedbv_typet(8), from_integer(0, size_type())));
48+
49+
// endianness_mapt should be the point of reference for mapping out
50+
// endianness, but we need to work on elements here instead of
51+
// individual bits
52+
53+
const typet &type=ns.follow(src.type());
54+
55+
if(type.id()==ID_array)
56+
{
57+
const array_typet &array_type=to_array_type(type);
58+
const typet &subtype=array_type.subtype();
59+
60+
mp_integer element_width=pointer_offset_bits(subtype, ns);
61+
// this probably doesn't really matter
62+
#if 0
63+
if(element_width<=0)
64+
throw "cannot unpack array with non-constant element width:\n"+
65+
type.pretty();
66+
else if(element_width%8!=0)
67+
throw "cannot unpack array of non-byte aligned elements:\n"+
68+
type.pretty();
69+
#endif
70+
71+
if(!unpack_byte_array && element_width==8)
72+
return src;
73+
74+
mp_integer num_elements;
75+
if(to_integer(max_bytes, num_elements) &&
76+
to_integer(array_type.size(), num_elements))
77+
throw "cannot unpack array of non-const size:\n"+type.pretty();
78+
79+
// all array members will have the same structure; do this just
80+
// once and then replace the dummy symbol by a suitable index
81+
// expression in the loop below
82+
symbol_exprt dummy(ID_C_incomplete, subtype);
83+
exprt sub=unpack_rec(dummy, little_endian, max_bytes, ns, true);
84+
85+
for(mp_integer i=0; i<num_elements; ++i)
86+
{
87+
index_exprt index(src, from_integer(i, index_type()));
88+
replace_symbolt replace;
89+
replace.insert(ID_C_incomplete, index);
90+
91+
for(const auto &op : sub.operands())
92+
{
93+
exprt new_op=op;
94+
replace(new_op);
95+
simplify(new_op, ns);
96+
array.copy_to_operands(new_op);
97+
}
98+
}
99+
}
100+
else if(type.id()==ID_struct)
101+
{
102+
const struct_typet &struct_type=to_struct_type(type);
103+
const struct_typet::componentst &components=struct_type.components();
104+
105+
for(const auto &comp : components)
106+
{
107+
mp_integer element_width=pointer_offset_bits(comp.type(), ns);
108+
109+
// the next member would be misaligned, abort
110+
if(element_width<=0 || element_width%8!=0)
111+
throw "cannot unpack struct with non-byte aligned components:\n"+
112+
struct_type.pretty();
113+
114+
member_exprt member(src, comp.get_name(), comp.type());
115+
exprt sub=unpack_rec(member, little_endian, max_bytes, ns, true);
116+
117+
for(const auto& op : sub.operands())
118+
array.copy_to_operands(op);
119+
}
120+
}
121+
else if(type.id()!=ID_empty)
122+
{
123+
// a basic type; we turn that into logical right shift and
124+
// extractbits while considering endianness
125+
mp_integer bits=pointer_offset_bits(type, ns);
126+
if(bits<0)
127+
{
128+
if(to_integer(max_bytes, bits))
129+
throw "cannot unpack object of non-constant width:\n"+
130+
src.pretty();
131+
else
132+
bits*=8;
133+
}
134+
135+
// cast to generic bit-vector
136+
typecast_exprt src_tc(src, unsignedbv_typet(integer2size_t(bits)));
137+
138+
for(mp_integer i=0; i<bits; i+=8)
139+
{
140+
lshr_exprt right_shift(src_tc, from_integer(i, index_type()));
141+
142+
extractbits_exprt extractbits(
143+
right_shift,
144+
from_integer(7, index_type()),
145+
from_integer(0, index_type()),
146+
unsignedbv_typet(8));
147+
148+
if(little_endian)
149+
array.copy_to_operands(extractbits);
150+
else
151+
array.operands().insert(array.operands().begin(), extractbits);
152+
}
153+
}
154+
155+
to_array_type(array.type()).size()=
156+
from_integer(array.operands().size(), size_type());
157+
158+
return array;
159+
}
160+
161+
/*******************************************************************\
162+
21163
Function: flatten_byte_extract
22164
23165
Inputs:
@@ -89,135 +231,133 @@ exprt flatten_byte_extract(
89231
else
90232
assert(false);
91233

92-
mp_integer size_bits=pointer_offset_bits(src.type(), ns);
93-
if(size_bits<0)
94-
throw "byte_extract flatting with non-constant size: "+src.pretty();
234+
// determine an upper bound of the number of bytes we might need
235+
exprt upper_bound=size_of_expr(src.type(), ns);
236+
if(upper_bound.is_not_nil())
237+
upper_bound=
238+
simplify_expr(
239+
plus_exprt(
240+
upper_bound,
241+
typecast_exprt(src.offset(), upper_bound.type())),
242+
ns);
95243

96-
if(src.op0().type().id()==ID_array)
97-
{
98-
const exprt &root=src.op0();
99-
const exprt &offset=src.op1();
244+
byte_extract_exprt unpacked(src);
245+
unpacked.op()=
246+
unpack_rec(src.op(), little_endian, upper_bound, ns);
100247

101-
const array_typet &array_type=to_array_type(root.type());
248+
const typet &type=ns.follow(src.type());
249+
250+
if(type.id()==ID_array)
251+
{
252+
const array_typet &array_type=to_array_type(type);
102253
const typet &subtype=array_type.subtype();
103254

104255
mp_integer element_width=pointer_offset_bits(subtype, ns);
105-
if(element_width<0) // failed
106-
throw "failed to flatten array with unknown element width";
107-
108-
mp_integer num_elements=
109-
size_bits/element_width+((size_bits%element_width==0)?0:1);
110-
111-
const typet &offset_type=ns.follow(offset.type());
112-
113-
// byte-array?
114-
if(element_width==8)
256+
mp_integer num_elements;
257+
// TODO: consider ways of dealing with arrays of unknown subtype
258+
// size or with a subtype size that does not fit byte boundaries
259+
if(element_width>0 && element_width%8==0 &&
260+
to_integer(array_type.size(), num_elements))
115261
{
116-
// get 'width'-many bytes, and concatenate
117-
std::size_t width_bytes=integer2unsigned(num_elements);
118-
exprt::operandst op;
119-
op.reserve(width_bytes);
262+
array_exprt array(array_type);
120263

121-
for(std::size_t i=0; i<width_bytes; i++)
264+
for(mp_integer i=0; i<num_elements; ++i)
122265
{
123-
// the most significant byte comes first in the concatenation!
124-
std::size_t offset_int=
125-
little_endian?(width_bytes-i-1):i;
266+
plus_exprt new_offset(
267+
unpacked.offset(),
268+
from_integer(i*element_width, unpacked.offset().type()));
126269

127-
plus_exprt offset_i(from_integer(offset_int, offset_type), offset);
128-
index_exprt index_expr(root, offset_i);
129-
op.push_back(index_expr);
130-
}
270+
byte_extract_exprt tmp(unpacked);
271+
tmp.type()=subtype;
272+
tmp.offset()=simplify_expr(new_offset, ns);
131273

132-
// TODO this doesn't seem correct if size_bits%8!=0 as more
133-
// bits than the original expression will be returned.
134-
if(width_bytes==1)
135-
return op.front();
136-
else // width_bytes>=2
137-
{
138-
concatenation_exprt concatenation(src.type());
139-
concatenation.operands().swap(op);
140-
return concatenation;
274+
array.copy_to_operands(flatten_byte_extract(tmp, ns));
141275
}
142-
}
143-
else // non-byte array
144-
{
145-
// add an extra element as the access need not be aligned with
146-
// element boundaries and could thus stretch over extra elements
147-
++num_elements;
148276

149-
assert(element_width!=0);
277+
return array;
278+
}
279+
}
280+
else if(type.id()==ID_struct)
281+
{
282+
const struct_typet &struct_type=to_struct_type(type);
283+
const struct_typet::componentst &components=struct_type.components();
150284

151-
// compute new root and offset
152-
concatenation_exprt concat(
153-
unsignedbv_typet(integer2unsigned(element_width*num_elements)));
285+
bool failed=false;
286+
struct_exprt s(struct_type);
154287

155-
assert(element_width%8==0);
156-
exprt first_index=
157-
div_exprt(offset, from_integer(element_width/8, offset_type));
288+
for(const auto &comp : components)
289+
{
290+
mp_integer element_width=pointer_offset_bits(comp.type(), ns);
158291

159-
// byte extract will do the appropriate mapping, thus MSB comes
160-
// last here (as opposed to the above, where no further byte
161-
// extract is involved)
162-
for(mp_integer i=0; i<num_elements; ++i)
292+
// the next member would be misaligned, abort
293+
if(element_width<=0 || element_width%8!=0)
163294
{
164-
// the most significant byte comes first in the concatenation!
165-
mp_integer index_offset=
166-
little_endian?(num_elements-i-1):i;
167-
168-
plus_exprt index(first_index, from_integer(index_offset, offset_type));
169-
concat.copy_to_operands(index_exprt(root, index));
295+
failed=true;
296+
break;
170297
}
171298

172-
// the new offset is offset%width
173-
mod_exprt new_offset(offset,
174-
from_integer(element_width/8, offset_type));
175-
176-
// build new byte-extract expression
177-
byte_extract_exprt tmp(src);
178-
tmp.op()=concat;
179-
tmp.offset()=new_offset;
299+
plus_exprt new_offset(
300+
unpacked.offset(),
301+
typecast_exprt(
302+
member_offset_expr(struct_type, comp.get_name(), ns),
303+
unpacked.offset().type()));
180304

181-
return tmp;
305+
byte_extract_exprt tmp(unpacked);
306+
tmp.type()=comp.type();
307+
tmp.offset()=simplify_expr(new_offset, ns);
182308
}
309+
310+
if(!failed)
311+
return s;
183312
}
184-
else // non-array
185-
{
186-
mp_integer op0_bits=pointer_offset_bits(src.op0().type(), ns);
187-
if(op0_bits<0)
188-
throw "byte_extract flatting of non-constant source size: "+src.pretty();
189313

190-
// We turn that into logical right shift and extractbits
314+
const exprt &root=unpacked.op();
315+
const exprt &offset=unpacked.offset();
191316

192-
const exprt &offset=src.op1();
193-
const typet &offset_type=ns.follow(offset.type());
317+
const array_typet &array_type=to_array_type(root.type());
318+
const typet &subtype=array_type.subtype();
194319

195-
// adjust for endianness
196-
exprt adjusted_offset;
320+
assert(pointer_offset_bits(subtype, ns)==8);
197321

198-
if(little_endian)
199-
adjusted_offset=offset;
322+
mp_integer size_bits=pointer_offset_bits(unpacked.type(), ns);
323+
if(size_bits<0)
324+
{
325+
mp_integer op0_bits=pointer_offset_bits(unpacked.op().type(), ns);
326+
if(op0_bits<0)
327+
throw "byte_extract flatting with non-constant size:\n"+
328+
unpacked.pretty();
200329
else
201-
{
202-
exprt width_constant=from_integer(op0_bits/8-1, offset_type);
203-
adjusted_offset=minus_exprt(width_constant, offset);
204-
}
330+
size_bits=op0_bits;
331+
}
205332

206-
mult_exprt times_eight(adjusted_offset, from_integer(8, offset_type));
333+
mp_integer num_elements=
334+
size_bits/8+((size_bits%8==0)?0:1);
207335

208-
// cast to generic bit-vector
209-
std::size_t op0_width=integer2unsigned(op0_bits);
210-
typecast_exprt src_op0_tc(src.op0(), bv_typet(op0_width));
211-
lshr_exprt left_shift(src_op0_tc, times_eight);
336+
const typet &offset_type=ns.follow(offset.type());
212337

213-
extractbits_exprt extractbits;
338+
// get 'width'-many bytes, and concatenate
339+
std::size_t width_bytes=integer2unsigned(num_elements);
340+
exprt::operandst op;
341+
op.reserve(width_bytes);
214342

215-
extractbits.src()=left_shift;
216-
extractbits.type()=src.type();
217-
extractbits.upper()=from_integer(size_bits-1, offset_type);
218-
extractbits.lower()=from_integer(0, offset_type);
343+
for(std::size_t i=0; i<width_bytes; i++)
344+
{
345+
// the most significant byte comes first in the concatenation!
346+
std::size_t offset_int=
347+
little_endian?(width_bytes-i-1):i;
348+
349+
plus_exprt offset_i(from_integer(offset_int, offset_type), offset);
350+
index_exprt index_expr(root, offset_i);
351+
op.push_back(index_expr);
352+
}
219353

220-
return extractbits;
354+
if(width_bytes==1)
355+
return simplify_expr(typecast_exprt(op.front(), src.type()), ns);
356+
else // width_bytes>=2
357+
{
358+
concatenation_exprt concatenation(src.type());
359+
concatenation.operands().swap(op);
360+
return concatenation;
221361
}
222362
}
223363

0 commit comments

Comments
 (0)