|
6 | 6 |
|
7 | 7 | \*******************************************************************/
|
8 | 8 |
|
| 9 | +#include <util/c_types.h> |
9 | 10 | #include <util/expr.h>
|
10 | 11 | #include <util/std_types.h>
|
11 | 12 | #include <util/std_expr.h>
|
12 | 13 | #include <util/arith_tools.h>
|
13 | 14 | #include <util/pointer_offset_size.h>
|
14 | 15 | #include <util/byte_operators.h>
|
15 | 16 | #include <util/namespace.h>
|
| 17 | +#include <util/replace_symbol.h> |
| 18 | +#include <util/simplify_expr.h> |
16 | 19 |
|
17 | 20 | #include "flatten_byte_operators.h"
|
18 | 21 |
|
19 | 22 | /*******************************************************************\
|
20 | 23 |
|
| 24 | +Function: unpack_rec |
| 25 | +
|
| 26 | + Inputs: |
| 27 | + src object to unpack |
| 28 | + little_endian true, iff assumed endianness is little-endian |
| 29 | + max_bytes if not nil, use as upper bound of the number of bytes |
| 30 | + to unpack |
| 31 | + ns namespace for type lookups |
| 32 | +
|
| 33 | + Outputs: array of bytes in the sequence found in memory |
| 34 | +
|
| 35 | + Purpose: rewrite an object into its individual bytes |
| 36 | +
|
| 37 | +\*******************************************************************/ |
| 38 | + |
| 39 | +static exprt unpack_rec( |
| 40 | + const exprt &src, |
| 41 | + bool little_endian, |
| 42 | + const exprt &max_bytes, |
| 43 | + const namespacet &ns, |
| 44 | + bool unpack_byte_array=false) |
| 45 | +{ |
| 46 | + array_exprt array( |
| 47 | + array_typet(unsignedbv_typet(8), from_integer(0, size_type()))); |
| 48 | + |
| 49 | + // endianness_mapt should be the point of reference for mapping out |
| 50 | + // endianness, but we need to work on elements here instead of |
| 51 | + // individual bits |
| 52 | + |
| 53 | + const typet &type=ns.follow(src.type()); |
| 54 | + |
| 55 | + if(type.id()==ID_array) |
| 56 | + { |
| 57 | + const array_typet &array_type=to_array_type(type); |
| 58 | + const typet &subtype=array_type.subtype(); |
| 59 | + |
| 60 | + mp_integer element_width=pointer_offset_bits(subtype, ns); |
| 61 | + // this probably doesn't really matter |
| 62 | + #if 0 |
| 63 | + if(element_width<=0) |
| 64 | + throw "cannot unpack array with non-constant element width:\n"+ |
| 65 | + type.pretty(); |
| 66 | + else if(element_width%8!=0) |
| 67 | + throw "cannot unpack array of non-byte aligned elements:\n"+ |
| 68 | + type.pretty(); |
| 69 | + #endif |
| 70 | + |
| 71 | + if(!unpack_byte_array && element_width==8) |
| 72 | + return src; |
| 73 | + |
| 74 | + mp_integer num_elements; |
| 75 | + if(to_integer(max_bytes, num_elements) && |
| 76 | + to_integer(array_type.size(), num_elements)) |
| 77 | + throw "cannot unpack array of non-const size:\n"+type.pretty(); |
| 78 | + |
| 79 | + // all array members will have the same structure; do this just |
| 80 | + // once and then replace the dummy symbol by a suitable index |
| 81 | + // expression in the loop below |
| 82 | + symbol_exprt dummy(ID_C_incomplete, subtype); |
| 83 | + exprt sub=unpack_rec(dummy, little_endian, max_bytes, ns, true); |
| 84 | + |
| 85 | + for(mp_integer i=0; i<num_elements; ++i) |
| 86 | + { |
| 87 | + index_exprt index(src, from_integer(i, index_type())); |
| 88 | + replace_symbolt replace; |
| 89 | + replace.insert(ID_C_incomplete, index); |
| 90 | + |
| 91 | + for(const auto &op : sub.operands()) |
| 92 | + { |
| 93 | + exprt new_op=op; |
| 94 | + replace(new_op); |
| 95 | + simplify(new_op, ns); |
| 96 | + array.copy_to_operands(new_op); |
| 97 | + } |
| 98 | + } |
| 99 | + } |
| 100 | + else if(type.id()==ID_struct) |
| 101 | + { |
| 102 | + const struct_typet &struct_type=to_struct_type(type); |
| 103 | + const struct_typet::componentst &components=struct_type.components(); |
| 104 | + |
| 105 | + for(const auto &comp : components) |
| 106 | + { |
| 107 | + mp_integer element_width=pointer_offset_bits(comp.type(), ns); |
| 108 | + |
| 109 | + // the next member would be misaligned, abort |
| 110 | + if(element_width<=0 || element_width%8!=0) |
| 111 | + throw "cannot unpack struct with non-byte aligned components:\n"+ |
| 112 | + struct_type.pretty(); |
| 113 | + |
| 114 | + member_exprt member(src, comp.get_name(), comp.type()); |
| 115 | + exprt sub=unpack_rec(member, little_endian, max_bytes, ns, true); |
| 116 | + |
| 117 | + for(const auto& op : sub.operands()) |
| 118 | + array.copy_to_operands(op); |
| 119 | + } |
| 120 | + } |
| 121 | + else if(type.id()!=ID_empty) |
| 122 | + { |
| 123 | + // a basic type; we turn that into logical right shift and |
| 124 | + // extractbits while considering endianness |
| 125 | + mp_integer bits=pointer_offset_bits(type, ns); |
| 126 | + if(bits<0) |
| 127 | + { |
| 128 | + if(to_integer(max_bytes, bits)) |
| 129 | + throw "cannot unpack object of non-constant width:\n"+ |
| 130 | + src.pretty(); |
| 131 | + else |
| 132 | + bits*=8; |
| 133 | + } |
| 134 | + |
| 135 | + // cast to generic bit-vector |
| 136 | + typecast_exprt src_tc(src, unsignedbv_typet(integer2size_t(bits))); |
| 137 | + |
| 138 | + for(mp_integer i=0; i<bits; i+=8) |
| 139 | + { |
| 140 | + lshr_exprt right_shift(src_tc, from_integer(i, index_type())); |
| 141 | + |
| 142 | + extractbits_exprt extractbits( |
| 143 | + right_shift, |
| 144 | + from_integer(7, index_type()), |
| 145 | + from_integer(0, index_type()), |
| 146 | + unsignedbv_typet(8)); |
| 147 | + |
| 148 | + if(little_endian) |
| 149 | + array.copy_to_operands(extractbits); |
| 150 | + else |
| 151 | + array.operands().insert(array.operands().begin(), extractbits); |
| 152 | + } |
| 153 | + } |
| 154 | + |
| 155 | + to_array_type(array.type()).size()= |
| 156 | + from_integer(array.operands().size(), size_type()); |
| 157 | + |
| 158 | + return array; |
| 159 | +} |
| 160 | + |
| 161 | +/*******************************************************************\ |
| 162 | +
|
21 | 163 | Function: flatten_byte_extract
|
22 | 164 |
|
23 | 165 | Inputs:
|
@@ -89,135 +231,133 @@ exprt flatten_byte_extract(
|
89 | 231 | else
|
90 | 232 | assert(false);
|
91 | 233 |
|
92 |
| - mp_integer size_bits=pointer_offset_bits(src.type(), ns); |
93 |
| - if(size_bits<0) |
94 |
| - throw "byte_extract flatting with non-constant size: "+src.pretty(); |
| 234 | + // determine an upper bound of the number of bytes we might need |
| 235 | + exprt upper_bound=size_of_expr(src.type(), ns); |
| 236 | + if(upper_bound.is_not_nil()) |
| 237 | + upper_bound= |
| 238 | + simplify_expr( |
| 239 | + plus_exprt( |
| 240 | + upper_bound, |
| 241 | + typecast_exprt(src.offset(), upper_bound.type())), |
| 242 | + ns); |
95 | 243 |
|
96 |
| - if(src.op0().type().id()==ID_array) |
97 |
| - { |
98 |
| - const exprt &root=src.op0(); |
99 |
| - const exprt &offset=src.op1(); |
| 244 | + byte_extract_exprt unpacked(src); |
| 245 | + unpacked.op()= |
| 246 | + unpack_rec(src.op(), little_endian, upper_bound, ns); |
100 | 247 |
|
101 |
| - const array_typet &array_type=to_array_type(root.type()); |
| 248 | + const typet &type=ns.follow(src.type()); |
| 249 | + |
| 250 | + if(type.id()==ID_array) |
| 251 | + { |
| 252 | + const array_typet &array_type=to_array_type(type); |
102 | 253 | const typet &subtype=array_type.subtype();
|
103 | 254 |
|
104 | 255 | mp_integer element_width=pointer_offset_bits(subtype, ns);
|
105 |
| - if(element_width<0) // failed |
106 |
| - throw "failed to flatten array with unknown element width"; |
107 |
| - |
108 |
| - mp_integer num_elements= |
109 |
| - size_bits/element_width+((size_bits%element_width==0)?0:1); |
110 |
| - |
111 |
| - const typet &offset_type=ns.follow(offset.type()); |
112 |
| - |
113 |
| - // byte-array? |
114 |
| - if(element_width==8) |
| 256 | + mp_integer num_elements; |
| 257 | + // TODO: consider ways of dealing with arrays of unknown subtype |
| 258 | + // size or with a subtype size that does not fit byte boundaries |
| 259 | + if(element_width>0 && element_width%8==0 && |
| 260 | + to_integer(array_type.size(), num_elements)) |
115 | 261 | {
|
116 |
| - // get 'width'-many bytes, and concatenate |
117 |
| - std::size_t width_bytes=integer2unsigned(num_elements); |
118 |
| - exprt::operandst op; |
119 |
| - op.reserve(width_bytes); |
| 262 | + array_exprt array(array_type); |
120 | 263 |
|
121 |
| - for(std::size_t i=0; i<width_bytes; i++) |
| 264 | + for(mp_integer i=0; i<num_elements; ++i) |
122 | 265 | {
|
123 |
| - // the most significant byte comes first in the concatenation! |
124 |
| - std::size_t offset_int= |
125 |
| - little_endian?(width_bytes-i-1):i; |
| 266 | + plus_exprt new_offset( |
| 267 | + unpacked.offset(), |
| 268 | + from_integer(i*element_width, unpacked.offset().type())); |
126 | 269 |
|
127 |
| - plus_exprt offset_i(from_integer(offset_int, offset_type), offset); |
128 |
| - index_exprt index_expr(root, offset_i); |
129 |
| - op.push_back(index_expr); |
130 |
| - } |
| 270 | + byte_extract_exprt tmp(unpacked); |
| 271 | + tmp.type()=subtype; |
| 272 | + tmp.offset()=simplify_expr(new_offset, ns); |
131 | 273 |
|
132 |
| - // TODO this doesn't seem correct if size_bits%8!=0 as more |
133 |
| - // bits than the original expression will be returned. |
134 |
| - if(width_bytes==1) |
135 |
| - return op.front(); |
136 |
| - else // width_bytes>=2 |
137 |
| - { |
138 |
| - concatenation_exprt concatenation(src.type()); |
139 |
| - concatenation.operands().swap(op); |
140 |
| - return concatenation; |
| 274 | + array.copy_to_operands(flatten_byte_extract(tmp, ns)); |
141 | 275 | }
|
142 |
| - } |
143 |
| - else // non-byte array |
144 |
| - { |
145 |
| - // add an extra element as the access need not be aligned with |
146 |
| - // element boundaries and could thus stretch over extra elements |
147 |
| - ++num_elements; |
148 | 276 |
|
149 |
| - assert(element_width!=0); |
| 277 | + return array; |
| 278 | + } |
| 279 | + } |
| 280 | + else if(type.id()==ID_struct) |
| 281 | + { |
| 282 | + const struct_typet &struct_type=to_struct_type(type); |
| 283 | + const struct_typet::componentst &components=struct_type.components(); |
150 | 284 |
|
151 |
| - // compute new root and offset |
152 |
| - concatenation_exprt concat( |
153 |
| - unsignedbv_typet(integer2unsigned(element_width*num_elements))); |
| 285 | + bool failed=false; |
| 286 | + struct_exprt s(struct_type); |
154 | 287 |
|
155 |
| - assert(element_width%8==0); |
156 |
| - exprt first_index= |
157 |
| - div_exprt(offset, from_integer(element_width/8, offset_type)); |
| 288 | + for(const auto &comp : components) |
| 289 | + { |
| 290 | + mp_integer element_width=pointer_offset_bits(comp.type(), ns); |
158 | 291 |
|
159 |
| - // byte extract will do the appropriate mapping, thus MSB comes |
160 |
| - // last here (as opposed to the above, where no further byte |
161 |
| - // extract is involved) |
162 |
| - for(mp_integer i=0; i<num_elements; ++i) |
| 292 | + // the next member would be misaligned, abort |
| 293 | + if(element_width<=0 || element_width%8!=0) |
163 | 294 | {
|
164 |
| - // the most significant byte comes first in the concatenation! |
165 |
| - mp_integer index_offset= |
166 |
| - little_endian?(num_elements-i-1):i; |
167 |
| - |
168 |
| - plus_exprt index(first_index, from_integer(index_offset, offset_type)); |
169 |
| - concat.copy_to_operands(index_exprt(root, index)); |
| 295 | + failed=true; |
| 296 | + break; |
170 | 297 | }
|
171 | 298 |
|
172 |
| - // the new offset is offset%width |
173 |
| - mod_exprt new_offset(offset, |
174 |
| - from_integer(element_width/8, offset_type)); |
175 |
| - |
176 |
| - // build new byte-extract expression |
177 |
| - byte_extract_exprt tmp(src); |
178 |
| - tmp.op()=concat; |
179 |
| - tmp.offset()=new_offset; |
| 299 | + plus_exprt new_offset( |
| 300 | + unpacked.offset(), |
| 301 | + typecast_exprt( |
| 302 | + member_offset_expr(struct_type, comp.get_name(), ns), |
| 303 | + unpacked.offset().type())); |
180 | 304 |
|
181 |
| - return tmp; |
| 305 | + byte_extract_exprt tmp(unpacked); |
| 306 | + tmp.type()=comp.type(); |
| 307 | + tmp.offset()=simplify_expr(new_offset, ns); |
182 | 308 | }
|
| 309 | + |
| 310 | + if(!failed) |
| 311 | + return s; |
183 | 312 | }
|
184 |
| - else // non-array |
185 |
| - { |
186 |
| - mp_integer op0_bits=pointer_offset_bits(src.op0().type(), ns); |
187 |
| - if(op0_bits<0) |
188 |
| - throw "byte_extract flatting of non-constant source size: "+src.pretty(); |
189 | 313 |
|
190 |
| - // We turn that into logical right shift and extractbits |
| 314 | + const exprt &root=unpacked.op(); |
| 315 | + const exprt &offset=unpacked.offset(); |
191 | 316 |
|
192 |
| - const exprt &offset=src.op1(); |
193 |
| - const typet &offset_type=ns.follow(offset.type()); |
| 317 | + const array_typet &array_type=to_array_type(root.type()); |
| 318 | + const typet &subtype=array_type.subtype(); |
194 | 319 |
|
195 |
| - // adjust for endianness |
196 |
| - exprt adjusted_offset; |
| 320 | + assert(pointer_offset_bits(subtype, ns)==8); |
197 | 321 |
|
198 |
| - if(little_endian) |
199 |
| - adjusted_offset=offset; |
| 322 | + mp_integer size_bits=pointer_offset_bits(unpacked.type(), ns); |
| 323 | + if(size_bits<0) |
| 324 | + { |
| 325 | + mp_integer op0_bits=pointer_offset_bits(unpacked.op().type(), ns); |
| 326 | + if(op0_bits<0) |
| 327 | + throw "byte_extract flatting with non-constant size:\n"+ |
| 328 | + unpacked.pretty(); |
200 | 329 | else
|
201 |
| - { |
202 |
| - exprt width_constant=from_integer(op0_bits/8-1, offset_type); |
203 |
| - adjusted_offset=minus_exprt(width_constant, offset); |
204 |
| - } |
| 330 | + size_bits=op0_bits; |
| 331 | + } |
205 | 332 |
|
206 |
| - mult_exprt times_eight(adjusted_offset, from_integer(8, offset_type)); |
| 333 | + mp_integer num_elements= |
| 334 | + size_bits/8+((size_bits%8==0)?0:1); |
207 | 335 |
|
208 |
| - // cast to generic bit-vector |
209 |
| - std::size_t op0_width=integer2unsigned(op0_bits); |
210 |
| - typecast_exprt src_op0_tc(src.op0(), bv_typet(op0_width)); |
211 |
| - lshr_exprt left_shift(src_op0_tc, times_eight); |
| 336 | + const typet &offset_type=ns.follow(offset.type()); |
212 | 337 |
|
213 |
| - extractbits_exprt extractbits; |
| 338 | + // get 'width'-many bytes, and concatenate |
| 339 | + std::size_t width_bytes=integer2unsigned(num_elements); |
| 340 | + exprt::operandst op; |
| 341 | + op.reserve(width_bytes); |
214 | 342 |
|
215 |
| - extractbits.src()=left_shift; |
216 |
| - extractbits.type()=src.type(); |
217 |
| - extractbits.upper()=from_integer(size_bits-1, offset_type); |
218 |
| - extractbits.lower()=from_integer(0, offset_type); |
| 343 | + for(std::size_t i=0; i<width_bytes; i++) |
| 344 | + { |
| 345 | + // the most significant byte comes first in the concatenation! |
| 346 | + std::size_t offset_int= |
| 347 | + little_endian?(width_bytes-i-1):i; |
| 348 | + |
| 349 | + plus_exprt offset_i(from_integer(offset_int, offset_type), offset); |
| 350 | + index_exprt index_expr(root, offset_i); |
| 351 | + op.push_back(index_expr); |
| 352 | + } |
219 | 353 |
|
220 |
| - return extractbits; |
| 354 | + if(width_bytes==1) |
| 355 | + return simplify_expr(typecast_exprt(op.front(), src.type()), ns); |
| 356 | + else // width_bytes>=2 |
| 357 | + { |
| 358 | + concatenation_exprt concatenation(src.type()); |
| 359 | + concatenation.operands().swap(op); |
| 360 | + return concatenation; |
221 | 361 | }
|
222 | 362 | }
|
223 | 363 |
|
|
0 commit comments