@@ -22,9 +22,60 @@ instrumentation_props data structure.
22
22
\*******************************************************************/
23
23
24
24
#include < taint-slicer/instrumentation_props.h>
25
+ #include < goto-programs/remove_returns.h>
26
+ #include < java_bytecode/java_types.h>
27
+ #include < java_bytecode/expr2java.h>
28
+ #include < util/prefix.h>
25
29
#include < util/msgstream.h>
26
30
#include < deque>
27
31
32
+ bool is_primitive_type (const typet &type)
33
+ {
34
+ return type==java_boolean_type () ||
35
+ type==java_byte_type () ||
36
+ type==java_char_type () ||
37
+ type==java_short_type () ||
38
+ type==java_int_type () ||
39
+ type==java_long_type () ||
40
+ type==java_float_type () ||
41
+ type==java_double_type ();
42
+ }
43
+
44
+ std::string unwrap_type_name (const typet &type, const namespacet &ns)
45
+ {
46
+ if (type.id ()==ID_pointer)
47
+ return unwrap_type_name (to_pointer_type (type).subtype (), ns);
48
+ if (type.id ()==ID_array)
49
+ return unwrap_type_name (to_array_type (type).subtype (), ns);
50
+ if (type.id ()==ID_symbol)
51
+ return as_string (to_symbol_type (type).get_identifier ());
52
+ const std::string result=type2java (type, ns);
53
+ if (!is_primitive_type (type))
54
+ throw std::logic_error (
55
+ " ERROR: expecting a primitive data type, but received '" + result + " '." );
56
+ return result;
57
+ }
58
+
59
/*******************************************************************\

Function: is_java_array_type_name

  Inputs: datatype - a type name.

 Outputs: true iff "datatype" starts with "java::array[" and its last
          character is ']'.

 Purpose: Recognises type names of the form "java::array[...]".

\*******************************************************************/
bool is_java_array_type_name(const std::string &datatype)
{
  static const std::string prefix="java::array[";
  // A successful prefix match implies the string is non-empty, so the
  // access to the last character is safe.
  return datatype.compare(0, prefix.size(), prefix)==0 &&
         datatype.back()==']';
}
63
+
64
+ bool does_instrumentation_of_type_require_subclass (
65
+ const std::string &datatype, const typet &type)
66
+ {
67
+ return is_primitive_type (type) ||
68
+ is_java_array_type_name (datatype) ||
69
+ datatype==" java::java.lang.Object" ;
70
+ }
71
+
72
+ static typet get_return_type_from_function_name (
73
+ const std::string &fname, const symbol_tablet &symbol_table)
74
+ {
75
+ assert (symbol_table.has_symbol (fname+RETURN_VALUE_SUFFIX));
76
+ return symbol_table.lookup (fname+RETURN_VALUE_SUFFIX).type ;
77
+ }
78
+
28
79
/* ******************************************************************\
29
80
30
81
Function: perform_BFS
@@ -72,6 +123,7 @@ static void perform_BFS(
72
123
73
124
taint_instrumentation_propst::taint_instrumentation_propst (
74
125
const taint_propagation_chainst &chains,
126
+ const taint_programt &program,
75
127
const taint_function_idt &_root,
76
128
const std::set<taint_function_idt> &in_functions,
77
129
const std::set<taint_function_idt> &in_suppressed)
@@ -107,14 +159,96 @@ taint_instrumentation_propst::taint_instrumentation_propst(
107
159
sinks.insert (location_props.size ());
108
160
location_props.push_back (chains.get_nodes ().at (nid));
109
161
}
162
+
163
+ build_map_from_typenames_to_tokennames (chains, program);
164
+ }
165
+
166
+
167
+ /* ******************************************************************\
168
+
169
+ Function: build_map_from_typenames_to_tokennames
170
+
171
+ Inputs:
172
+
173
+ Outputs:
174
+
175
+ Purpose:
176
  The function fills in the member map "datatypes". For each data type
  (identified by its type name, see "unwrap_type_name") that is to be
  instrumented with a shadow variable, it computes an instance of
  "taint_instrumentation_propst::datatype_infot" holding the details of
  how the shadow variable is instrumented into that type.
181
+
182
+ \*******************************************************************/
183
+ void taint_instrumentation_propst::build_map_from_typenames_to_tokennames (
184
+ const taint_propagation_chainst &chains,
185
+ const taint_programt &program)
186
+ {
187
+ for (const auto &loc : get_location_props ())
188
+ {
189
+ const goto_programt::instructiont &I=*loc.get_instruction_id ();
190
+ assert (I.type ==FUNCTION_CALL);
191
+ const code_function_callt &fn_call = to_code_function_call (I.code );
192
+ const exprt &callee_expr = fn_call.function ();
193
+ assert (callee_expr.id ()==ID_symbol);
194
+ irep_idt callee_id = to_symbol_expr (callee_expr).get_identifier ();
195
+ const std::string callee_ident = as_string (callee_id);
196
+ const code_typet &fn_type =
197
+ program.get_functions ().function_map .at (callee_id).type ;
198
+
199
+ std::set<argidx_and_tokennamet> to_process;
200
+ for (const auto &elem : loc.get_assumption ())
201
+ to_process.insert (elem);
202
+ for (const auto &elem : loc.get_turn_on ())
203
+ to_process.insert (elem);
204
+ for (const auto &elem : loc.get_turn_off ())
205
+ to_process.insert (elem);
206
+
207
+ for (const auto & arg_token : to_process)
208
+ {
209
+ assert (
210
+ arg_token.get_argidx ()!=
211
+ taint_tokens_propagation_grapht::get_void_loc ());
212
+ std::string datatype;
213
+ typet type;
214
+ if (arg_token.get_argidx ()==-1 ) // Return value?
215
+ type=get_return_type_from_function_name (
216
+ callee_ident,
217
+ program.get_symbol_table ());
218
+ else
219
+ type=fn_type.parameters ().at (arg_token.get_argidx ()).type ();
220
+ datatype=unwrap_type_name (type, program.get_namespace ());
221
+ auto it=datatypes.find (datatype);
222
+ if (it!=datatypes.end ())
223
+ {
224
+ assert (
225
+ does_instrumentation_of_type_require_subclass (datatype, type)==
226
+ it->second .subclass_required () &&
227
+ is_primitive_type (type)==it->second .is_primitive ());
228
+ it->second .add_token (arg_token.get_token_name ());
229
+ }
230
+ else
231
+ {
232
+ datatypes.insert (
233
+ {
234
+ datatype,
235
+ {
236
+ datatype,
237
+ type,
238
+ does_instrumentation_of_type_require_subclass (datatype, type),
239
+ is_primitive_type (type),
240
+ { arg_token.get_token_name () }
241
+ }
242
+ });
243
+ }
244
+ }
245
+ }
110
246
}
111
247
248
+
112
249
void taint_build_instrumentation_props (
113
250
const taint_propagation_chainst &chains,
114
- const taint_tokens_propagation_grapht &tokens_propagation_graph,
115
- const goto_functionst &program_functions,
116
- const call_grapht &call_graph,
117
- const call_grapht &inverted_call_graph,
251
+ const taint_programt &program,
118
252
std::vector<taint_instrumentation_propst> &output)
119
253
{
120
254
// First we collect all functions mentioned in the graph of chains.
@@ -137,7 +271,8 @@ void taint_build_instrumentation_props(
137
271
// all of them in the call graph; a nearest common caller has no
138
272
// callees which are themselves common callers.
139
273
std::set<irep_idt> roots;
140
- find_nearest_common_callees (inverted_call_graph, functions, roots);
274
+ find_nearest_common_callees (
275
+ program.get_inverted_call_graph (), functions, roots);
141
276
142
277
for (const auto &root : roots)
143
278
{
@@ -150,7 +285,8 @@ void taint_build_instrumentation_props(
150
285
std::set<taint_function_idt> suppressed;
151
286
{
152
287
// First we called all callees including those which should be suppressed.
153
- find_direct_or_indirect_callees_of_function (call_graph, root, callees);
288
+ find_direct_or_indirect_callees_of_function (
289
+ program.get_call_graph (), root, callees);
154
290
// Now we compute suppressed functions and erase them from the callees
155
291
// computed above. We do so in 3 steps.
156
292
// Step 1: We collect functions which definitelly should be suppressed.
@@ -165,8 +301,8 @@ void taint_build_instrumentation_props(
165
301
const std::string full_function_name=as_string (to_symbol_expr (
166
302
to_code_function_call (I.code ).function ()).get_identifier ());
167
303
if (callees.count (full_function_name)!=0UL &&
168
- program_functions .function_map .at (full_function_name)
169
- .body_available ())
304
+ program. get_functions () .function_map .at (full_function_name)
305
+ .body_available ())
170
306
{
171
307
suppressed.insert (full_function_name);
172
308
}
@@ -178,7 +314,7 @@ void taint_build_instrumentation_props(
178
314
std::unordered_set<irep_idt, dstring_hash> suppressions;
179
315
for (const auto &fn : suppressed)
180
316
find_direct_or_indirect_callees_of_function (
181
- call_graph , fn, suppressions);
317
+ program. get_call_graph () , fn, suppressions);
182
318
// Step 3: We copy from "suppressions" to "suppressed" each function
183
319
// reachable from the root without passing through any function
184
320
// collected in the step 1.
@@ -187,7 +323,8 @@ void taint_build_instrumentation_props(
187
323
std::unordered_set<irep_idt, dstring_hash> ignored_functions (
188
324
suppressed.cbegin (), suppressed.cend ());
189
325
if (!exists_direct_or_indirect_call (
190
- call_graph, root, *suppressions.cbegin (), ignored_functions))
326
+ program.get_call_graph (), root, *suppressions.cbegin (),
327
+ ignored_functions))
191
328
{
192
329
suppressed.insert (as_string (*suppressions.cbegin ()));
193
330
callees.erase (*suppressions.cbegin ());
@@ -202,13 +339,14 @@ void taint_build_instrumentation_props(
202
339
// table.)
203
340
std::set<taint_function_idt> available_functions;
204
341
for (const auto &fn : callees)
205
- if (program_functions .function_map .at (fn).body_available ())
342
+ if (program. get_functions () .function_map .at (fn).body_available ())
206
343
available_functions.insert (as_string (fn));
207
344
// We are ready to create the instrumentation props for the root function.
208
345
// This may fail, if the set of the corresponding chains is empty.
209
346
const taint_instrumentation_propst props
210
347
{
211
348
chains,
349
+ program,
212
350
as_string (root),
213
351
available_functions,
214
352
suppressed
@@ -220,6 +358,24 @@ void taint_build_instrumentation_props(
220
358
221
359
void dump_as_json (const taint_instrumentation_propst &props, json_objectt &out)
222
360
{
361
+ {
362
+ json_arrayt out_types;
363
+ for (const auto &elem : props.get_datatypes ())
364
+ {
365
+ json_arrayt out_tokens;
366
+ for (const auto &name : elem.second .get_tokens ())
367
+ out_tokens.push_back (json_stringt (msgstream () << name));
368
+ json_objectt out_type_props;
369
+ out_type_props[" type_name" ]=json_stringt (elem.first );
370
+ out_type_props[" shadow_vars" ]=out_tokens;
371
+ out_type_props[" make_subclass" ]=jsont::json_boolean (
372
+ elem.second .subclass_required ());
373
+ out_type_props[" is_primitive" ]=jsont::json_boolean (
374
+ elem.second .is_primitive ());
375
+ out_types.push_back (out_type_props);
376
+ }
377
+ out[" datatypes" ]=out_types;
378
+ }
223
379
{
224
380
json_arrayt out_locations;
225
381
for (const auto &loc : props.get_location_props ())
0 commit comments