@@ -1806,7 +1806,10 @@ code_blockt java_bytecode_convert_methodt::convert_instructions(
1806
1806
// (e.g. due to exceptional control flow)
1807
1807
if (!start_new_block)
1808
1808
start_new_block=address_pair.second .predecessors .size ()>1 ;
1809
- // Start a new lexical block if we've just entered a try block
1809
+ // Start a new lexical block if we've just entered a block in which
1810
+ // exceptions are handled. This is usually the start of a try block, but a
1811
+ // single try can be represented as multiple non-contiguous blocks in the
1812
+ // exception table.
1810
1813
if (!start_new_block && has_seen_previous_address)
1811
1814
{
1812
1815
for (const auto &exception_row : method.exception_table )
@@ -2335,105 +2338,81 @@ codet &java_bytecode_convert_methodt::do_exception_handling(
2335
2338
const java_bytecode_convert_methodt::methodt &method,
2336
2339
const std::set<method_offsett> &working_set,
2337
2340
method_offsett cur_pc,
2338
- codet &c )
2341
+ codet &code )
2339
2342
{
2340
- std::vector<irep_idt> exception_ids;
2341
- std::vector<irep_idt> handler_labels;
2342
-
2343
- // for each try-catch add a CATCH-PUSH instruction
2344
- // each CATCH-PUSH records a list of all the handler labels
2345
- // together with a list of all the exception ids
2343
+ // For each exception handler range that starts here add a CATCH-PUSH marker
2344
+ // Each CATCH-PUSH records a list of all the exception id and handler label
2345
+ // pairs handled for its exact block
2346
+
2347
+ // Gather all try-catch blocks that have cur_pc as the starting pc
2348
+ typedef std::vector<std::reference_wrapper<
2349
+ const java_bytecode_convert_methodt::methodt::exceptiont>> exceptionst;
2350
+ std::map<std::size_t , exceptionst> exceptions_by_end;
2351
+ for (const java_bytecode_convert_methodt::methodt::exceptiont &exception
2352
+ : method.exception_table )
2353
+ {
2354
+ if (exception.start_pc == cur_pc)
2355
+ exceptions_by_end[exception.end_pc ].push_back (exception);
2356
+ }
2357
+ for (const auto &exceptions : exceptions_by_end)
2358
+ {
2359
+ // For each block with a distinct end position create one CATCH-PUSH
2360
+ code_push_catcht catch_push;
2361
+ // Fill in its exception_list
2362
+ code_push_catcht::exception_listt &exception_list =
2363
+ catch_push.exception_list ();
2364
+ for (const java_bytecode_convert_methodt::methodt::exceptiont &exception
2365
+ : exceptions.second )
2366
+ {
2367
+ exception_list.emplace_back (
2368
+ exception.catch_type .get_identifier (),
2369
+ // Record the exception handler in the CATCH-PUSH instruction by
2370
+ // generating a label corresponding to the handler's pc
2371
+ label (std::to_string (exception.handler_pc )));
2372
+ }
2373
+ // Prepend the CATCH-PUSH instruction
2374
+ code = code_blockt ({ std::move (catch_push), code });
2375
+ }
2346
2376
2347
- // be aware of different try-catch blocks with the same starting pc
2348
- method_offsett pos = 0 ;
2349
- method_offsett end_pc = 0 ;
2350
- while (pos < method.exception_table .size ())
2377
+ // Next add the CATCH-POP instructions
2378
+ // exception_row.end_pc is exclusive so append a CATCH-POP instruction if
2379
+ // this is the instruction before it.
2380
+ // To do this, attempt to find all exception handlers that end at the
2381
+ // earliest known instruction after this one.
2382
+
2383
+ // Dangerously, we assume that the next opcode in the bytecode after the end
2384
+ // of the exception handler block (whose address matches the exclusive end
2385
+ // position of the block) will be a successor of some code investigated
2386
+ // before the instruction at the end of that handler and therefore in the
2387
+ // working set.
2388
+ // As an example of where this may fail, for non-obfuscated bytecode
2389
+ // generated by most compilers the next opcode after the block ending at the
2390
+ // end of the try block is the lexically next bit of code after the try
2391
+ // block, i.e. the catch block. When there aren't any throwing statements in
2392
+ // the try block this block will not be the successor of any instruction.
2393
+
2394
+ auto next_opcode_it = std::find_if (
2395
+ working_set.begin (),
2396
+ working_set.end (),
2397
+ [cur_pc](method_offsett offset) { return offset > cur_pc; });
2398
+ if (next_opcode_it != working_set.end ())
2351
2399
{
2352
- // check if this is the beginning of a try block
2353
- for (; pos < method.exception_table .size (); ++pos)
2400
+ // Count the distinct start positions of handlers that end at this location
2401
+ std::set<std::size_t > start_positions; // Use a set to deduplicate
2402
+ for (const auto &exception_row : method.exception_table )
2354
2403
{
2355
- // unexplored try-catch?
2356
- if (cur_pc == method.exception_table [pos].start_pc && end_pc == 0 )
2357
- {
2358
- end_pc = method.exception_table [pos].end_pc ;
2359
- }
2360
-
2361
- // currently explored try-catch?
2362
- if (
2363
- cur_pc == method.exception_table [pos].start_pc &&
2364
- method.exception_table [pos].end_pc == end_pc)
2365
- {
2366
- exception_ids.push_back (
2367
- method.exception_table [pos].catch_type .get_identifier ());
2368
- // record the exception handler in the CATCH-PUSH
2369
- // instruction by generating a label corresponding to
2370
- // the handler's pc
2371
- handler_labels.push_back (
2372
- label (std::to_string (method.exception_table [pos].handler_pc )));
2373
- }
2374
- else
2375
- break ;
2404
+ // Check if the next instruction found is the (exclusive) end of a block
2405
+ if (*next_opcode_it == exception_row.end_pc )
2406
+ start_positions.insert (exception_row.start_pc );
2376
2407
}
2377
-
2378
- // add the actual PUSH-CATCH instruction
2379
- if (!exception_ids.empty ())
2408
+ for (std::size_t handler = 0 ; handler < start_positions.size (); ++handler)
2380
2409
{
2381
- code_push_catcht catch_push;
2382
- INVARIANT (
2383
- exception_ids.size () == handler_labels.size (),
2384
- " Exception tags and handler labels should be 1-to-1" );
2385
- code_push_catcht::exception_listt &exception_list =
2386
- catch_push.exception_list ();
2387
- for (size_t i = 0 ; i < exception_ids.size (); ++i)
2388
- {
2389
- exception_list.push_back (
2390
- code_push_catcht::exception_list_entryt (
2391
- exception_ids[i], handler_labels[i]));
2392
- }
2393
-
2394
- c = code_blockt ({catch_push, c});
2395
- }
2396
- else
2397
- {
2398
- // advance
2399
- ++pos;
2410
+ // Append a CATCH-POP instruction before the end of the block
2411
+ code = code_blockt ({ code, code_pop_catcht () });
2400
2412
}
2401
-
2402
- // reset
2403
- end_pc = 0 ;
2404
- exception_ids.clear ();
2405
- handler_labels.clear ();
2406
2413
}
2407
2414
2408
- // next add the CATCH-POP instructions
2409
- size_t start_pc = 0 ;
2410
- // as the first try block may have start_pc 0, we
2411
- // must track it separately
2412
- bool first_handler = false ;
2413
- // check if this is the end of a try block
2414
- for (const auto &exception_row : method.exception_table )
2415
- {
2416
- // add the CATCH-POP before the end of the try block
2417
- if (
2418
- cur_pc < exception_row.end_pc && !working_set.empty () &&
2419
- *working_set.begin () == exception_row.end_pc )
2420
- {
2421
- // have we already added a CATCH-POP for the current try-catch?
2422
- // (each row corresponds to a handler)
2423
- if (exception_row.start_pc != start_pc || !first_handler)
2424
- {
2425
- if (!first_handler)
2426
- first_handler = true ;
2427
- // remember the beginning of the try-catch so that we don't add
2428
- // another CATCH-POP for the same try-catch
2429
- start_pc = exception_row.start_pc ;
2430
- // add CATCH_POP instruction
2431
- code_pop_catcht catch_pop;
2432
- c = code_blockt ({c, catch_pop});
2433
- }
2434
- }
2435
- }
2436
- return c;
2415
+ return code;
2437
2416
}
2438
2417
2439
2418
code_blockt java_bytecode_convert_methodt::convert_multianewarray (
0 commit comments