From 731c69e498d3efcea07fcd4e519f96a41249e0f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20G=C3=BCdemann?= Date: Thu, 12 Jul 2018 15:50:32 +0200 Subject: [PATCH 1/3] Keep expressions unchanged when adding temporary variables Search recursively through expressions instead of only removing outer typecast expressions. --- .../java_bytecode_convert_method.cpp | 84 ++++++++++++++----- 1 file changed, 64 insertions(+), 20 deletions(-) diff --git a/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp b/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp index a95ed0f9159..00539b45ecf 100644 --- a/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp @@ -33,6 +33,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include @@ -3176,8 +3177,13 @@ irep_idt java_bytecode_convert_methodt::get_static_field( return inherited_method.get_full_component_identifier(); } -/// create temporary variables if a write instruction can have undesired side- -/// effects +/// Create temporary variables if a write instruction can have undesired side- +/// effects. +/// \param tmp_var_prefix: The prefix string to use for new temporary variables +/// \param tmp_var_type: The type of the temporary variable. +/// \param[out] block: The code block the assignment is added to if required. +/// \param write_type: The enumeration type of the write instruction. +/// \param identifier: The identifier of the symbol in the write instruction. void java_bytecode_convert_methodt::save_stack_entries( const std::string &tmp_var_prefix, const typet &tmp_var_type, @@ -3185,35 +3191,73 @@ void java_bytecode_convert_methodt::save_stack_entries( const bytecode_write_typet write_type, const irep_idt &identifier) { + const std::function, const exprt &expr)> + entry_matches = [&entry_matches]( + const std::function predicate, + const exprt &expr) { + const tvt &tvres = predicate(expr); + if(tvres.is_unknown()) + { + return std::any_of( + expr.operands().begin(), + expr.operands().end(), + [&predicate, &entry_matches](const exprt &expr) { + return entry_matches(predicate, expr); + }); + } + else + { + return tvres.is_true(); + } + }; + + const std::function has_member_entry = [&identifier]( + const exprt &expr) { + const auto member_expr = expr_try_dynamic_cast(expr); + return !member_expr ? tvt::unknown() + : tvt(member_expr->get_component_name() == identifier); + }; + + const std::function is_symbol_entry = + [&identifier](const exprt &expr) { + const auto symbol_expr = expr_try_dynamic_cast(expr); + return !symbol_expr ? tvt::unknown() + : tvt(symbol_expr->get_identifier() == identifier); + }; + + const std::function is_dereference_entry = + [](const exprt &expr) { + const auto dereference_expr = + expr_try_dynamic_cast(expr); + return !dereference_expr ? tvt::unknown() : tvt(true); + }; + for(auto &stack_entry : stack) { - // remove typecasts if existing - while(stack_entry.id()==ID_typecast) - stack_entry=to_typecast_expr(stack_entry).op(); - // variables or static fields and symbol -> save symbols with same id - if((write_type==bytecode_write_typet::VARIABLE || - write_type==bytecode_write_typet::STATIC_FIELD) && - stack_entry.id()==ID_symbol) + if( + (write_type == bytecode_write_typet::VARIABLE || + write_type == bytecode_write_typet::STATIC_FIELD) && + entry_matches(is_symbol_entry, stack_entry)) { - const symbol_exprt &var=to_symbol_expr(stack_entry); - if(var.get_identifier()==identifier) - create_stack_tmp_var(tmp_var_prefix, tmp_var_type, block, stack_entry); + create_stack_tmp_var(tmp_var_prefix, tmp_var_type, block, stack_entry); } // array reference and dereference -> save all references on the stack - else if(write_type==bytecode_write_typet::ARRAY_REF && - stack_entry.id()==ID_dereference) + else if( + write_type == bytecode_write_typet::ARRAY_REF && + entry_matches(is_dereference_entry, stack_entry)) + { create_stack_tmp_var(tmp_var_prefix, tmp_var_type, block, stack_entry); + } // field and member access -> compare component name - else if(write_type==bytecode_write_typet::FIELD && - stack_entry.id()==ID_member) + else if( + write_type == bytecode_write_typet::FIELD && + entry_matches(has_member_entry, stack_entry)) { - const irep_idt &entry_id= - to_member_expr(stack_entry).get_component_name(); - if(entry_id==identifier) - create_stack_tmp_var(tmp_var_prefix, tmp_var_type, block, stack_entry); + create_stack_tmp_var(tmp_var_prefix, tmp_var_type, block, stack_entry); } } } From 6f63050e059cf889b60e948f7d0f6bd4ccd3e41f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20G=C3=BCdemann?= Date: Fri, 13 Jul 2018 10:48:42 +0200 Subject: [PATCH 2/3] Simplify stack element replacement loop --- .../java_bytecode_convert_method.cpp | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp b/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp index 00539b45ecf..330f90ef019 100644 --- a/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp @@ -3212,6 +3212,9 @@ void java_bytecode_convert_methodt::save_stack_entries( } }; + // Function that checks whether the expression accesses a member with the + // given identifier name. These accesses are created in the case of `iinc`, or + // non-array `?store` instructions. const std::function has_member_entry = [&identifier]( const exprt &expr) { const auto member_expr = expr_try_dynamic_cast(expr); @@ -3219,6 +3222,9 @@ void java_bytecode_convert_methodt::save_stack_entries( : tvt(member_expr->get_component_name() == identifier); }; + // Function that checks whether the expression is a symbol with the given + // identifier name. These accesses are created in the case of `putstatic` or + // `putfield` instructions. const std::function is_symbol_entry = [&identifier](const exprt &expr) { const auto symbol_expr = expr_try_dynamic_cast(expr); @@ -3226,6 +3232,9 @@ void java_bytecode_convert_methodt::save_stack_entries( : tvt(symbol_expr->get_identifier() == identifier); }; + // Function that checks whether the expression is a dereference + // expression. These accesses are created in `?astore` array write + // instructions. const std::function is_dereference_entry = [](const exprt &expr) { const auto dereference_expr = @@ -3235,27 +3244,21 @@ void java_bytecode_convert_methodt::save_stack_entries( for(auto &stack_entry : stack) { - // variables or static fields and symbol -> save symbols with same id - if( - (write_type == bytecode_write_typet::VARIABLE || - write_type == bytecode_write_typet::STATIC_FIELD) && - entry_matches(is_symbol_entry, stack_entry)) - { - create_stack_tmp_var(tmp_var_prefix, tmp_var_type, block, stack_entry); - } - - // array reference and dereference -> save all references on the stack - else if( - write_type == bytecode_write_typet::ARRAY_REF && - entry_matches(is_dereference_entry, stack_entry)) - { - create_stack_tmp_var(tmp_var_prefix, tmp_var_type, block, stack_entry); - } - - // field and member access -> compare component name - else if( - write_type == bytecode_write_typet::FIELD && - entry_matches(has_member_entry, stack_entry)) + bool replace = false; + switch(write_type) + { + case bytecode_write_typet::VARIABLE: + case bytecode_write_typet::STATIC_FIELD: + replace = entry_matches(is_symbol_entry, stack_entry); + break; + case bytecode_write_typet::ARRAY_REF: + replace = entry_matches(is_dereference_entry, stack_entry); + break; + case bytecode_write_typet::FIELD: + replace = entry_matches(has_member_entry, stack_entry); + break; + } + if(replace) { create_stack_tmp_var(tmp_var_prefix, tmp_var_type, block, stack_entry); } From a63212e1385bce778ae24ccf58f6964276b29b5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20G=C3=BCdemann?= Date: Fri, 20 Jul 2018 11:12:32 +0200 Subject: [PATCH 3/3] Add regression test for stack variables with typecasts This regression test mimicks the bytecode that exhibited the error in Tika. --- .../jbmc/stack_var12/stack_typecast.class | Bin 0 -> 367 bytes .../jbmc/stack_var12/stack_typecast.j | 37 ++++++++++++++++++ jbmc/regression/jbmc/stack_var12/test.desc | 17 ++++++++ 3 files changed, 54 insertions(+) create mode 100644 jbmc/regression/jbmc/stack_var12/stack_typecast.class create mode 100644 jbmc/regression/jbmc/stack_var12/stack_typecast.j create mode 100644 jbmc/regression/jbmc/stack_var12/test.desc diff --git a/jbmc/regression/jbmc/stack_var12/stack_typecast.class b/jbmc/regression/jbmc/stack_var12/stack_typecast.class new file mode 100644 index 0000000000000000000000000000000000000000..0d88a2fa4c97f7a4771a8337e3f5e3746f4a3851 GIT binary patch literal 367 zcmY+9yH3ME5Jm4e&dZp*hDO4c4V)SX!s%;q(DKz z2k=qA*tntDnb|pa?#$Qs$0vXhHVC~?Jc@B7;@k0w7fN3f8oMf1dXHf0JC!v-HVm{F z&9w0Euk_Pg`XbY#(8daSgxc2jjN!xNMz+y`O|VXrJoV*~ilmKoEE8zbgj1puf^m6Z z!9<$lM!kaTH_N;#R60X}^^*Iv9y8Dj)D<=iGif+~3N26>zef+o!4BhhutPAt{4c5-{ Z3&REm{Qg0}8s~N?7@`KJ9JW+ajbGj5KX3p5 literal 0 HcmV?d00001 diff --git a/jbmc/regression/jbmc/stack_var12/stack_typecast.j b/jbmc/regression/jbmc/stack_var12/stack_typecast.j new file mode 100644 index 00000000000..011f8a018e8 --- /dev/null +++ b/jbmc/regression/jbmc/stack_var12/stack_typecast.j @@ -0,0 +1,37 @@ +.class stack_typecast +.super java/lang/Object + +.field public position I +.field public buffer [B + +.method public ()V + aload_0 + invokevirtual java/lang/Object/()V + return +.end method + +.method public f()I + .limit stack 6 + .limit locals 1 + .var 0 is this stack_typecast from begin to end + .line 0 +begin: + + sipush 255 + + aload_0 + getfield stack_typecast/buffer [B + + aload_0 + dup + getfield stack_typecast/position I + dup_x1 + iconst_1 + iadd + putfield stack_typecast/position I + baload + iand +end: + ireturn + +.end method diff --git a/jbmc/regression/jbmc/stack_var12/test.desc b/jbmc/regression/jbmc/stack_var12/test.desc new file mode 100644 index 00000000000..27eaa97285e --- /dev/null +++ b/jbmc/regression/jbmc/stack_var12/test.desc @@ -0,0 +1,17 @@ +CORE +stack_typecast.class +--cover location --function stack_typecast.f +^EXIT=0$ +^SIGNAL=0$ +covered \(100.0%\) +-- +-- +This tests that there is no invariant violation when modifying the values on the +stack. in this case the `position` variable is used as index in an array via the +Java equivalent of + + buffer[position++]; + +The code pushes position, duplicates it and the modifies the stack +entry. Before, the stack entry generation removed typecasts which gave a typing +problem with the `iand` operator