diff --git a/jbmc/regression/jbmc/lazyloading10/test.desc b/jbmc/regression/jbmc/lazyloading10/test.desc index 3a1f2da5ae8..eb5a2d31e3b 100644 --- a/jbmc/regression/jbmc/lazyloading10/test.desc +++ b/jbmc/regression/jbmc/lazyloading10/test.desc @@ -1,11 +1,9 @@ CORE symex-driven-lazy-loading-expected-failure test.class --lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point test.sety -^EXIT=6$ ^SIGNAL=0$ -entry point 'test\.sety' is ambiguous between: -test\.sety:\(I\)V -test\.sety:\(F\)V +CI lazy methods: elaborate java::test\.sety:\(I\)V +CI lazy methods: elaborate java::test\.sety:\(F\)V -- -- This doesn't work under symex-driven lazy loading because it is incompatible with --lazy-methods diff --git a/jbmc/regression/jbmc/lazyloading11/test.desc b/jbmc/regression/jbmc/lazyloading11/test.desc index ee2ef683627..3a3b51770a6 100644 --- a/jbmc/regression/jbmc/lazyloading11/test.desc +++ b/jbmc/regression/jbmc/lazyloading11/test.desc @@ -1,6 +1,6 @@ CORE symex-driven-lazy-loading-expected-failure test.class ---lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(I)V' --lazy-methods-extra-entry-point 'test.sety:(F)V' +--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(I\)V' --lazy-methods-extra-entry-point 'test.sety:\(F\)V' ^EXIT=0$ ^SIGNAL=0$ VERIFICATION SUCCESSFUL diff --git a/jbmc/regression/jbmc/lazyloading7/test.desc b/jbmc/regression/jbmc/lazyloading7/test.desc index 009f489aeff..10f1aee96f2 100644 --- a/jbmc/regression/jbmc/lazyloading7/test.desc +++ b/jbmc/regression/jbmc/lazyloading7/test.desc @@ -1,6 +1,6 @@ CORE symex-driven-lazy-loading-expected-failure test.class ---lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(I)V' +--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(I\)V' ^EXIT=0$ ^SIGNAL=0$ VERIFICATION SUCCESSFUL diff --git a/jbmc/regression/jbmc/lazyloading8/test.desc b/jbmc/regression/jbmc/lazyloading8/test.desc index 2476aa30841..ab66e0958ab 100644 --- a/jbmc/regression/jbmc/lazyloading8/test.desc +++ b/jbmc/regression/jbmc/lazyloading8/test.desc @@ -1,6 +1,6 @@ CORE symex-driven-lazy-loading-expected-failure test.class ---lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(F)V' +--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(F\)V' ^EXIT=0$ ^SIGNAL=0$ VERIFICATION SUCCESSFUL diff --git a/jbmc/src/java_bytecode/Makefile b/jbmc/src/java_bytecode/Makefile index 83d465fd379..df1511612ce 100644 --- a/jbmc/src/java_bytecode/Makefile +++ b/jbmc/src/java_bytecode/Makefile @@ -32,6 +32,7 @@ SRC = bytecode_info.cpp \ java_string_literals.cpp \ java_types.cpp \ java_utils.cpp \ + load_method_by_regex.cpp \ mz_zip_archive.cpp \ remove_exceptions.cpp \ remove_instanceof.cpp \ diff --git a/jbmc/src/java_bytecode/ci_lazy_methods.cpp b/jbmc/src/java_bytecode/ci_lazy_methods.cpp index 647dc6b9c84..18be0320498 100644 --- a/jbmc/src/java_bytecode/ci_lazy_methods.cpp +++ b/jbmc/src/java_bytecode/ci_lazy_methods.cpp @@ -34,7 +34,7 @@ ci_lazy_methodst::ci_lazy_methodst( const symbol_tablet &symbol_table, const irep_idt &main_class, const std::vector &main_jar_classes, - const std::vector &lazy_methods_extra_entry_points, + const std::vector &lazy_methods_extra_entry_points, java_class_loadert &java_class_loader, const std::vector &extra_instantiated_classes, const select_pointer_typet &pointer_type_selector, @@ -103,8 +103,13 @@ bool ci_lazy_methodst::operator()( // Add any extra entry points specified; we should elaborate these in the // same way as the main function. - std::vector extra_entry_points=lazy_methods_extra_entry_points; - resolve_method_names(extra_entry_points, symbol_table); + std::vector extra_entry_points; + for(const auto &extra_function_generator : lazy_methods_extra_entry_points) + { + const auto &extra_methods = extra_function_generator(symbol_table); + extra_entry_points.insert( + extra_entry_points.end(), extra_methods.begin(), extra_methods.end()); + } methods_to_convert_later.insert( extra_entry_points.begin(), extra_entry_points.end()); @@ -355,54 +360,6 @@ ci_lazy_methodst::entry_point_methods(const symbol_tablet &symbol_table) return methods_to_convert_later; } -/// Translates the given list of method names from human-readable to -/// internal syntax. -/// Expands any wildcards (entries ending in '.*') in the given method -/// list to include all non-static methods defined on the given class. -/// \param [in, out] methods: List of methods to expand. Any wildcard entries -/// will be deleted and the expanded entries appended to the end. -/// \param symbol_table: global symbol table -void ci_lazy_methodst::resolve_method_names( - std::vector &methods, - const symbol_tablet &symbol_table) -{ - std::vector new_methods; - for(const irep_idt &method : methods) - { - const std::string &method_str=id2string(method); - if(!has_suffix(method_str, ".*")) - { - std::string error_message; - irep_idt internal_name= - resolve_friendly_method_name( - method_str, - symbol_table, - error_message); - if(internal_name==irep_idt()) - throw "entry point "+error_message; - new_methods.push_back(internal_name); - } - else - { - irep_idt classname="java::"+method_str.substr(0, method_str.length()-2); - if(!symbol_table.has_symbol(classname)) - throw "wildcard entry point '"+method_str+"': unknown class"; - - for(const auto &name_symbol : symbol_table.symbols) - { - if(name_symbol.second.type.id()!=ID_code) - continue; - if(!to_code_type(name_symbol.second.type).has_this()) - continue; - if(has_prefix(id2string(name_symbol.first), id2string(classname))) - new_methods.push_back(name_symbol.first); - } - } - } - - methods=std::move(new_methods); -} - /// Build up a list of methods whose type may be passed around reachable /// from the entry point. /// \param entry_points: list of fully-qualified function names that diff --git a/jbmc/src/java_bytecode/ci_lazy_methods.h b/jbmc/src/java_bytecode/ci_lazy_methods.h index 7e9d506488c..f90d2186a8e 100644 --- a/jbmc/src/java_bytecode/ci_lazy_methods.h +++ b/jbmc/src/java_bytecode/ci_lazy_methods.h @@ -91,6 +91,9 @@ typedef std::function< bool(const irep_idt &function_id, ci_lazy_methods_neededt)> method_convertert; +typedef std::function(const symbol_tablet &)> + load_extra_methodst; + class ci_lazy_methodst:public messaget { public: @@ -98,7 +101,7 @@ class ci_lazy_methodst:public messaget const symbol_tablet &symbol_table, const irep_idt &main_class, const std::vector &main_jar_classes, - const std::vector &lazy_methods_extra_entry_points, + const std::vector &lazy_methods_extra_entry_points, java_class_loadert &java_class_loader, const std::vector &extra_instantiated_classes, const select_pointer_typet &pointer_type_selector, @@ -112,10 +115,6 @@ class ci_lazy_methodst:public messaget const method_convertert &method_converter); private: - void resolve_method_names( - std::vector &methods, - const symbol_tablet &symbol_table); - void initialize_instantiated_classes( const std::unordered_set &entry_points, const namespacet &ns, @@ -149,7 +148,7 @@ class ci_lazy_methodst:public messaget class_hierarchyt class_hierarchy; irep_idt main_class; std::vector main_jar_classes; - std::vector lazy_methods_extra_entry_points; + const std::vector &lazy_methods_extra_entry_points; java_class_loadert &java_class_loader; const std::vector &extra_instantiated_classes; const select_pointer_typet &pointer_type_selector; diff --git a/jbmc/src/java_bytecode/java_bytecode_language.cpp b/jbmc/src/java_bytecode/java_bytecode_language.cpp index 155304d7514..aa88a8c19ac 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_language.cpp @@ -37,6 +37,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "ci_lazy_methods.h" #include "expr2java.h" +#include "load_method_by_regex.h" /// Consume options that are java bytecode specific. /// \param Command:line options @@ -94,10 +95,11 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd) const std::list &extra_entry_points= cmd.get_values("lazy-methods-extra-entry-point"); - lazy_methods_extra_entry_points.insert( - lazy_methods_extra_entry_points.end(), + std::transform( extra_entry_points.begin(), - extra_entry_points.end()); + extra_entry_points.end(), + std::back_inserter(extra_methods), + build_load_method_by_regex); if(cmd.isset("java-cp-include-files")) { @@ -815,7 +817,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( symbol_table, main_class, main_jar_classes, - lazy_methods_extra_entry_points, + extra_methods, java_class_loader, java_load_classes, get_pointer_type_selector(), diff --git a/jbmc/src/java_bytecode/java_bytecode_language.h b/jbmc/src/java_bytecode/java_bytecode_language.h index e4fcfaf8c14..8c3ca46d77a 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.h +++ b/jbmc/src/java_bytecode/java_bytecode_language.h @@ -62,7 +62,10 @@ Author: Daniel Kroening, kroening@kroening.com " --lazy-methods-extra-entry-point METHODNAME\n" /* NOLINT(*) */ \ " treat METHODNAME as a possible program entry point for\n" /* NOLINT(*) */ \ " the purpose of lazy method loading\n" /* NOLINT(*) */ \ - " A '.*' wildcard is allowed to specify all class members\n" + " METHODNAME can be a regex that will be matched against\n" /* NOLINT(*) */ \ + " all symbols. If missing a java:: prefix will be added\n" /* NOLINT(*) */ \ + " If no descriptor is found, all overloads of a method will\n"/* NOLINT(*) */ \ + " also be added." /* NOLINT(*) */ // clang-format on class symbolt; @@ -173,7 +176,6 @@ class java_bytecode_languaget:public languaget size_t max_user_array_length; // max size for user code created arrays method_bytecodet method_bytecode; lazy_methods_modet lazy_methods_mode; - std::vector lazy_methods_extra_entry_points; bool string_refinement_enabled; bool throw_runtime_exceptions; bool assert_uncaught_exceptions; @@ -195,6 +197,8 @@ class java_bytecode_languaget:public languaget class_hierarchyt class_hierarchy; // List of classes to never load std::unordered_set no_load_classes; + + std::vector extra_methods; }; std::unique_ptr new_java_bytecode_language(); diff --git a/jbmc/src/java_bytecode/load_method_by_regex.cpp b/jbmc/src/java_bytecode/load_method_by_regex.cpp new file mode 100644 index 00000000000..22468317495 --- /dev/null +++ b/jbmc/src/java_bytecode/load_method_by_regex.cpp @@ -0,0 +1,74 @@ + +/*******************************************************************\ + +Module: Java Bytecode + +Author: Diffblue Ltd. + +\*******************************************************************/ + +#include "load_method_by_regex.h" + +#include + +#include + +/// For a given user provided pattern, return a regex, having dealt with the +/// cases where the user has not prefixed with java:: or suffixed with the +/// descriptor +/// \param pattern: The user provided pattern +/// \return The regex to match with +static std::regex build_regex_from_pattern(const std::string &pattern) +{ + std::string modified_pattern = pattern; + if(does_pattern_miss_descriptor(pattern)) + modified_pattern += R"(:\(.*\).*)"; + + if(!has_prefix(pattern, "java::")) + modified_pattern = "java::" + modified_pattern; + + return std::regex{modified_pattern}; +} + +/// Identify if a parameter includes a part that will match a descriptor. That +/// is, does it have a colon separtor. +/// \param pattern: The user provided pattern +/// \return True if no descriptor is found (that is, the only : relates to the +/// java:: prefix. +bool does_pattern_miss_descriptor(const std::string &pattern) +{ + const size_t descriptor_index = pattern.rfind(':'); + if(descriptor_index == std::string::npos) + return true; + + static const std::string java_prefix = "java::"; + return descriptor_index == java_prefix.length() - 1 && + has_prefix(pattern, java_prefix); +} + +/// Create a lambda that returns the symbols that the given pattern should be +/// loaded.If the pattern doesn't include a colon for matching the descriptor, +/// append a `:\(.*\).*` to the regex. Note this will mean all overloaded +/// methods will be marked as extra entry points for CI lazy loading. +/// If the pattern doesn't include the java:: prefix, prefix that +/// \param pattern: The user provided pattern +/// \return The lambda to execute. +std::function(const symbol_tablet &symbol_table)> +build_load_method_by_regex(const std::string &pattern) +{ + std::regex regex = build_regex_from_pattern(pattern); + + return [=](const symbol_tablet &symbol_table) { + std::vector matched_methods; + for(const auto &symbol : symbol_table.symbols) + { + if( + symbol.second.is_function() && + std::regex_match(id2string(symbol.first), regex)) + { + matched_methods.push_back(symbol.first); + } + } + return matched_methods; + }; +} diff --git a/jbmc/src/java_bytecode/load_method_by_regex.h b/jbmc/src/java_bytecode/load_method_by_regex.h new file mode 100644 index 00000000000..bdb18140538 --- /dev/null +++ b/jbmc/src/java_bytecode/load_method_by_regex.h @@ -0,0 +1,25 @@ +/*******************************************************************\ + +Module: Java Bytecode + +Author: Diffblue Ltd. + +\*******************************************************************/ + +/// \file +/// Process a pattern to use as a regex for selecting extra entry points for +/// ci_lazy_methodst + +#ifndef CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H +#define CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H + +#include + +class symbol_tablet; + +std::function(const symbol_tablet &symbol_table)> +build_load_method_by_regex(const std::string &pattern); + +bool does_pattern_miss_descriptor(const std::string &pattern); + +#endif // CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H diff --git a/jbmc/unit/Makefile b/jbmc/unit/Makefile index 0b6df177183..741ff87c8ff 100644 --- a/jbmc/unit/Makefile +++ b/jbmc/unit/Makefile @@ -21,6 +21,7 @@ SRC += java_bytecode/goto-programs/class_hierarchy_output.cpp \ java_bytecode/java_types/java_generic_symbol_type.cpp \ java_bytecode/java_types/java_type_from_string.cpp \ java_bytecode/java_utils_test.cpp \ + java_bytecode/load_method_by_regex.cpp \ java_bytecode/inherited_static_fields/inherited_static_fields.cpp \ pointer-analysis/custom_value_set_analysis.cpp \ solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp \ diff --git a/jbmc/unit/java_bytecode/load_method_by_regex.cpp b/jbmc/unit/java_bytecode/load_method_by_regex.cpp new file mode 100644 index 00000000000..5b754996728 --- /dev/null +++ b/jbmc/unit/java_bytecode/load_method_by_regex.cpp @@ -0,0 +1,178 @@ +/*******************************************************************\ + + Module: Unit tests for parsing generic classes + + Author: Diffblue Limited + +\*******************************************************************/ + +#include +#include +#include + +SCENARIO( + "load_method_by_regex::does_pattern_miss_descriptor", + "[core][java_bytecode][load_method_by_regex]") +{ + GIVEN("A string with a java prefix and no descriptor") + { + const std::string pattern = "java::com.diffblue.ClassName.methodName"; + WHEN("When calling does_pattern_miss_descriptor") + { + const bool result = does_pattern_miss_descriptor(pattern); + THEN("It should miss discriptor") + { + REQUIRE(result); + } + } + } + GIVEN("A string with a java prefix and a descriptor") + { + const std::string pattern = "java::com.diffblue.ClassName.methodName:()V"; + WHEN("When calling does_pattern_miss_descriptor") + { + const bool result = does_pattern_miss_descriptor(pattern); + THEN("It should have the discriptor") + { + REQUIRE_FALSE(result); + } + } + } + GIVEN("A string without a java prefix and without a descriptor") + { + const std::string pattern = "com.diffblue.ClassName.methodName"; + WHEN("When calling does_pattern_miss_descriptor") + { + const bool result = does_pattern_miss_descriptor(pattern); + THEN("It should miss discriptor") + { + REQUIRE(result); + } + } + } + GIVEN("A string without a java prefix and with a descriptor") + { + const std::string pattern = "com.diffblue.ClassName.methodName:()V"; + WHEN("When calling does_pattern_miss_descriptor") + { + const bool result = does_pattern_miss_descriptor(pattern); + THEN("It should have the discriptor") + { + REQUIRE_FALSE(result); + } + } + } + GIVEN("A string with an almost java prefix and no descriptor") + { + const std::string pattern = "java:com.diffblue.ClassName.methodName"; + WHEN("When calling does_pattern_miss_descriptor") + { + const bool result = does_pattern_miss_descriptor(pattern); + THEN("It should classify the last : as a descriptor") + { + REQUIRE_FALSE(result); + } + } + } + GIVEN("A string with an almost java prefix and with a descriptor") + { + const std::string pattern = "java:com.diffblue.ClassName.methodName:()V"; + WHEN("When calling does_pattern_miss_descriptor") + { + const bool result = does_pattern_miss_descriptor(pattern); + THEN("It should have the discriptor") + { + REQUIRE_FALSE(result); + } + } + } +} + +static symbolt create_method_symbol(const std::string &method_name) +{ + symbolt new_symbol; + new_symbol.name = method_name; + new_symbol.type = code_typet{{}, nil_typet{}}; + return new_symbol; +} + +static void require_result_for_pattern( + const std::string &pattern, + const std::vector &expected, + const symbol_tablet &symbol_table) +{ + WHEN("Constructing a load_method_by_regex") + { + const auto matcher = build_load_method_by_regex(pattern); + const auto &results = matcher(symbol_table); + if(expected.size() == 1) + { + THEN("Expect " + id2string(expected[0])) + { + require_vectors_equal_unordered(results, expected); + } + } + else + { + THEN("Expect " + std::to_string(expected.size()) + " symbols") + { + require_vectors_equal_unordered(results, expected); + } + } + } +} + +SCENARIO("load_method_by_regex", "[core][java_bytecode][load_method_by_regex]") +{ + symbol_tablet symbol_table; + symbol_table.add(create_method_symbol("java::pack.Class.methodName:()V")); + symbol_table.add(create_method_symbol("java::pack.Class.anotherMethod:()V")); + symbol_table.add(create_method_symbol("java::pack.Different.methodName:()V")); + symbol_table.add(create_method_symbol("java::another.Class.methodName:()V")); + + GIVEN("A pattern without java prefix, without descriptor, no regex") + { + const std::string pattern = "pack.Class.methodName"; + const std::vector expected = {"java::pack.Class.methodName:()V"}; + require_result_for_pattern(pattern, expected, symbol_table); + } + GIVEN("A pattern with java prefix, without descriptor, no regex") + { + const std::string pattern = "java::pack.Class.methodName"; + const std::vector expected = {"java::pack.Class.methodName:()V"}; + require_result_for_pattern(pattern, expected, symbol_table); + } + GIVEN("A pattern with java prefix, with descriptor, no regex") + { + const std::string pattern = R"(java::pack.Class.methodName:\(\)V)"; + const std::vector expected = {"java::pack.Class.methodName:()V"}; + require_result_for_pattern(pattern, expected, symbol_table); + } + GIVEN("A pattern with java prefix, with wrong descriptor, no regex") + { + const std::string pattern = R"(java::pack.Class.methodName:\(I\)V)"; + const std::vector expected = {}; + require_result_for_pattern(pattern, expected, symbol_table); + } + GIVEN("A pattern with java prefix, without descriptor, with regex") + { + const std::string pattern = "java::pack.Class..*"; + const std::vector expected = { + "java::pack.Class.methodName:()V", "java::pack.Class.anotherMethod:()V"}; + require_result_for_pattern(pattern, expected, symbol_table); + } + GIVEN("A pattern without java prefix, without descriptor, with regex") + { + const std::string pattern = "pack.Class..*"; + const std::vector expected = { + "java::pack.Class.methodName:()V", "java::pack.Class.anotherMethod:()V"}; + require_result_for_pattern(pattern, expected, symbol_table); + } + GIVEN("A pattern without java prefix, with descriptor, with regex in package") + { + const std::string pattern = R"(\w+.Class.methodName:\(\)V)"; + const std::vector expected = { + "java::pack.Class.methodName:()V", "java::another.Class.methodName:()V"}; + require_result_for_pattern(pattern, expected, symbol_table); + } +} diff --git a/unit/testing-utils/require_vectors_equal_unordered.h b/unit/testing-utils/require_vectors_equal_unordered.h new file mode 100644 index 00000000000..956079bc3bc --- /dev/null +++ b/unit/testing-utils/require_vectors_equal_unordered.h @@ -0,0 +1,28 @@ +/*******************************************************************\ + + Module: Unit test utilities + + Author: Diffblue Limited. + +\*******************************************************************/ + +#ifndef CPROVER_TESTING_UTILS_REQUIRE_VECTORS_EQUAL_UNORDERED_H +#define CPROVER_TESTING_UTILS_REQUIRE_VECTORS_EQUAL_UNORDERED_H + +#include +#include + +/// Checks whether two vectors are equal, ignoring ordering +/// \tparam T: The type of the vector contents +/// \param actual: The vector to check +/// \param expected: The vector to check against +template +void require_vectors_equal_unordered( + const std::vector &actual, + const std::vector &expected) +{ + REQUIRE(actual.size() == expected.size()); + REQUIRE_THAT(actual, Catch::Matchers::Vector::ContainsMatcher{expected}); +} + +#endif // CPROVER_TESTING_UTILS_REQUIRE_VECTORS_EQUAL_UNORDERED_H