Skip to content

Commit 783fcea

Browse files
author
thk123
committed
Extend lazy-methods-extra-entry-point to support regex methods
This will allow for loading for example, all constructors or all methods called getSomething. Tests using this require minor modifications when they have specified the descriptor as the brackets must be escaped for the regex. However, the new code handles the case where no descriptor is provided (in which case it will match all overloads) and the case where no java:: prefix has been provided, leaving similar behaviour. Correcting documentation for the extra entry points
1 parent 1e55404 commit 783fcea

File tree

11 files changed

+296
-13
lines changed

11 files changed

+296
-13
lines changed
Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
CORE symex-driven-lazy-loading-expected-failure
22
test.class
33
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point test.sety
4-
^EXIT=6$
54
^SIGNAL=0$
6-
entry point 'test\.sety' is ambiguous between:
7-
test\.sety:\(I\)V
8-
test\.sety:\(F\)V
5+
CI lazy methods: elaborate java::test\.sety:\(I\)V
6+
CI lazy methods: elaborate java::test\.sety:\(F\)V
97
--
108
--
119
This doesn't work under symex-driven lazy loading because it is incompatible with --lazy-methods

jbmc/regression/jbmc/lazyloading11/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
CORE symex-driven-lazy-loading-expected-failure
22
test.class
3-
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(I)V' --lazy-methods-extra-entry-point 'test.sety:(F)V'
3+
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(I\)V' --lazy-methods-extra-entry-point 'test.sety:\(F\)V'
44
^EXIT=0$
55
^SIGNAL=0$
66
VERIFICATION SUCCESSFUL

jbmc/regression/jbmc/lazyloading7/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
CORE symex-driven-lazy-loading-expected-failure
22
test.class
3-
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(I)V'
3+
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(I\)V'
44
^EXIT=0$
55
^SIGNAL=0$
66
VERIFICATION SUCCESSFUL

jbmc/regression/jbmc/lazyloading8/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
CORE symex-driven-lazy-loading-expected-failure
22
test.class
3-
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(F)V'
3+
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(F\)V'
44
^EXIT=0$
55
^SIGNAL=0$
66
VERIFICATION SUCCESSFUL

jbmc/src/java_bytecode/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ SRC = bytecode_info.cpp \
3232
java_string_literals.cpp \
3333
java_types.cpp \
3434
java_utils.cpp \
35+
load_method_by_regex.cpp \
3536
mz_zip_archive.cpp \
3637
remove_exceptions.cpp \
3738
remove_instanceof.cpp \

jbmc/src/java_bytecode/java_bytecode_language.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Author: Daniel Kroening, [email protected]
3737
#include "ci_lazy_methods.h"
3838

3939
#include "expr2java.h"
40+
#include "load_method_by_regex.h"
4041

4142
/// Consume options that are java bytecode specific.
4243
/// \param Command:line options
@@ -94,10 +95,11 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd)
9495

9596
const std::list<std::string> &extra_entry_points=
9697
cmd.get_values("lazy-methods-extra-entry-point");
97-
lazy_methods_extra_entry_points.insert(
98-
lazy_methods_extra_entry_points.end(),
98+
std::transform(
9999
extra_entry_points.begin(),
100-
extra_entry_points.end());
100+
extra_entry_points.end(),
101+
std::back_inserter(extra_methods),
102+
build_load_method_by_regex);
101103

102104
if(cmd.isset("java-cp-include-files"))
103105
{
@@ -815,7 +817,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion(
815817
symbol_table,
816818
main_class,
817819
main_jar_classes,
818-
lazy_methods_extra_entry_points,
820+
extra_methods,
819821
java_class_loader,
820822
java_load_classes,
821823
get_pointer_type_selector(),

jbmc/src/java_bytecode/java_bytecode_language.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,10 @@ Author: Daniel Kroening, [email protected]
6262
" --lazy-methods-extra-entry-point METHODNAME\n" /* NOLINT(*) */ \
6363
" treat METHODNAME as a possible program entry point for\n" /* NOLINT(*) */ \
6464
" the purpose of lazy method loading\n" /* NOLINT(*) */ \
65-
" A '.*' wildcard is allowed to specify all class members\n"
65+
" METHODNAME can be a regex that will be matched against\n" /* NOLINT(*) */ \
66+
" all symbols. If missing a java:: prefix will be added\n" /* NOLINT(*) */ \
67+
" If no descriptor is found, all overloads of a method will\n"/* NOLINT(*) */ \
68+
" also be added." /* NOLINT(*) */
6669
// clang-format on
6770

6871
class symbolt;
@@ -173,7 +176,6 @@ class java_bytecode_languaget:public languaget
173176
size_t max_user_array_length; // max size for user code created arrays
174177
method_bytecodet method_bytecode;
175178
lazy_methods_modet lazy_methods_mode;
176-
std::vector<irep_idt> lazy_methods_extra_entry_points;
177179
bool string_refinement_enabled;
178180
bool throw_runtime_exceptions;
179181
bool assert_uncaught_exceptions;
@@ -195,6 +197,8 @@ class java_bytecode_languaget:public languaget
195197
class_hierarchyt class_hierarchy;
196198
// List of classes to never load
197199
std::unordered_set<std::string> no_load_classes;
200+
201+
std::vector<load_extra_methodst> extra_methods;
198202
};
199203

200204
std::unique_ptr<languaget> new_java_bytecode_language();
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
2+
/*******************************************************************\
3+
4+
Module: Java Bytecode
5+
6+
Author: Diffblue Ltd.
7+
8+
\*******************************************************************/
9+
10+
#include "load_method_by_regex.h"
11+
12+
#include <regex>
13+
14+
#include <util/symbol_table.h>
15+
16+
/// For a given user provided pattern, return a regex, having dealt with the
17+
/// cases where the user has not prefixed with java:: or suffixed with the
18+
/// descriptor
19+
/// \param pattern: The user provided pattern
20+
/// \return The regex to match with
21+
static std::regex build_regex_from_pattern(const std::string &pattern)
22+
{
23+
std::string modified_pattern = pattern;
24+
if(does_pattern_miss_descriptor(pattern))
25+
modified_pattern += R"(:\(.*\).*)";
26+
27+
if(!has_prefix(pattern, "java::"))
28+
modified_pattern = "java::" + modified_pattern;
29+
30+
return std::regex{modified_pattern};
31+
}
32+
33+
/// Identify if a parameter includes a part that will match a descriptor. That
34+
/// is, does it have a colon separtor.
35+
/// \param pattern: The user provided pattern
36+
/// \return True if no descriptor is found (that is, the only : relates to the
37+
/// java:: prefix.
38+
bool does_pattern_miss_descriptor(const std::string &pattern)
39+
{
40+
const size_t descriptor_index = pattern.rfind(':');
41+
if(descriptor_index == std::string::npos)
42+
return true;
43+
44+
static const std::string java_prefix = "java::";
45+
return descriptor_index == java_prefix.length() - 1 &&
46+
has_prefix(pattern, java_prefix);
47+
}
48+
49+
/// Create a lambda that returns the symbols that the given pattern should be
50+
/// loaded.If the pattern doesn't include a colon for matching the descriptor,
51+
/// append a `:\(.*\).*` to the regex. Note this will mean all overloaded
52+
/// methods will be marked as extra entry points for CI lazy loading.
53+
/// If the pattern doesn't include the java:: prefix, prefix that
54+
/// \param pattern: The user provided pattern
55+
/// \return The lambda to execute.
56+
std::function<std::vector<irep_idt>(const symbol_tablet &symbol_table)>
57+
build_load_method_by_regex(const std::string &pattern)
58+
{
59+
std::regex regex = build_regex_from_pattern(pattern);
60+
61+
return [=](const symbol_tablet &symbol_table) {
62+
std::vector<irep_idt> matched_methods;
63+
for(const auto &symbol : symbol_table.symbols)
64+
{
65+
if(
66+
symbol.second.is_function() &&
67+
std::regex_match(id2string(symbol.first), regex))
68+
{
69+
matched_methods.push_back(symbol.first);
70+
}
71+
}
72+
return matched_methods;
73+
};
74+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*******************************************************************\
2+
3+
Module: Java Bytecode
4+
5+
Author: Diffblue Ltd.
6+
7+
\*******************************************************************/
8+
9+
/// \file
10+
/// Process a pattern to use as a regex for selecting extra entry points for
11+
/// ci_lazy_methodst
12+
13+
#ifndef CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H
14+
#define CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H
15+
16+
#include <java_bytecode/ci_lazy_methods.h>
17+
18+
class symbol_tablet;
19+
20+
std::function<std::vector<irep_idt>(const symbol_tablet &symbol_table)>
21+
build_load_method_by_regex(const std::string &pattern);
22+
23+
bool does_pattern_miss_descriptor(const std::string &pattern);
24+
25+
#endif // CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H

jbmc/unit/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ SRC += java_bytecode/goto-programs/class_hierarchy_output.cpp \
2121
java_bytecode/java_types/java_generic_symbol_type.cpp \
2222
java_bytecode/java_types/java_type_from_string.cpp \
2323
java_bytecode/java_utils_test.cpp \
24+
java_bytecode/load_method_by_regex.cpp \
2425
java_bytecode/inherited_static_fields/inherited_static_fields.cpp \
2526
pointer-analysis/custom_value_set_analysis.cpp \
2627
solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp \
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
/*******************************************************************\
2+
3+
Module: Unit tests for parsing generic classes
4+
5+
Author: Diffblue Limited
6+
7+
\*******************************************************************/
8+
9+
#include <java_bytecode/load_method_by_regex.h>
10+
#include <testing-utils/catch.hpp>
11+
#include <testing-utils/require_vectors_equal_unordered.h>
12+
13+
SCENARIO(
14+
"load_method_by_regex::does_pattern_miss_descriptor",
15+
"[core][java_bytecode][load_method_by_regex]")
16+
{
17+
GIVEN("A string with a java prefix and no descriptor")
18+
{
19+
const std::string pattern = "java::com.diffblue.ClassName.methodName";
20+
WHEN("When calling does_pattern_miss_descriptor")
21+
{
22+
const bool result = does_pattern_miss_descriptor(pattern);
23+
THEN("It should miss discriptor")
24+
{
25+
REQUIRE(result);
26+
}
27+
}
28+
}
29+
GIVEN("A string with a java prefix and a descriptor")
30+
{
31+
const std::string pattern = "java::com.diffblue.ClassName.methodName:()V";
32+
WHEN("When calling does_pattern_miss_descriptor")
33+
{
34+
const bool result = does_pattern_miss_descriptor(pattern);
35+
THEN("It should have the discriptor")
36+
{
37+
REQUIRE_FALSE(result);
38+
}
39+
}
40+
}
41+
GIVEN("A string without a java prefix and without a descriptor")
42+
{
43+
const std::string pattern = "com.diffblue.ClassName.methodName";
44+
WHEN("When calling does_pattern_miss_descriptor")
45+
{
46+
const bool result = does_pattern_miss_descriptor(pattern);
47+
THEN("It should miss discriptor")
48+
{
49+
REQUIRE(result);
50+
}
51+
}
52+
}
53+
GIVEN("A string without a java prefix and with a descriptor")
54+
{
55+
const std::string pattern = "com.diffblue.ClassName.methodName:()V";
56+
WHEN("When calling does_pattern_miss_descriptor")
57+
{
58+
const bool result = does_pattern_miss_descriptor(pattern);
59+
THEN("It should have the discriptor")
60+
{
61+
REQUIRE_FALSE(result);
62+
}
63+
}
64+
}
65+
GIVEN("A string with an almost java prefix and no descriptor")
66+
{
67+
const std::string pattern = "java:com.diffblue.ClassName.methodName";
68+
WHEN("When calling does_pattern_miss_descriptor")
69+
{
70+
const bool result = does_pattern_miss_descriptor(pattern);
71+
THEN("It should classify the last : as a descriptor")
72+
{
73+
REQUIRE_FALSE(result);
74+
}
75+
}
76+
}
77+
GIVEN("A string with an almost java prefix and with a descriptor")
78+
{
79+
const std::string pattern = "java:com.diffblue.ClassName.methodName:()V";
80+
WHEN("When calling does_pattern_miss_descriptor")
81+
{
82+
const bool result = does_pattern_miss_descriptor(pattern);
83+
THEN("It should have the discriptor")
84+
{
85+
REQUIRE_FALSE(result);
86+
}
87+
}
88+
}
89+
}
90+
91+
static symbolt create_method_symbol(const std::string &method_name)
92+
{
93+
symbolt new_symbol;
94+
new_symbol.name = method_name;
95+
new_symbol.type = code_typet{{}, nil_typet{}};
96+
return new_symbol;
97+
}
98+
99+
static void require_result_for_pattern(
100+
const std::string &pattern,
101+
const std::vector<irep_idt> &expected,
102+
const symbol_tablet &symbol_table)
103+
{
104+
WHEN("Constructing a load_method_by_regex")
105+
{
106+
const auto matcher = build_load_method_by_regex(pattern);
107+
const auto &results = matcher(symbol_table);
108+
if(expected.size() == 1)
109+
{
110+
THEN("Expect " + id2string(expected[0]))
111+
{
112+
require_vectors_equal_unordered(results, expected);
113+
}
114+
}
115+
else
116+
{
117+
THEN("Expect " + std::to_string(expected.size()) + " symbols")
118+
{
119+
require_vectors_equal_unordered(results, expected);
120+
}
121+
}
122+
}
123+
}
124+
125+
SCENARIO("load_method_by_regex", "[core][java_bytecode][load_method_by_regex]")
126+
{
127+
symbol_tablet symbol_table;
128+
symbol_table.add(create_method_symbol("java::pack.Class.methodName:()V"));
129+
symbol_table.add(create_method_symbol("java::pack.Class.anotherMethod:()V"));
130+
symbol_table.add(create_method_symbol("java::pack.Different.methodName:()V"));
131+
symbol_table.add(create_method_symbol("java::another.Class.methodName:()V"));
132+
133+
GIVEN("A pattern without java prefix, without descriptor, no regex")
134+
{
135+
const std::string pattern = "pack.Class.methodName";
136+
const std::vector<irep_idt> expected = {"java::pack.Class.methodName:()V"};
137+
require_result_for_pattern(pattern, expected, symbol_table);
138+
}
139+
GIVEN("A pattern with java prefix, without descriptor, no regex")
140+
{
141+
const std::string pattern = "java::pack.Class.methodName";
142+
const std::vector<irep_idt> expected = {"java::pack.Class.methodName:()V"};
143+
require_result_for_pattern(pattern, expected, symbol_table);
144+
}
145+
GIVEN("A pattern with java prefix, with descriptor, no regex")
146+
{
147+
const std::string pattern = R"(java::pack.Class.methodName:\(\)V)";
148+
const std::vector<irep_idt> expected = {"java::pack.Class.methodName:()V"};
149+
require_result_for_pattern(pattern, expected, symbol_table);
150+
}
151+
GIVEN("A pattern with java prefix, with wrong descriptor, no regex")
152+
{
153+
const std::string pattern = R"(java::pack.Class.methodName:\(I\)V)";
154+
const std::vector<irep_idt> expected = {};
155+
require_result_for_pattern(pattern, expected, symbol_table);
156+
}
157+
GIVEN("A pattern with java prefix, without descriptor, with regex")
158+
{
159+
const std::string pattern = "java::pack.Class..*";
160+
const std::vector<irep_idt> expected = {
161+
"java::pack.Class.methodName:()V", "java::pack.Class.anotherMethod:()V"};
162+
require_result_for_pattern(pattern, expected, symbol_table);
163+
}
164+
GIVEN("A pattern without java prefix, without descriptor, with regex")
165+
{
166+
const std::string pattern = "pack.Class..*";
167+
const std::vector<irep_idt> expected = {
168+
"java::pack.Class.methodName:()V", "java::pack.Class.anotherMethod:()V"};
169+
require_result_for_pattern(pattern, expected, symbol_table);
170+
}
171+
GIVEN("A pattern without java prefix, with descriptor, with regex in package")
172+
{
173+
const std::string pattern = R"(\w+.Class.methodName:\(\)V)";
174+
const std::vector<irep_idt> expected = {
175+
"java::pack.Class.methodName:()V", "java::another.Class.methodName:()V"};
176+
require_result_for_pattern(pattern, expected, symbol_table);
177+
}
178+
}

0 commit comments

Comments
 (0)