Skip to content

Commit 9f1a37e

Browse files
authored
Merge pull request diffblue#460 from diffblue/enhancement/configuration_generation
[SEC-424] Automatic Entry-point Detection
2 parents e0a000c + 94fcc3b commit 9f1a37e

26 files changed

+1412
-241
lines changed

driver/analyser.py

Lines changed: 12 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -43,86 +43,45 @@ def get_symex_pathname():
4343
return os.path.abspath(os.path.join(_tools_binary_dir, "symex"))
4444

4545

46-
def run_security_analyser(
47-
program_json_file,
48-
transition_rules_file_pathname,
49-
timeout,
50-
dump_html_summaries,
51-
dump_html_statistics,
52-
dump_html_slice,
53-
dump_html_program,
54-
verbosity,
55-
do_not_use_precise_access_paths,
56-
data_flow_insensitive_instrumentation,
57-
results_dir,
58-
temp_root_dir,
59-
lazy_methods_context_sensitive,
60-
verify_csvsa_sparse_domains,
61-
use_goto_binary
62-
):
46+
def run_security_analyser(config_json_file, cmdline):
6347
prof = {}
6448
prof_start_time = time.time()
6549

66-
if not os.path.exists(results_dir):
67-
os.makedirs(results_dir)
68-
69-
# Building the root config file for the security analyser
70-
root_config_json_fname = os.path.abspath(os.path.join(results_dir, "config.json"))
71-
print("Building the root config JSON file for 'goto-analyser': " + root_config_json_fname)
72-
root_config_json = {
73-
"program": program_json_file,
74-
"rules": transition_rules_file_pathname,
75-
"timeout": timeout,
76-
"verbosity": verbosity,
77-
"dump_html_summaries": dump_html_summaries,
78-
"dump_html_statistics": dump_html_statistics,
79-
"dump_html_slice": dump_html_slice,
80-
"dump_html_program": dump_html_program,
81-
"use_goto_binary": use_goto_binary,
82-
"do_not_use_precise_access_paths": do_not_use_precise_access_paths,
83-
"output-dir": "./",
84-
"temp-dir": temp_root_dir,
85-
"data-flow-insensitive-instrumentation": data_flow_insensitive_instrumentation,
86-
"lazy-methods-context-sensitive": lazy_methods_context_sensitive,
87-
"verify-csvsa-sparse-domains": verify_csvsa_sparse_domains
88-
}
89-
with open(root_config_json_fname, "w") as root_config_json_file:
90-
root_config_json_file.write(json.dumps(root_config_json, sort_keys=True, indent=4))
91-
92-
if not os.path.exists(results_dir):
93-
os.makedirs(results_dir)
50+
if not os.path.exists(cmdline.results_dir):
51+
os.makedirs(cmdline.results_dir)
9452

9553
old_cwd = os.getcwd()
96-
os.chdir(results_dir)
54+
os.chdir(cmdline.results_dir)
9755

98-
if use_goto_binary:
56+
if cmdline.use_goto_binary:
57+
program_json_file = os.path.abspath(os.path.join(cmdline.results_dir, "program.json"))
9958
with open(program_json_file, "r") as ifile:
10059
program_info = json.load(ifile)
10160
assert "gbf" in program_info
10261
if not os.path.isfile(program_info["gbf"]):
10362
command = (
10463
get_security_analyser_pathname() + " --security-scanner '" +
105-
root_config_json_fname + "' --output-goto-binary '" +
64+
config_json_file + "' --output-goto-binary '" +
10665
program_info["gbf"] + "' "
10766
)
10867
prof["calling_security_analyser_for_goto_program_generation"] = {}
10968
prof_calling_security_analyser_start_time = time.time()
11069
print("Invoking 'security-analyser' to translate Java to GOTO program.")
111-
if verbosity >= 9:
112-
print("CWD: " + results_dir)
70+
if cmdline.verbosity >= 9:
71+
print("CWD: " + cmdline.results_dir)
11372
print("CMD: " + command)
11473
os.system(command)
11574
prof["calling_security_analyser_for_goto_program_generation"]["duration"] = time.time() - prof_calling_security_analyser_start_time
11675

11776
command = (
11877
get_security_analyser_pathname() + " "
119-
"--security-scanner '" + root_config_json_fname + "' "
78+
"--security-scanner '" + config_json_file + "' "
12079
)
12180
prof["calling_security_analyser"] = {}
12281
prof_calling_security_analyser_start_time = time.time()
12382
print("Invoking 'security-analyser' ...")
124-
if verbosity >= 9:
125-
print("CWD: " + results_dir)
83+
if cmdline.verbosity >= 9:
84+
print("CWD: " + cmdline.results_dir)
12685
print("CMD: " + command)
12786
os.system(command)
12887
prof["calling_security_analyser"]["duration"] = time.time() - prof_calling_security_analyser_start_time

driver/mkbench.py

Lines changed: 106 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -35,20 +35,21 @@ def _find_java_binaries(start_path, java_binaries):
3535
_add_java_binary(full_pathname, java_binaries)
3636

3737

38-
def _read_info_of_class_files(class_files, temp_dir, verbosity):
38+
def _read_info_of_class_files(class_files, configuration, cmdline):
39+
3940
assert isinstance(class_files, list) and all(isinstance(s, str) for s in class_files)
40-
class_list_json_file = os.path.join(temp_dir, "collected_classes.json")
41+
class_list_json_file = configuration["collected_classes_path"]
4142
with open(class_list_json_file, "w") as ofile:
4243
ofile.write(json.dumps(class_files, sort_keys=True, indent=4))
43-
class_info_json_file = os.path.join(temp_dir, "collected_classes_info.json")
44+
class_info_json_file = configuration["collected_classes_info_path"]
45+
4446
os_command = (
4547
analyser.get_java_class_info_tool_pathname() +
46-
" --in-json \"" + class_list_json_file + "\" " +
47-
" --out-json \"" + class_info_json_file + "\""
48+
" --configuration-path \"" + cmdline.prepare_scan + "\""
4849
)
4950
prof_calling_java_class_info_start_time = time.time()
5051
print("Invoking 'java-class-info' ...")
51-
if verbosity >= 9:
52+
if cmdline.verbosity >= 9:
5253
print("CWD: " + os.getcwd())
5354
print("CMD: " + os_command)
5455
os.system(os_command)
@@ -71,29 +72,32 @@ def _read_info_of_class_files(class_files, temp_dir, verbosity):
7172
return classes_info, java_class_info_call_duration
7273

7374

74-
def collect_java_binaries(app_path, list_of_classpaths, entry_point, temp_dir, output_json, verbosity):
75+
def collect_java_binaries(cmdline):
76+
with open(cmdline.prepare_scan) as config_file:
77+
configuration = json.load(config_file)
78+
7579
prof_start_time = time.time()
7680
prof = dict()
7781

78-
if os.path.splitext(app_path)[1].lower() == ".war":
82+
if os.path.splitext(cmdline.input_path)[1].lower() == ".war":
7983
# Loading of WAR files is not supported yet in our front-end, see PR
8084
# https://github.com/diffblue/cbmc/pull/608
8185
# Therefore, we have to unpack them and collect Java binaries.
82-
unpack_dir = os.path.abspath(os.path.join(temp_dir, "WAR_UNPACK"))
86+
unpack_dir = os.path.abspath(os.path.join(cmdline.common_dir, "WAR_UNPACK"))
8387
assert not os.path.exists(unpack_dir)
8488
os.makedirs(unpack_dir)
8589

8690
with utility.PushCwd(unpack_dir):
87-
os.system("jar xf " + app_path)
91+
os.system("jar xf " + cmdline.input_path)
8892

89-
app_path = unpack_dir
93+
cmdline.input_path = unpack_dir
9094

9195
java_binaries = CollectedJavaBinaries()
92-
_find_java_binaries(app_path, java_binaries)
96+
_find_java_binaries(cmdline.input_path, java_binaries)
9397

9498
library_directories = []
9599
java_libraries = CollectedJavaBinaries()
96-
for path in list_of_classpaths:
100+
for path in cmdline.libraries:
97101
if os.path.isdir(path):
98102
_find_java_binaries(path, java_libraries)
99103
library_directories.append(path)
@@ -106,19 +110,17 @@ def collect_java_binaries(app_path, list_of_classpaths, entry_point, temp_dir, o
106110

107111
# First we read packages of all collected class files.
108112
classes_info, java_class_info_call_duration = _read_info_of_class_files(
109-
java_binaries.class_files,
110-
temp_dir,
111-
verbosity
112-
)
113+
java_binaries.class_files, configuration, cmdline)
114+
113115
prof["java-class-info"] = {"duration": java_class_info_call_duration}
114116

115117
assert classes_info is not None and len(classes_info["results"]) != 0
116118

117119
# We copy classes into correct locations for the package structure
118120
# inside the TEMP directory and pack them into a JAR file.
119-
classes_temp_dir = os.path.join(temp_dir, "CLASSES")
120-
assert not os.path.exists(classes_temp_dir)
121-
os.makedirs(classes_temp_dir)
121+
classes_temp_dir = os.path.join(cmdline.common_dir, "CLASSES")
122+
if not os.path.exists(classes_temp_dir):
123+
os.makedirs(classes_temp_dir)
122124

123125
for class_src_pathname, class_file_info in classes_info["results"].items():
124126
class_dst_pathname = os.path.join(classes_temp_dir, class_file_info["name"].replace(".", "/") + ".class")
@@ -135,23 +137,96 @@ def collect_java_binaries(app_path, list_of_classpaths, entry_point, temp_dir, o
135137
# which causes multiple insertions of symbols from models library to the
136138
# symbols table (because 'core-models' library is automatically added to
137139
each 'languaget' instance).
138-
java_binaries.jar_file = os.path.join(temp_dir, "collected_classes.jar")
140+
java_binaries.jar_file = os.path.join(cmdline.common_dir, "collected_classes.jar")
139141
with utility.PushCwd(classes_temp_dir):
140142
os.system("jar cf \"" + java_binaries.jar_file + "\" .")
141143

142-
# We write the lists of collected Java binaries into the output JSON file.
143-
if not os.path.isdir(os.path.dirname(output_json)):
144-
os.makedirs(os.path.dirname(output_json))
145-
with open(output_json, "w") as ofile:
144+
entry_points_file = configuration["detected_entry_points_path"]
145+
if not os.path.exists(entry_points_file):
146+
print("WARNING: Unable to find any detected entry points. No analysis will be run.")
147+
return
148+
149+
with open(entry_points_file) as ep_config_file:
150+
ep_config = json.load(ep_config_file)
151+
152+
# Copy the current commandline and transpose into a dictionary.
153+
copied_command_line = {key.replace("_", "-"): val for key, val in vars(cmdline).items()}
154+
155+
# Loop over all our detected entry points and create a folder for each.
156+
class_paths = [p for p in java_binaries.classpath_jar_files + java_libraries.classpath_jar_files + library_directories]
157+
for ep_data in ep_config["entry_points"]:
158+
159+
method_data = ep_data["method"]
160+
161+
# We don't add the descriptor here as it's not recognized (and makes folder names too long).
162+
fully_qualified_method = ep_data["class_name"] + "." + method_data["name"]
163+
164+
# Try to make sure the folder name isn't invalid.
165+
folder_name = fully_qualified_method.replace(os.path.sep, '.')
166+
output_folder = os.path.join(cmdline.results_dir, folder_name)
167+
168+
# If our folder exists then just add (#number) to it and try creation again.
169+
incrementor = 1
170+
while os.path.exists(output_folder):
171+
folder_name = fully_qualified_method.replace(os.path.sep, '.') + " (" + str(incrementor) + ")"
172+
output_folder = os.path.join(cmdline.results_dir, folder_name)
173+
incrementor += 1
174+
175+
os.mkdir(output_folder)
176+
177+
generated_temp_folder = os.path.join(cmdline.temp_dir, folder_name)
178+
146179
program_json = {
147180
"jar": java_binaries.jar_file,
148-
"classpath": [p for p in java_binaries.classpath_jar_files + java_libraries.classpath_jar_files + library_directories if os.path.exists(p)],
149-
"gbf": os.path.join(temp_dir, "input_program.gbf"), # The file should not exist yet. Here we only record the prefered/desired location
150-
# of the file on disk. Analyser will create it, if it does not exist (in the first run)
181+
"classpath": class_paths,
182+
# The file should not exist yet. Here we only record the preferred/desired location
183+
# of the file on disk. Analyser will create it, if it does not exist (in the first run)
184+
"gbf": os.path.join(generated_temp_folder, "input_program.gbf"),
185+
"entry-point": fully_qualified_method
186+
}
187+
188+
program_file = os.path.join(output_folder, "program.json")
189+
with open(program_file, "w") as program_json_file:
190+
json.dump(program_json, program_json_file, sort_keys=True, indent=4)
191+
192+
command_line_file = os.path.join(output_folder, "command_line.json")
193+
194+
# Update our copied commandline with accurate / new values.
195+
copied_command_line.update({
196+
"name": "[" + cmdline.name + "] " + fully_qualified_method,
197+
"output-dir": output_folder,
198+
"results-dir": output_folder,
199+
"temp-dir": generated_temp_folder,
200+
"entry-point": fully_qualified_method
201+
})
202+
203+
# Save the commandline file.
204+
with open(command_line_file, "w") as commandline_json_file:
205+
json.dump(copied_command_line, commandline_json_file, sort_keys=True, indent=4)
206+
207+
config_file = os.path.join(output_folder, "config.json")
208+
209+
root_config_json = {
210+
"program": program_file,
211+
"rules": cmdline.config,
212+
"timeout": cmdline.timeout,
213+
"verbosity": cmdline.verbosity,
214+
"dump_html_summaries": cmdline.dump_html_summaries,
215+
"dump_html_statistics": cmdline.dump_html_statistics,
216+
"dump_html_slice": cmdline.dump_html_slice,
217+
"dump_html_program": cmdline.dump_html_program,
218+
"do_not_use_precise_access_paths": cmdline.do_not_use_precise_access_paths,
219+
"output-dir": "./",
220+
"temp-dir": generated_temp_folder,
221+
"data-flow-insensitive-instrumentation": cmdline.data_flow_insensitive_instrumentation,
222+
"lazy-methods-context-sensitive": cmdline.lazy_methods_context_sensitive,
223+
"verify-csvsa-sparse-domains": cmdline.verify_csvsa_sparse_domains,
224+
"use_goto_binary": cmdline.use_goto_binary
151225
}
152-
if entry_point is not None:
153-
program_json["entry-point"] = entry_point
154-
ofile.write(json.dumps(program_json, sort_keys=True, indent=4))
226+
227+
# Save the config file.
228+
with open(config_file, "w") as root_config_json_file:
229+
json.dump(root_config_json, root_config_json_file, sort_keys=True, indent=4)
155230

156231
# Lastly, we complete and return statistics from this stage.
157232
prof["duration"] = time.time() - prof_start_time

driver/presentation.py

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -654,30 +654,6 @@ def build_HTML_interface_to_results_and_statistics(
654654

655655
ofile.write("<p></p>\n")
656656

657-
ofile.write("<table>\n"
658-
"<caption>Performance info. Times are in seconds.</caption>\n"
659-
" <tr>\n"
660-
" <th>Property</th>\n"
661-
" <th>Value</th>\n"
662-
" </tr>\n")
663-
ofile.write(" <tr>\n")
664-
ofile.write(" <td>Duration of collecting Java binaries</td>\n")
665-
ofile.write(" <td align=\"center\">" + "{:.3f}".format(prof["collect_java_binaries"]["duration"]) + "</td>\n")
666-
ofile.write(" </tr>\n")
667-
ofile.write(" <tr>\n")
668-
ofile.write(" <td>Duration of reading info of classes (tool java-class-info)</td>\n")
669-
ofile.write(" <td align=\"center\">" + "{:.3f}".format(prof["collect_java_binaries"]["java-class-info"]["duration"]) + "</td>\n")
670-
ofile.write(" </tr>\n")
671-
ofile.write(" <tr>\n")
672-
ofile.write(" <td>Number of collected CLASS files</td>\n")
673-
ofile.write(" <td align=\"center\">" + str(prof["collect_java_binaries"]["num_classes"]) + "</td>\n")
674-
ofile.write(" </tr>\n")
675-
ofile.write(" <tr>\n")
676-
ofile.write(" <td>Number of collected JAR files for classpath</td>\n")
677-
ofile.write(" <td align=\"center\">" + str(prof["collect_java_binaries"]["num_classpath_jar_files"]) + "</td>\n")
678-
ofile.write(" </tr>\n")
679-
ofile.write("</table>\n")
680-
681657
if cmdline.dump_html_program:
682658
original_program_root_html_filename = os.path.join(cmdline.results_dir,"goto-program","HTML","index.html")
683659
ofile.write("<p>\n")

0 commit comments

Comments
 (0)