Skip to content

Commit cf24638

Browse files
committed
Adds Python method to run DI model generation
1 parent ddc315c commit cf24638

File tree

2 files changed

+117
-7
lines changed

2 files changed

+117
-7
lines changed

driver/mkbench.py

Lines changed: 106 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
import os
2+
import re
3+
import subprocess
4+
25
import analyser
36
import filecmp
47
import shutil
@@ -7,6 +10,8 @@
710
import utility
811
import subprocess
912

13+
import xml.dom.minidom
14+
1015
class CollectedJavaBinaries:
1116
def __init__(self):
1217
self.class_files = []
@@ -70,6 +75,88 @@ def _read_info_of_class_files(class_files, configuration, cmdline):
7075
return classes_info, java_class_info_call_duration
7176

7277

78+
def run_di_model_generation(configuration, cmdline):
    """
    Runs DI overlay model generation. This attempts to work out what Spring
    DI modelling is being used by the application under analysis and then create
    fake Java code that will simulate what the DI would do at run-time.

    At this point we should have a configuration file from java-class-info that
    will drive generation.

    Args:
        configuration: Mapping that provides the "diConfigurationPath" and
            "detectedEntryPointsPath" entries.
        cmdline: Parsed command line; the attributes read here are
            ``libraries``, ``skip_di_generation`` and ``common_dir``.

    Returns:
        The path of the jar built from the generated sources, or None when
        generation was skipped, produced no source files, or any external
        tool (node, ant) could not be run successfully.
    """

    # An explicit opt-out wins over auto-detection and gets its own message, so
    # the log does not wrongly blame missing Spring libraries.
    if cmdline.skip_di_generation:
        print("DI generation disabled on the command line, skipping DI generation.")
        return None

    # If we don't have anything vaguely looking like a spring library being used, just skip generation.
    spring_library_pattern = re.compile(r"spring-[a-zA-Z]*-[0-9]*\.[0-9]*")
    if not any(spring_library_pattern.search(lib) for lib in cmdline.libraries):
        print("No Spring Framework libraries passed in, skipping DI generation.")
        return None

    di_output_file_path = configuration["diConfigurationPath"]
    detected_entry_points_path = configuration["detectedEntryPointsPath"]
    di_starting_script = os.path.abspath(os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "..", "env-model-generator", "built", "env-model-generator.js"))

    # Right now we're only using our metadata generated files, but this should also include XML.
    spring_files = []
    if os.path.isfile(di_output_file_path):
        spring_files.append(di_output_file_path)

    # Clear down any existing files (the directory may still hold the output
    # of a previous run, so it has to be removed recursively).
    generated_source_file_path = os.path.join(cmdline.common_dir, "GENERATED_SOURCE")
    _reset_directory(generated_source_file_path)

    # NOTE(review): the first input file is passed positionally and any further
    # ones as a single space-joined "--input-file" value; with the current
    # single-file list the latter is always an empty string. Kept as-is because
    # the generator's CLI contract is not visible from here - confirm.
    di_generation_commandline = [
        "node", di_starting_script,
        "".join(spring_files[:1]),
        "--input-file", " ".join(spring_files[1:]),
        "--output-path", generated_source_file_path,
        "--entry-points-input-file", detected_entry_points_path,
    ]
    if not _run_external_tool(di_generation_commandline):
        print("DI generation failed.")
        return None

    # Quick check to see if we have any output files.
    if not any(files for _, _, files in os.walk(generated_source_file_path)):
        print("No Java source files emitted by DI generation.")
        return None

    # Clear down yet more existing files.
    java_binaries_path = os.path.join(cmdline.common_dir, "GENERATED_BINARIES")
    _reset_directory(java_binaries_path)

    # Combine the incoming libraries with our collected classes, this
    # should hold every reference we need to compile.
    class_paths = cmdline.libraries + [os.path.join(cmdline.common_dir, "collected_classes.jar")]

    generated_jar_path = os.path.join(cmdline.common_dir, "DI-models.jar")
    ant_build_file = _render_ant_build_file(
        generated_source_file_path, java_binaries_path, generated_jar_path, class_paths)

    # 'x' mode: refuse to overwrite a build.xml that the generator itself may
    # have emitted into the freshly created source directory.
    ant_build_path = os.path.join(generated_source_file_path, "build.xml")
    with open(ant_build_path, 'x') as ant_xml_file:
        ant_xml_file.write(ant_build_file)

    javac_commandline = ["ant", "-file", ant_build_path, "-lib", os.pathsep.join(class_paths)]
    if not _run_external_tool(javac_commandline):
        print("Java compilation of DI-generated source failed.")
        return None

    return generated_jar_path


def _reset_directory(path):
    """Remove whatever exists at *path* (recursively) and recreate it as an empty directory."""
    if os.path.isdir(path):
        shutil.rmtree(path)
    elif os.path.exists(path):
        os.remove(path)
    os.mkdir(path)


def _run_external_tool(commandline):
    """Echo and run *commandline*; return True only if it started and exited with status 0."""
    print("Running commandline: " + " ".join(commandline))
    try:
        result = subprocess.run(commandline, stderr=subprocess.STDOUT)
    except OSError as error:
        # Typically the executable is not installed or not on PATH.
        print("Unable to run '" + commandline[0] + "': " + str(error))
        return False
    return result.returncode == 0


def _render_ant_build_file(source_dir, binaries_dir, jar_path, class_paths):
    """Return pretty-printed Ant build XML that compiles *source_dir* and jars the result."""
    # Built through the DOM so that paths containing XML-special characters
    # (&, <, ") are escaped instead of producing a malformed build file.
    document = xml.dom.minidom.Document()

    project = document.createElement("project")
    project.setAttribute("default", "compile")
    document.appendChild(project)

    target = document.createElement("target")
    target.setAttribute("name", "compile")
    project.appendChild(target)

    javac = document.createElement("javac")
    javac.setAttribute("includeantruntime", "true")
    javac.setAttribute("srcdir", source_dir)
    javac.setAttribute("destdir", binaries_dir)
    target.appendChild(javac)

    classpath = document.createElement("classpath")
    javac.appendChild(classpath)
    for path in class_paths:
        path_element = document.createElement("pathelement")
        path_element.setAttribute("location", path)
        classpath.appendChild(path_element)

    jar = document.createElement("jar")
    jar.setAttribute("destfile", jar_path)
    jar.setAttribute("basedir", binaries_dir)
    target.appendChild(jar)

    return document.toprettyxml()
158+
159+
73160
def collect_java_binaries(cmdline):
74161
with open(cmdline.prepare_scan) as config_file:
75162
configuration = json.load(config_file)
@@ -157,9 +244,6 @@ def collect_java_binaries(cmdline):
157244
print("WARNING: Unable to find any detected entry points. No analysis will be run.")
158245
return
159246

160-
with open(entry_points_file) as ep_config_file:
161-
ep_config = json.load(ep_config_file)
162-
163247
# Copy the current commandline and transpose into a dictionary.
164248
copied_command_line = {key.replace("_", "-"): val for key, val in vars(cmdline).items()}
165249

@@ -170,14 +254,29 @@ def collect_java_binaries(cmdline):
170254
java_libraries.classpath_jar_files +
171255
library_directories)
172256

257+
di_output_path = run_di_model_generation(configuration, cmdline)
258+
259+
# If DI generation has run we want to target its output jar as our starting point.
260+
target_binary = java_binaries.jar_file
261+
if di_output_path:
262+
class_paths.append(java_binaries.jar_file)
263+
class_paths.append(di_output_path) # Appended because we need overlay classes too.
264+
target_binary = di_output_path
265+
266+
with open(entry_points_file) as ep_config_file:
267+
ep_config = json.load(ep_config_file)
268+
173269
previously_created_folders = set()
174270
for ep_data in ep_config["entryPoints"]:
175271

176272
method_data = ep_data["method"]
177273

178274
# We don't add the descriptor here as it's not recognized (and makes folder names too long).
179275
friendly_method_name = ep_data["className"] + "." + method_data["name"]
180-
raw_method_name = ep_data["className"] + "." + method_data["signature"]
276+
277+
actual_entry_point = \
278+
"com.diffblue.security.SyntheticEntryPoints." + method_data["syntheticMethodName"] if di_output_path \
279+
else ep_data["className"] + "." + method_data["signature"]
181280

182281
# Try to make sure the folder name isn't invalid.
183282
folder_name = friendly_method_name.replace(os.path.sep, '.')
@@ -199,12 +298,12 @@ def collect_java_binaries(cmdline):
199298
generated_temp_folder = os.path.join(cmdline.temp_dir, folder_name)
200299

201300
program_json = {
202-
"jar": java_binaries.jar_file,
301+
"jar": target_binary,
203302
"classpath": class_paths,
204303
# The file should not exist yet. Here we only record the preferred/desired location
205304
# of the file on disk. Analyser will create it, if it does not exist (in the first run)
206305
"gbf": os.path.join(generated_temp_folder, "input_program.gbf"),
207-
"entry-point": raw_method_name
306+
"entry-point": actual_entry_point
208307
}
209308

210309
program_file = os.path.join(output_folder, "program.json")
@@ -219,7 +318,7 @@ def collect_java_binaries(cmdline):
219318
"output-dir": output_folder,
220319
"results-dir": output_folder,
221320
"temp-dir": generated_temp_folder,
222-
"entry-point": raw_method_name
321+
"entry-point": actual_entry_point
223322
})
224323

225324
# Save the commandline file.

driver/run.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ def create_parser():
195195
help="Path to the configuration file used to do the pre-analysis stage")
196196
parser.add_argument("--run-scan", type=str, const='', nargs="?",
197197
help="Flag for whether a security scan should be run. Target will be the results folder.")
198+
parser.add_argument("--skip-di-generation", action='store_true',
199+
help="Turns off DI overlay generation. This overrides the automatic detection of this feature.")
198200

199201
return parser
200202

@@ -432,6 +434,15 @@ def __main():
432434
analyser.get_missing_binary_error_message())
433435
return
434436

437+
# If we've been passed a path with * on the end, unpack the .jar files in
438+
# that directory (and children).
439+
detected_jar_files = [os.path.join(root, file)
440+
for path in cmdline.libraries if not os.path.isdir(path) and path.endswith("*") and os.path.isdir(path.strip('*'))
441+
for root, directories, files in os.walk(path.strip('*'))
442+
for file in files if re.search(".*\.jar$", file)]
443+
444+
cmdline.libraries = detected_jar_files + [path for path in cmdline.libraries if not (not os.path.isdir(path) and path.endswith("*"))]
445+
435446
common_libraries = _get_common_libraries(cmdline.models_library_location)
436447

437448
if cmdline.use_models_library:

0 commit comments

Comments
 (0)