@@ -1,4 +1,7 @@
import os
+ import re
+ import subprocess
+
import analyser
import filecmp
import shutil
@@ -7,6 +10,8 @@
import utility
import subprocess

+ import xml.dom.minidom
+
class CollectedJavaBinaries:
    def __init__(self):
        self.class_files = []
@@ -70,6 +75,88 @@ def _read_info_of_class_files(class_files, configuration, cmdline):
    return classes_info, java_class_info_call_duration


+ def run_di_model_generation(configuration, cmdline):
+     """
+     Runs DI overlay model generation. This attempts to work out which Spring
+     DI modelling the application under analysis is using, and then creates
+     fake Java code that simulates what the DI framework would do at run-time.
+
+     At this point we should have a configuration file from java-class-info that
+     will drive generation.
+     """
+
+     # If nothing that looks vaguely like a Spring library is in use, just skip generation.
+     if not next((lib for lib in cmdline.libraries if re.search(r"spring-[a-zA-Z]*-[0-9]*\.[0-9]*", lib)), None) \
+             or cmdline.skip_di_generation:
+         print("No Spring Framework libraries passed in, skipping DI generation.")
+         return
+
+     di_output_file_path = configuration["diConfigurationPath"]
+     detected_entry_points_path = configuration["detectedEntryPointsPath"]
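+     # The generator is the built Node entry script from the sibling env-model-generator project.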
+     di_starting_script = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "env-model-generator", "built", "env-model-generator.js"))
+
+     # Right now we're only using our metadata-generated files, but this should also include XML.
+     spring_files = []
+     if os.path.exists(di_output_file_path) and os.path.isfile(di_output_file_path):
+         spring_files.append(di_output_file_path)
+
+     # Clear down any existing files.
+     generated_source_file_path = os.path.join(cmdline.common_dir, "GENERATED_SOURCE")
+     if os.path.exists(generated_source_file_path):
+         shutil.rmtree(generated_source_file_path)
+     os.mkdir(generated_source_file_path)
+
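+     # The first Spring metadata file is passed positionally; any remaining files go via --input-file.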
+     di_generation_commandline = ["node", di_starting_script, "".join(spring_files[:1]), "--input-file", " ".join(spring_files[1:]), "--output-path", generated_source_file_path, "--entry-points-input-file", detected_entry_points_path]
+
+     print("Running commandline: " + " ".join(di_generation_commandline))
+     di_generation_result = subprocess.run(di_generation_commandline, stderr=subprocess.STDOUT)
+     if di_generation_result.returncode != 0:
+         print("DI generation failed.")
+         return
+
+     # Quick check to see if we have any output files.
+     if not next((file for root, dirs, files in os.walk(generated_source_file_path) for file in files), None):
+         print("No Java source files emitted by DI generation.")
+         return
+
+     # Clear down yet more existing files.
+     java_binaries_path = os.path.join(cmdline.common_dir, "GENERATED_BINARIES")
+     if os.path.exists(java_binaries_path):
+         shutil.rmtree(java_binaries_path)
+     os.mkdir(java_binaries_path)
+
+     # Combine the incoming libraries with our collected classes; this
+     # should hold every reference we need to compile.
+     class_paths = cmdline.libraries + [os.path.join(cmdline.common_dir, "collected_classes.jar")]
+
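+     # Build a minimal Ant project in memory: a javac task that compiles the generated sources
+     # against the collected classpath, followed by a jar task that packages the class files.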
+     generated_jar_path = os.path.join(cmdline.common_dir, "DI-models.jar")
+     ant_build_file = ('<project default="compile">'
+                       '<target name="compile">'
+                       '<javac includeantruntime="true" srcdir="' + generated_source_file_path + '" destdir="' + java_binaries_path + '">'
+                       + '<classpath>' + "".join(["<pathelement location=\"" + path + "\"/>" for path in class_paths]) + '</classpath>' +
+                       '</javac>'
+                       '<jar destfile="' + generated_jar_path + '" basedir="' + java_binaries_path + '"/>'
+                       '</target>'
+                       '</project>')
+
+     # Attempt to prettify the output a little.
+     ant_build_file = xml.dom.minidom.parseString(ant_build_file).toprettyxml()
+
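+     # Mode 'x' creates the file and fails if it already exists; the source directory was recreated above, so this is safe.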
+     ant_build_path = os.path.join(generated_source_file_path, "build.xml")
+     with open(ant_build_path, 'x') as ant_xml_file:
+         ant_xml_file.write(ant_build_file)
+
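+     # Run Ant against the generated build.xml; "-lib" also puts the classpath jars on Ant's own classpath.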
+     javac_commandline = ["ant", "-file", ant_build_path, "-lib", os.pathsep.join(class_paths)]
+
+     print("Running commandline: " + " ".join(javac_commandline))
+     javac_result = subprocess.run(javac_commandline, stderr=subprocess.STDOUT)
+     if javac_result.returncode != 0:
+         print("Java compilation of DI-generated source failed.")
+         return
+
+     return generated_jar_path
+
+
def collect_java_binaries(cmdline):
    with open(cmdline.prepare_scan) as config_file:
        configuration = json.load(config_file)
@@ -157,9 +244,6 @@ def collect_java_binaries(cmdline):
        print("WARNING: Unable to find any detected entry points. No analysis will be run.")
        return

-     with open(entry_points_file) as ep_config_file:
-         ep_config = json.load(ep_config_file)
-
    # Copy the current commandline and transpose into a dictionary.
    copied_command_line = {key.replace("_", "-"): val for key, val in vars(cmdline).items()}

@@ -170,14 +254,29 @@ def collect_java_binaries(cmdline):
        java_libraries.classpath_jar_files +
        library_directories)

+     di_output_path = run_di_model_generation(configuration, cmdline)
+
+     # If DI generation has run, we want to target its output jar as our starting point.
+     target_binary = java_binaries.jar_file
+     if di_output_path:
+         class_paths.append(java_binaries.jar_file)
+         class_paths.append(di_output_path)  # Appended because we need the overlay classes too.
+         target_binary = di_output_path
+
+     with open(entry_points_file) as ep_config_file:
+         ep_config = json.load(ep_config_file)
+
    previously_created_folders = set()
    for ep_data in ep_config["entryPoints"]:

        method_data = ep_data["method"]

        # We don't add the descriptor here as it's not recognized (and makes folder names too long).
        friendly_method_name = ep_data["className"] + "." + method_data["name"]
-         raw_method_name = ep_data["className"] + "." + method_data["signature"]
+
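+         # With DI models generated, target the synthetic entry point produced for this method;
+         # otherwise fall back to the raw class name and method signature.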
+         actual_entry_point = \
+             "com.diffblue.security.SyntheticEntryPoints." + method_data["syntheticMethodName"] if di_output_path \
+             else ep_data["className"] + "." + method_data["signature"]

        # Try to make sure the folder name isn't invalid.
        folder_name = friendly_method_name.replace(os.path.sep, '.')
@@ -199,12 +298,12 @@ def collect_java_binaries(cmdline):
        generated_temp_folder = os.path.join(cmdline.temp_dir, folder_name)

        program_json = {
-             "jar": java_binaries.jar_file,
+             "jar": target_binary,
            "classpath": class_paths,
            # The file should not exist yet. Here we only record the preferred/desired location
            # of the file on disk. Analyser will create it if it does not exist (on the first run).
            "gbf": os.path.join(generated_temp_folder, "input_program.gbf"),
-             "entry-point": raw_method_name
+             "entry-point": actual_entry_point
        }

        program_file = os.path.join(output_folder, "program.json")
@@ -219,7 +318,7 @@ def collect_java_binaries(cmdline):
            "output-dir": output_folder,
            "results-dir": output_folder,
            "temp-dir": generated_temp_folder,
-             "entry-point": raw_method_name
+             "entry-point": actual_entry_point
        })

        # Save the commandline file.