Skip to content

Commit c727a62

Browse files
authored
Merge pull request diffblue#542 from diffblue/jd/feature/DI_tool_pipeline_integration
[SEC-239] DI generation pipeline integration
2 parents e377666 + 57ee08a commit c727a62

File tree

7 files changed

+164
-9
lines changed

7 files changed

+164
-9
lines changed

CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,4 @@ add_custom_target(models-library ALL
112112
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/LIBRARIES/models/model
113113
)
114114

115+
add_subdirectory(env-model-generator)

driver/mkbench.py

+119-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
import os
2+
import re
3+
import sys
4+
25
import analyser
36
import filecmp
47
import shutil
@@ -70,6 +73,101 @@ def _read_info_of_class_files(class_files, configuration, cmdline):
7073
return classes_info, java_class_info_call_duration
7174

7275

76+
def _recreate_empty_directory(path):
    """Delete ``path`` together with all of its contents (if present) and create it empty."""
    if os.path.exists(path):
        # os.rmdir() only removes *empty* directories, so it raises OSError as
        # soon as a previous run has left generated files behind.
        shutil.rmtree(path)
    os.makedirs(path)


def run_di_model_generation(configuration, cmdline):
    """
    Runs DI overlay model generation. This attempts to work out what Spring
    DI modelling is being used by the application under analysis and then creates
    fake Java code that will simulate what the DI would do at run-time.

    At this point we should have a configuration file from java-class-info that
    will drive generation.

    :param configuration: mapping that provides the "diConfigurationPath" and
        "detectedEntryPointsPath" entries.
    :param cmdline: parsed command line; the attributes ``libraries``,
        ``skip_di_generation``, ``common_dir`` and ``verbosity`` are read.
    :return: path of the generated "DI-models.jar" inside ``cmdline.common_dir``,
        or None when generation was skipped, produced nothing, or failed.
    """

    # An explicit request to skip always wins over automatic detection.
    if cmdline.skip_di_generation:
        print("DI overlay generation was disabled on the command line, skipping.", file=sys.stderr)
        return None

    # If we don't have anything vaguely looking like a spring library being used, just skip generation.
    spring_library_pattern = re.compile(r"spring-[a-zA-Z]*-[0-9]*\.[0-9]*")
    if not any(spring_library_pattern.search(lib) for lib in cmdline.libraries):
        print("No Spring Framework libraries passed in, cannot generate any DI-related code.", file=sys.stderr)
        return None

    di_output_file_path = configuration["diConfigurationPath"]
    detected_entry_points_path = configuration["detectedEntryPointsPath"]
    di_starting_script = os.path.abspath(os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "..", "env-model-generator", "env-model-generator"))

    # Right now we're only using our metadata generated files, but this should also include XML.
    spring_files = []
    if os.path.isfile(di_output_file_path):
        spring_files.append(di_output_file_path)

    # Clear down any existing files.
    generated_source_file_path = os.path.join(cmdline.common_dir, "GENERATED_SOURCE")
    _recreate_empty_directory(generated_source_file_path)

    if not spring_files or not os.path.isfile(spring_files[0]):
        print("No files available to generate DI overlays.")
        return None

    # The first file is passed positionally, any further ones via --input-file.
    primary_input_file = spring_files[0]
    additional_spring_files = spring_files[1:]

    di_generation_commandline = [
        di_starting_script, primary_input_file,
        "--output-path", generated_source_file_path,
        "--entry-points-input-file", detected_entry_points_path,
    ]
    if additional_spring_files:
        di_generation_commandline.extend(["--input-file"] + additional_spring_files)

    di_generation_result = utility.call(
        di_generation_commandline, "DI model generator", cmdline.verbosity, stderr=subprocess.STDOUT)
    if di_generation_result != 0:
        return None

    # Quick check to see if we have any output files.
    if not any(files for _, _, files in os.walk(generated_source_file_path)):
        print("No Java source files emitted by DI generation.")
        return None

    # Clear down yet more existing files.
    java_binaries_path = os.path.join(cmdline.common_dir, "GENERATED_BINARIES")
    _recreate_empty_directory(java_binaries_path)

    # Combine the incoming libraries with our collected classes, this
    # should hold every reference we need to compile.
    class_paths = cmdline.libraries + [os.path.join(cmdline.common_dir, "collected_classes.jar")]

    generated_jar_path = os.path.join(cmdline.common_dir, "DI-models.jar")

    # NOTE(review): paths are interpolated into the XML without escaping, so a
    # path containing '"' or '&' would produce an invalid build file.
    ant_format_arguments = {
        "generated_source_file_path": generated_source_file_path,
        "class_paths": os.linesep.join(
            " <pathelement location=\"" + path + "\"/>" for path in class_paths),
        "generated_jar_path": generated_jar_path,
        "java_binaries_path": java_binaries_path
    }

    ant_build_statement = """
<project default="compile">
<target name="compile">
<javac includeantruntime="true" srcdir="{generated_source_file_path}" destdir="{java_binaries_path}">
<classpath>
{class_paths}
</classpath>
</javac>
<jar destfile="{generated_jar_path}" basedir="{java_binaries_path}"/>
</target>
</project>
""".format(**ant_format_arguments)

    # Mode 'x' refuses to overwrite: the build file must not clash with
    # anything the generator wrote into the freshly created source folder.
    ant_build_path = os.path.join(generated_source_file_path, "build.xml")
    with open(ant_build_path, 'x') as ant_xml_file:
        ant_xml_file.write(ant_build_statement)

    javac_result = utility.call(
        ["ant", "-file", ant_build_path], "Java Compiler for DI model overlays",
        cmdline.verbosity, stderr=subprocess.STDOUT)
    if javac_result != 0:
        print("Java compilation of DI-generated source failed.")
        return None

    return generated_jar_path
169+
170+
73171
def collect_java_binaries(cmdline):
74172
with open(cmdline.prepare_scan) as config_file:
75173
configuration = json.load(config_file)
@@ -157,9 +255,6 @@ def collect_java_binaries(cmdline):
157255
print("WARNING: Unable to find any detected entry points. No analysis will be run.")
158256
return
159257

160-
with open(entry_points_file) as ep_config_file:
161-
ep_config = json.load(ep_config_file)
162-
163258
# Copy the current commandline and transpose into a dictionary.
164259
copied_command_line = {key.replace("_", "-"): val for key, val in vars(cmdline).items()}
165260

@@ -170,14 +265,31 @@ def collect_java_binaries(cmdline):
170265
java_libraries.classpath_jar_files +
171266
library_directories)
172267

268+
di_output_path = run_di_model_generation(configuration, cmdline)
269+
270+
# If DI generation has run we want to target its output jar as our starting point.
271+
target_binary = java_binaries.jar_file
272+
if di_output_path:
273+
class_paths.append(target_binary)
274+
class_paths.append(di_output_path) # Appended because we need overlay classes too.
275+
target_binary = di_output_path
276+
else:
277+
print("DI generation failed, continuing without synthetic entry points.", file=sys.stderr)
278+
279+
with open(entry_points_file) as ep_config_file:
280+
ep_config = json.load(ep_config_file)
281+
173282
previously_created_folders = set()
174283
for ep_data in ep_config["entryPoints"]:
175284

176285
method_data = ep_data["method"]
177286

178287
# We don't add the descriptor here as it's not recognized (and makes folder names too long).
179288
friendly_method_name = ep_data["className"] + "." + method_data["name"]
180-
raw_method_name = ep_data["className"] + "." + method_data["signature"]
289+
290+
actual_entry_point = \
291+
"com.diffblue.security.SyntheticEntryPoints." + method_data["syntheticMethodName"] if di_output_path \
292+
else ep_data["className"] + "." + method_data["signature"]
181293

182294
# Try to make sure the folder name isn't invalid.
183295
folder_name = friendly_method_name.replace(os.path.sep, '.')
@@ -199,12 +311,12 @@ def collect_java_binaries(cmdline):
199311
generated_temp_folder = os.path.join(cmdline.temp_dir, folder_name)
200312

201313
program_json = {
202-
"jar": java_binaries.jar_file,
314+
"jar": target_binary,
203315
"classpath": class_paths,
204316
# The file should not exist yet. Here we only record the prefered/desired location
205317
# of the file on disk. Analyser will create it, if it does not exist (in the first run)
206318
"gbf": os.path.join(generated_temp_folder, "input_program.gbf"),
207-
"entry-point": raw_method_name
319+
"entry-point": actual_entry_point
208320
}
209321

210322
program_file = os.path.join(output_folder, "program.json")
@@ -219,7 +331,7 @@ def collect_java_binaries(cmdline):
219331
"output-dir": output_folder,
220332
"results-dir": output_folder,
221333
"temp-dir": generated_temp_folder,
222-
"entry-point": raw_method_name
334+
"entry-point": actual_entry_point
223335
})
224336

225337
# Save the commandline file.

driver/run.py

+18
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ def create_parser():
195195
help="Path to the configuration file used to do the pre-analysis stage")
196196
parser.add_argument("--run-scan", type=str, const='', nargs="?",
197197
help="Flag for whether a security scan should be run. Target will be the results folder.")
198+
parser.add_argument("--skip-di-generation", action='store_true',
199+
help="Turns off DI overlay generation. This overrides the automatic detection of DI usage.")
198200

199201
return parser
200202

@@ -432,6 +434,22 @@ def __main():
432434
analyser.get_missing_binary_error_message())
433435
return
434436

437+
def extract_wildcard_paths(path):
    """
    If our path is a directory and has a trailing * that isn't related
    to any concrete item, recursively find all .jar files in that folder
    and its children.

    :param path: a library path, optionally ending in a "*" wildcard.
    :return: the list of .jar files found below the wildcard's directory, or a
        single-element list holding ``path`` unchanged when it is not such a
        wildcard (it exists as-is, has no trailing "*", or the part before the
        "*" is not a directory).
    """
    if os.path.exists(path) or not path.endswith("*"):
        return [path]

    # Only the trailing wildcard is removed; str.strip() would also eat
    # asterisks at the start of the path and point us at the wrong folder.
    directory = path.rstrip("*")
    if not os.path.isdir(directory):
        return [path]

    # A plain suffix test is all that is needed to recognise .jar files.
    return [os.path.join(root, file_name)
            for root, _, file_names in os.walk(directory)
            for file_name in file_names
            if file_name.endswith(".jar")]
449+
450+
# Transform incoming libraries.
451+
cmdline.libraries = [path for library_path in cmdline.libraries for path in extract_wildcard_paths(library_path)]
452+
435453
common_libraries = _get_common_libraries(cmdline.models_library_location)
436454

437455
if cmdline.use_models_library:

env-model-generator/CMakeLists.txt

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Build integration for the env-model-generator TypeScript tool.
project(SECURITY_ANALYSER)

# Every TypeScript source below src/ is an input of the build.
file(GLOB_RECURSE typescript_files "${CMAKE_CURRENT_SOURCE_DIR}/src/*.ts")

# Derive the expected build product of each source: .../src/x.ts -> .../built/x.js
set(javascript_files "")
foreach(typescript_file IN LISTS typescript_files)
    string(REGEX REPLACE "\\.ts$" ".js" javascript_file "${typescript_file}")
    string(REPLACE "/src/" "/built/" javascript_file "${javascript_file}")
    list(APPEND javascript_files "${javascript_file}")
endforeach()

# Rule that produces the JavaScript outputs; re-run whenever a TypeScript
# source or package.json changes.
add_custom_command(
    OUTPUT ${javascript_files}
    COMMAND yarn install
    COMMAND yarn run build
    DEPENDS ${typescript_files} package.json
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    COMMENT "Recompiling env-model-generator."
)

# Part of the default build (ALL) so the generated JavaScript stays up to date.
add_custom_target(env-model-generator ALL
    DEPENDS ${javascript_files}
)
+3-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
#!/bin/bash
# Launcher for the env-model-generator: runs the built JavaScript entry point
# that sits next to this script, forwarding all arguments unchanged.

# Resolve the real location of this script so it can be started from any
# working directory. Every expansion is quoted so that paths containing
# spaces are not word-split.
parent_path=$(dirname "$(readlink -f "$0")")

# Make sure the entry point is executable before running it.
chmod a+x "$parent_path/built/env-model-generator.js"
"$parent_path/built/env-model-generator.js" "$@"

src/java-class-info/entry_point.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <java_bytecode/java_types.h>
1414

1515
#include <boost/algorithm/string/predicate.hpp>
16+
#include <boost/algorithm/string/replace.hpp>
1617

1718
entry_pointt::entry_pointt(
1819
const std::string &class_name,
@@ -59,6 +60,8 @@ jsont entry_pointt::to_json()
5960
method_obj[json_namest::return_type] = json_stringt(method.return_type);
6061
method_obj[json_namest::name] = json_stringt(method.name);
6162
method_obj[json_namest::signature] = json_stringt(method.signature);
63+
method_obj[json_namest::synthetic_method_name] = json_stringt(
64+
boost::replace_all_copy(class_name, ".", "_") + "_" + method.name);
6265

6366
return json_entry_point;
6467
}

src/java-class-info/entry_point.h

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class entry_pointt
2121
{
2222
public:
2323
static constexpr const char *name = "name";
24+
static constexpr const char *synthetic_method_name = "syntheticMethodName";
2425
static constexpr const char *signature = "signature";
2526
static constexpr const char *type = "type";
2627
static constexpr const char *arguments = "arguments";

0 commit comments

Comments
 (0)