Skip to content

Commit 921844c

Browse files
authored
Merge pull request diffblue#471 from diffblue/stage_1_performance_over_all_apps
SEC-499: Making time&memory performance scatter plots of Stage 1 over many apps.
2 parents 3b9e247 + 0d558d1 commit 921844c

File tree

2 files changed

+231
-0
lines changed

2 files changed

+231
-0
lines changed

scripts/collect_stats.py

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import argparse
2+
import os
3+
import json
4+
5+
6+
def _parse_cmd_line():
    """Parse the command line of the statistics collector.

    Two positional arguments are expected: ``input`` (root directory that
    holds the web-app results) and ``output`` (path of the JSON file to
    write).  Returns the ``argparse.Namespace`` produced from ``sys.argv``.
    """
    description = (
        "Searches result directories computed by security-analyser for web apps and "
        "collects performance data. The script provides an input to the script "
        "'make_performace_scatter_plots.py'."
    )
    positional_help = (
        ("input", "A root directory under which are stored results of the web apps."),
        ("output", "A path-name of the output JSON file."),
    )
    parser = argparse.ArgumentParser(description=description)
    for arg_name, help_text in positional_help:
        parser.add_argument(arg_name, type=str, help=help_text)
    return parser.parse_args()
16+
17+
18+
def _benchmark_root(results_dir):
    """Return the benchmark directory that owns `results_dir`, or None.

    `results_dir` qualifies only when its path ends with
    ``<benchmark>/RESULTS/statistics/JSON``.  The path is made absolute first
    so that relative inputs such as "." still yield a real directory name.
    """
    remainder = os.path.abspath(results_dir)
    for expected in ("JSON", "statistics", "RESULTS"):
        remainder, tail = os.path.split(remainder)
        if tail != expected:
            return None
    return remainder


def _summarise_stats(stats, pathname):
    """Return ``(num_locations, total_time)`` computed from one statistics file.

    `num_locations` is the sum of "num-locations" over all functions of all
    records in "table-files"; `total_time` is the sum of the values stored in
    "table-phases".  `pathname` is used only in error messages.

    Raises ValueError when a required key is missing.
    """
    for key in ("table-files", "table-phases"):
        if key not in stats:
            raise ValueError("Missing key '" + key + "' in: " + pathname)
    num_locations = 0
    for record in stats["table-files"]:
        if "functions" not in record:
            raise ValueError("Missing key 'functions' in a 'table-files' record of: " + pathname)
        for func in record["functions"]:
            if "num-locations" not in func:
                raise ValueError("Missing key 'num-locations' in a function record of: " + pathname)
            num_locations += func["num-locations"]
    total_time = 0.0
    for phase_time in stats["table-phases"].values():
        total_time += phase_time
    return num_locations, total_time


def _main(cmdline):
    """Collect per-benchmark performance data into one JSON file.

    Walks `cmdline.input` looking for files named
    ``statistics_security_analyser.json`` located in a directory whose path
    ends with ``<benchmark>/RESULTS/statistics/JSON``.  For each one an entry
    keyed by ``<benchmark>`` is stored with the number of goto-program
    locations, the total time and a category, and the resulting dictionary is
    written to `cmdline.output` as sorted, indented JSON.

    If `cmdline.input` is not an existing directory an error is printed and
    nothing is written.

    Raises ValueError when a statistics file lacks a required key or when two
    result directories map to the same benchmark name.
    """
    if not os.path.isdir(cmdline.input):
        print("ERROR: The input path is not an existing directory.")
        return

    # Benchmarks with these names are all reported under the "WebGoat" category.
    webgoat_lessons = frozenset((
        "Assignment5",
        "Assignment6",
        "BlindSendFileAssignment",
        "CrossSiteScriptingLesson5a",
        "SimpleXXE",
        "SqlInjectionChallenge",
        "SqlInjectionLesson12a",
        "SqlInjectionLesson5a",
        "SqlInjectionLesson5b",
        "SqlInjectionLesson6a",
    ))

    result = {}
    for root, _, _ in os.walk(cmdline.input):
        benchmark_root = _benchmark_root(root)
        if benchmark_root is None:
            continue
        full_pathname = os.path.abspath(os.path.join(root, "statistics_security_analyser.json"))
        if not os.path.isfile(full_pathname):
            continue

        benchmark_name = os.path.basename(benchmark_root)
        if benchmark_name in result:
            # Overwriting silently would drop the earlier benchmark's data.
            raise ValueError("Duplicate benchmark name '" + benchmark_name + "' found at: " + full_pathname)

        with open(full_pathname, "r", encoding="utf-8") as ifile:
            stats = json.load(ifile)
        num_locations, total_time = _summarise_stats(stats, full_pathname)

        result[benchmark_name] = {
            "category": "WebGoat" if benchmark_name in webgoat_lessons else benchmark_name,
            "num_goto_program_locations": num_locations,
            "time_in_seconds": total_time,
            # No memory figure is read from the statistics file here, so a
            # constant 0 is stored.
            "memory_in_mega_bytes": 0,
        }

    with open(cmdline.output, "w") as ofile:
        ofile.write(json.dumps(result, sort_keys=True, indent=4))
69+
70+
71+
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the collector.
    cmdline = _parse_cmd_line()
    _main(cmdline)
+159
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
import argparse
2+
import os
3+
import json
4+
import matplotlib.pyplot as plt
5+
import matplotlib.lines as mlines
6+
import numpy
7+
import random
8+
9+
10+
def _parse_cmd_line():
    """Parse the command line of the scatter-plot generator.

    Positional arguments are ``input`` (JSON file with the collected data)
    and ``output`` (directory for the plots).  Optional arguments are
    ``-F/--format`` (plot file format, default "svg") and the flag
    ``-V/--version``.  Returns the ``argparse.Namespace`` built from
    ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description="Makes scatter plots from performance data collected by the script 'collect_stats.py'.")
    parser.add_argument("-V", "--version", action="store_true",
                        help="Prints a version string.")
    parser.add_argument("input", type=str,
                        help="A path-name of a JSON file with input data (i.e. output from the script "
                             "'collect_stats.py').")
    parser.add_argument("output", type=str,
                        help="A directory under which the plots will be stored.")
    # The help text used to repeat the 'output' description; it now describes
    # what the option actually selects.
    parser.add_argument("-F", "--format", type=str, default="svg",
                        help="A file format of the generated plots. Possible values are: svg, png, pdf, ps, eps.")
    return parser.parse_args()
22+
23+
24+
def get_predefined_colour_names():
    """Return a new list with the sixteen predefined colour names.

    The names always come back in the same fixed order; every call builds a
    fresh list, so callers may modify the result freely.
    """
    names = (
        "blue green red cyan magenta orange black brown "
        "navy khaki olive pink violet purple yellow salmon"
    )
    return names.split()
43+
44+
45+
def choose_colour(colour_index=0):
    """Pick a colour for the next point group.

    Returns a pair ``(colour, next_index)``.  While `colour_index` is below
    the number of predefined colour names, the name at that index is returned
    together with ``colour_index + 1``.  Otherwise a random RGB triple with
    every component drawn from [0.0, 0.75] is returned and the index is passed
    back unchanged.
    """
    palette = get_predefined_colour_names()
    if colour_index >= len(palette):
        rgb = tuple(random.uniform(0.0, 0.75) for _ in range(3))
        return rgb, colour_index
    return palette[colour_index], colour_index + 1
49+
50+
51+
def make_scatter_plot(
        pathname,
        format,
        point_groups,
        title=None,
        xaxis_name=None,
        faxis_name=None,
        xaxis_log=False,
        faxis_log=False,
        draw_diagonal=False,
        draw_fitline=False,
        add_legend=False,
        size_xy=None,
        dpi=None
        ):
    """Draw groups of points as a scatter plot and save it to a file.

    Parameters:
        pathname: Non-empty path of the image file to write.  Its directory,
            if any, is created when missing.
        format: File format passed to ``Figure.savefig`` (e.g. "svg", "png").
        point_groups: Mapping from a group label to an iterable of ``(x, y)``
            pairs.  Groups are drawn in sorted label order, each in its own
            colour.
        title: Optional plot title.
        xaxis_name: Optional label of the horizontal axis.
        faxis_name: Optional label of the vertical axis.
        xaxis_log: Use a 'log' scale on the horizontal axis.
        faxis_log: Use a 'symlog' scale on the vertical axis.
        draw_diagonal: Draw a line from the lower-left to the upper-right
            corner of the axes (blue when a fit line is requested too,
            otherwise red).
        draw_fitline: Draw the least-squares line through all points as a
            dotted black line.  Skipped when the points have fewer than two
            distinct x values, since no line can be fitted then.
        add_legend: Add a legend listing the group labels.
        size_xy: Optional ``(width, height)`` tuple of the figure size.
        dpi: Optional integer resolution; defaults to 100.

    The figure is always closed before returning, so repeated calls do not
    accumulate open figures.
    """
    assert isinstance(pathname, str) and len(pathname) > 0
    assert title is None or isinstance(title, str)
    assert xaxis_name is None or isinstance(xaxis_name, str)
    assert faxis_name is None or isinstance(faxis_name, str)
    assert size_xy is None or (isinstance(size_xy, tuple) and len(size_xy) == 2)
    assert dpi is None or isinstance(dpi, int)
    if dpi is None:
        dpi = 100

    # A bare file name has an empty directory part and os.makedirs("") raises.
    out_dir = os.path.dirname(pathname)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    fig = plt.figure(figsize=size_xy, dpi=dpi)
    try:
        ax = fig.gca()
        if title:
            ax.set_title(title)
        if xaxis_name:
            ax.set_xlabel(xaxis_name)
        if faxis_name:
            ax.set_ylabel(faxis_name)
        if xaxis_log:
            ax.set_xscale('log')
        if faxis_log:
            ax.set_yscale('symlog')
        ax.grid(True, linestyle='dotted')

        all_xs = []
        all_ys = []
        idx = 0
        for group in sorted(point_groups):
            colour, idx = choose_colour(idx)
            xs = []
            ys = []
            for x, y in point_groups[group]:
                xs.append(x)
                ys.append(y)
            ax.scatter(xs, ys, marker="o", color=colour, label=group)
            all_xs += xs
            all_ys += ys

        # Honour the caller's choice; with no groups there is nothing to list.
        if add_legend and point_groups:
            ax.legend()

        if draw_diagonal:
            line = mlines.Line2D([0, 1], [0, 1], color=("blue" if draw_fitline else "red"))
            line.set_transform(ax.transAxes)
            ax.add_line(line)

        if draw_fitline and len(set(all_xs)) >= 2:
            line_coefs = numpy.polyfit(all_xs, all_ys, 1)
            x_lo = min(all_xs)
            x_hi = max(all_xs)
            # Sample the line at many x positions (plus the data's own x
            # values) rather than only at its two ends.
            n_steps = 1000
            dx = (x_hi - x_lo) / n_steps
            lxs = sorted(all_xs + [x_lo + t * dx for t in range(n_steps + 1)])
            lys = [line_coefs[0] * x + line_coefs[1] for x in lxs]
            ax.plot(lxs, lys, "k:")

        fig.savefig(pathname, bbox_inches='tight', format=format)
    finally:
        plt.close(fig)
117+
118+
119+
def _main(cmdline):
    """Load the collected statistics and render the two scatter plots.

    Reads the JSON file `cmdline.input`, groups its entries by their
    "category" (an empty string when the key is absent) and writes one time
    plot and one memory plot into the directory `cmdline.output`, using
    `cmdline.format` both as file extension and as output format.
    """
    with open(cmdline.input, "r") as ifile:
        stats = json.load(ifile)

    time_points = {}
    memory_points = {}
    for data in stats.values():
        category = data["category"] if "category" in data else ""
        time_points.setdefault(category, []).append(
            (data["num_goto_program_locations"], data["time_in_seconds"]))
        memory_points.setdefault(category, []).append(
            (data["num_goto_program_locations"], data["memory_in_mega_bytes"]))

    fname_prefix = "security-analyser_stage1_"
    plots = (
        ("time_perf.", time_points,
         "Time performance of Stage 1 of the security-analyser", "seconds"),
        ("memory_perf.", memory_points,
         "Memory performance of Stage 1 of the security-analyser", "MB"),
    )
    # Both plots share the x axis label and drawing options.
    for stem, points, plot_title, unit in plots:
        make_scatter_plot(
            os.path.join(cmdline.output, fname_prefix + stem + cmdline.format),
            cmdline.format,
            points,
            plot_title,
            "goto-program locations",
            unit,
            draw_fitline=True,
            add_legend=True
        )
156+
157+
158+
if __name__ == "__main__":
    # Script entry point: parse the command line, then produce the plots.
    cmdline = _parse_cmd_line()
    _main(cmdline)

0 commit comments

Comments
 (0)