55
55
# attribute name handed to BenchRunner._call_attr() once the timed iterations are done (the "### teardown ... " step)
# NOTE(review): presumably the name of an optional teardown hook in the benchmark module -- confirm in _call_attr
ATTR_TEARDOWN = '__teardown__'
56
56
57
57
58
+ # ----------------------------------------------------------------------------------------------------------------------
59
+ #
60
+ # the CUSUM method adapted for warmup detection within a given threshold (initial iterations)
61
+ #
62
+ # ----------------------------------------------------------------------------------------------------------------------
63
def zeros(n):
    """
    build a fresh list holding n integer zeros

    :param n: the number of elements
    :return: a new list of length n (empty when n is not positive)
    """
    return [0] * n
65
+
66
+
67
def append(arr, val):
    """
    return a new sequence made of arr and val, without modifying arr

    :param arr: the sequence to extend
    :param val: the element to add
    :return: arr + [val] when arr is a list, otherwise [val] + arr
    """
    item = [val]
    return arr + item if isinstance(arr, list) else item + arr
72
+
73
+
74
def cusum(values, threshold=1.0, drift=0.0):
    """
    detect change points with a two-sided cumulative sum over the differences of consecutive values

    :param values: the series to scan (an indexable sequence of numbers)
    :param threshold: a change point is reported when either cumulative sum exceeds this value
    :param drift: the amount subtracted from both sums at every step
    :return: the indices (in ascending order) at which a change was detected, empty if there are none
    """
    # only the previous cumulative sums are ever read, so two running scalars are enough
    pos_sum, neg_sum = 0, 0
    change_points = []
    for i in range(1, len(values)):
        diff = values[i] - values[i - 1]
        pos_sum = pos_sum + diff - drift
        neg_sum = neg_sum - diff - drift

        # the sums are clamped at zero, so each side only accumulates moves in its own direction
        if pos_sum < 0:
            pos_sum = 0
        if neg_sum < 0:
            neg_sum = 0

        if pos_sum > threshold or neg_sum > threshold:
            change_points.append(i)
            # restart the accumulation after a detected change
            pos_sum, neg_sum = 0, 0

    return change_points
92
+
93
+
94
def avg(values):
    """
    compute the arithmetic mean of the values

    :param values: a non-empty sized collection of numbers
    :return: the mean as a float
    """
    total = float(sum(values))
    count = len(values)
    return total / count
96
+
97
+
98
def norm(values):
    """
    rescale the values linearly so that the minimum maps to 0.0 and the maximum to 100.0

    :param values: the numbers to normalize
    :return: a new list of floats; an empty list for empty input, all 0.0 when every value is the same
    """
    if len(values) == 0:
        # max() / min() raise on an empty sequence
        return []
    _max, _min = max(values), min(values)
    span = _max - _min
    if span == 0:
        # a constant series has no range to scale by; avoid the division by zero
        return [0.0 for _ in values]
    return [float(v - _min) / span * 100.0 for v in values]
101
+
102
+
103
def pairwise_slopes(values, cp):
    """
    compute the absolute slope between each pair of neighbouring points

    :param values: the y coordinates
    :param cp: the x coordinates, one for each value
    :return: a list with len(values) - 1 slopes, each one being |dy / dx| as a float
    """
    slopes = []
    for i in range(len(values) - 1):
        rise = float(values[i + 1] - values[i])
        run = float(cp[i + 1] - cp[i])
        slopes.append(abs(rise / run))
    return slopes
105
+
106
+
107
def last_n_percent_runs(values, n=0.1):
    """
    select the trailing fraction n of the runs

    :param values: the sequence of runs
    :param n: the fraction to take, with 0.0 < n <= 1.0
    :return: a tuple (the selected values, the list of their indices in the original sequence)
    """
    assert 0.0 < n <= 1.0
    total = len(values)
    # when the fraction rounds down to no element at all, fall back to the last index
    start = min(total - int(total * n), total - 1)
    return values[start:], list(range(start, total))
112
+
113
+
114
def first_n_percent_runs(values, n=0.1):
    """
    compute the index that lies right after the leading fraction n of the runs

    :param values: the sequence of runs
    :param n: the fraction to skip, with 0.0 < n <= 1.0
    :return: int(len(values) * n), lowered by one when it equals len(values)
    """
    assert 0.0 < n <= 1.0
    total = len(values)
    idx = int(total * n)
    if idx == total:
        return idx - 1
    return idx
118
+
119
+
120
def detect_warmup(values, cp_threshold=0.03, stability_slope_grade=0.01):
    """
    detect the warmup point (iteration / run)

    The durations are normalized to the 0..100 range and scanned for change points with cusum(). The first
    change point whose tail average (the average of all values from that point on) is at or below the change
    point threshold is taken; failing that, the first point where the slope between neighbouring tail averages
    flattens to the stability grade is taken.

    :param values: the durations for each run
    :param cp_threshold: the percent in value difference for a point to be considered a change point (percentage)
    :param stability_slope_grade: the slope grade (percentage). A grade of 1% corresponds to a slope of
                                  roughly 0.57 degrees
    :return: the index of the run following the selected change point, or -1 if not detected (this includes
             any error raised while detecting)
    """
    # both thresholds are given as fractions, the values are normalized to 0..100: bring them to the same scale
    stability_slope_grade *= 100.0
    cp_threshold *= 100

    try:
        # normalization runs inside the try block so that any error it raises is reported as "not detected" (-1),
        # like every other failure of the detection
        values = norm(values)
        cp = cusum(values, threshold=cp_threshold)
        rolling_avg = [avg(values[i:]) for i in cp]

        def warmup(cp_index):
            # the warmup point is the run right after the change point, if there is one
            val_idx = cp[cp_index] + 1
            return val_idx if val_idx < len(values) else -1

        # find the point where the duration avg is below the cp threshold
        for i, d in enumerate(rolling_avg):
            if d <= cp_threshold:
                return warmup(i)

        # could not find something below the CP threshold (noise in the data), use the stabilisation of slopes
        last_n_vals, last_n_idx = last_n_percent_runs(values, 0.1)
        slopes = pairwise_slopes(rolling_avg + last_n_vals, cp + last_n_idx)

        # NOTE: slopes also covers the appended last runs, so i can run past the end of cp; warmup() then
        # raises an IndexError which the handler below reports and turns into -1
        for i, d in enumerate(slopes):
            if d <= stability_slope_grade:
                return warmup(i)

        return -1
    except Exception as e:
        print("exception occurred while detecting warmup: %s" % e)
        return -1
159
+
160
+
58
161
def ccompile (name , code ):
59
162
from importlib import invalidate_caches
60
163
from distutils .core import setup , Extension
@@ -89,7 +192,14 @@ def _as_int(value):
89
192
90
193
91
194
class BenchRunner (object ):
92
- def __init__ (self , bench_file , bench_args = None , iterations = 1 , warmup = 0 ):
195
+ def __init__ (self , bench_file , bench_args = None , iterations = 1 , warmup = - 1 , warmup_runs = 0 ):
196
+ assert isinstance (iterations , int ), \
197
+ "BenchRunner iterations argument must be an int, got %s instead" % iterations
198
+ assert isinstance (warmup , int ), \
199
+ "BenchRunner warmup argument must be an int, got %s instead" % warmup
200
+ assert isinstance (warmup_runs , int ), \
201
+ "BenchRunner warmup_runs argument must be an int, got %s instead" % warmup_runs
202
+
93
203
if bench_args is None :
94
204
bench_args = []
95
205
self .bench_module = BenchRunner .get_bench_module (bench_file )
@@ -98,10 +208,8 @@ def __init__(self, bench_file, bench_args=None, iterations=1, warmup=0):
98
208
_iterations = _as_int (iterations )
99
209
self ._run_once = _iterations <= 1
100
210
self .iterations = 1 if self ._run_once else _iterations
101
-
102
- assert isinstance (self .iterations , int )
103
- self .warmup = _as_int (warmup )
104
- assert isinstance (self .warmup , int )
211
+ self .warmup_runs = warmup_runs if warmup_runs > 0 else 0
212
+ self .warmup = warmup if warmup > 0 else - 1
105
213
106
214
@staticmethod
107
215
def get_bench_module (bench_file ):
@@ -139,9 +247,10 @@ def _call_attr(self, attr_name, *args):
139
247
140
248
def run (self ):
141
249
if self ._run_once :
142
- print ("### %s, exactly one iteration (no warmup curves)" % ( self .bench_module .__name__ ) )
250
+ print ("### %s, exactly one iteration (no warmup curves)" % self .bench_module .__name__ )
143
251
else :
144
- print ("### %s, %s warmup iterations, %s bench iterations " % (self .bench_module .__name__ , self .warmup , self .iterations ))
252
+ print ("### %s, %s warmup iterations, %s bench iterations " % (self .bench_module .__name__ ,
253
+ self .warmup_runs , self .iterations ))
145
254
146
255
# process the args if the processor function is defined
147
256
args = self ._call_attr (ATTR_PROCESS_ARGS , * self .bench_args )
@@ -159,9 +268,9 @@ def run(self):
159
268
bench_func = self ._get_attr (ATTR_BENCHMARK )
160
269
durations = []
161
270
if bench_func and hasattr (bench_func , '__call__' ):
162
- if self .warmup :
163
- print ("### warming up for %s iterations ... " % self .warmup )
164
- for _ in range (self .warmup ):
271
+ if self .warmup_runs :
272
+ print ("### (pre) warming up for %s iterations ... " % self .warmup_runs )
273
+ for _ in range (self .warmup_runs ):
165
274
bench_func (* args )
166
275
167
276
for iteration in range (self .iterations ):
@@ -173,21 +282,46 @@ def run(self):
173
282
if self ._run_once :
174
283
print ("@@@ name=%s, duration=%s" % (self .bench_module .__name__ , duration_str ))
175
284
else :
176
- print ("### iteration=%s, name=%s, duration=%s" % (iteration , self .bench_module .__name__ , duration_str ))
285
+ print ("### iteration=%s, name=%s, duration=%s" % (iteration , self .bench_module .__name__ ,
286
+ duration_str ))
177
287
178
288
print (_HRULE )
179
289
print ("### teardown ... " )
180
290
self ._call_attr (ATTR_TEARDOWN )
181
291
print ("### benchmark complete" )
182
292
print (_HRULE )
183
- print ("### BEST duration: %.3f s" % min (durations ))
184
- print ("### WORST duration: %.3f s" % max (durations ))
185
- print ("### AVG duration: %.3f" % (sum (durations ) / len (durations )))
293
+
294
+ # summary
295
+ if self ._run_once :
296
+ print ("### SINGLE RUN duration: %.3f s" % durations [0 ])
297
+ else :
298
+ print ("### BEST duration: %.3f s" % min (durations ))
299
+ print ("### WORST duration: %.3f s" % max (durations ))
300
+ print ("### AVG (all runs) duration: %.3f s" % (sum (durations ) / len (durations )))
301
+ warmup_iter = self .warmup if self .warmup > 0 else detect_warmup (durations )
302
+ # if we cannot detect a warmup starting point but we performed some pre runs, we take a starting point
303
+ # after the 10% of the first runs ...
304
+ if warmup_iter < 0 and self .warmup_runs > 0 :
305
+ print ("### warmup could not be detected, but %s pre-runs were executed.\n "
306
+ "### we assume the benchmark is warmed up and pick an iteration "
307
+ "in the first 10%% of the runs" % self .warmup_runs )
308
+ warmup_iter = first_n_percent_runs (durations , 0.1 )
309
+
310
+ if warmup_iter > 0 :
311
+ print ("### WARMUP %s at iteration: %d" % ("specified" if self .warmup > 0 else "detected" , warmup_iter ))
312
+ no_warmup_durations = durations [warmup_iter :]
313
+ print ("### AVG (no warmup) duration: %.3f s" % (sum (no_warmup_durations ) / len (no_warmup_durations )))
314
+ else :
315
+ print ("### WARMUP iteration not specified or could not be detected" )
316
+
317
+ print (_HRULE )
318
+ print ("### RAW DURATIONS: %s" % str (durations ))
186
319
print (_HRULE )
187
320
188
321
189
322
def run_benchmark (args ):
190
- warmup = 0
323
+ warmup = - 1
324
+ warmup_runs = 0
191
325
iterations = 1
192
326
bench_file = None
193
327
bench_args = []
@@ -208,6 +342,12 @@ def run_benchmark(args):
208
342
elif arg .startswith ("--warmup" ):
209
343
warmup = _as_int (arg .split ("=" )[1 ])
210
344
345
+ elif arg == '-r' :
346
+ i += 1
347
+ warmup_runs = _as_int (args [i ])
348
+ elif arg .startswith ("--warmup-runs" ):
349
+ warmup_runs = _as_int (arg .split ("=" )[1 ])
350
+
211
351
elif arg == '-p' :
212
352
i += 1
213
353
paths = args [i ].split ("," )
@@ -229,7 +369,7 @@ def run_benchmark(args):
229
369
else :
230
370
print ("### no extra module search paths specified" )
231
371
232
- BenchRunner (bench_file , bench_args = bench_args , iterations = iterations , warmup = warmup ).run ()
372
+ BenchRunner (bench_file , bench_args = bench_args , iterations = iterations , warmup = warmup , warmup_runs = warmup_runs ).run ()
233
373
234
374
235
375
if __name__ == '__main__' :
0 commit comments