37
37
import random
38
38
import numpy as np
39
39
40
+ import pandas as pd
40
41
from pandas import DataFrame , Series
41
42
42
43
from suite import REPO_PATH
43
-
44
+ VB_DIR = os . path . dirname ( os . path . abspath ( __file__ ))
44
45
DEFAULT_MIN_DURATION = 0.01
45
46
HEAD_COL = "head[ms]"
46
47
BASE_COL = "base[ms]"
47
48
49
# gitpython is used to look up commit messages and to verify that the commits
# being compared exist; bail out early with a hint if it is not installed.
try:
    import git  # gitpython
except ImportError:
    # Only a failed import should produce the install hint; any other error
    # raised while importing the package is a real problem and must surface
    # with its own traceback instead of being misreported.
    print("Error: Please install the `gitpython` package\n ")
    sys.exit(1)
48
54
49
55
class RevParseAction (argparse .Action ):
50
56
def __call__ (self , parser , namespace , values , option_string = None ):
@@ -66,6 +72,14 @@ def __call__(self, parser, namespace, values, option_string=None):
66
72
parser .add_argument ('-t' , '--target-commit' ,
67
73
help = 'The commit to compare against the baseline (default: HEAD).' ,
68
74
type = str , action = RevParseAction )
75
# Pre-recorded timings: when both pickles are supplied, the two result sets
# are compared directly. The commit hashes are taken from the file names
# unless --base-commit / --target-commit are given explicitly.
parser.add_argument('--base-pickle',
                    help='name of pickle file with timings data generated by a former `-H -d FILE` run. '
                         'filename must be of the form <hash>-*.* or specify --base-commit separately',
                    type=str)
parser.add_argument('--target-pickle',
                    help='name of pickle file with timings data generated by a former `-H -d FILE` run. '
                         'filename must be of the form <hash>-*.* or specify --target-commit separately',
                    type=str)
69
83
parser .add_argument ('-m' , '--min-duration' ,
70
84
help = 'Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION ,
71
85
type = float ,
@@ -213,30 +227,82 @@ def profile_comparative(benchmarks):
213
227
214
228
head_res = get_results_df (db , h_head )
215
229
baseline_res = get_results_df (db , h_baseline )
216
- ratio = head_res ['timing' ] / baseline_res ['timing' ]
217
- totals = DataFrame ({HEAD_COL :head_res ['timing' ],
218
- BASE_COL :baseline_res ['timing' ],
219
- 'ratio' :ratio ,
220
- 'name' :baseline_res .name },
221
- columns = [HEAD_COL , BASE_COL , "ratio" , "name" ])
222
- totals = totals .ix [totals [HEAD_COL ] > args .min_duration ]
223
- # ignore below threshold
224
- totals = totals .dropna (
225
- ).sort ("ratio" ).set_index ('name' ) # sort in ascending order
226
-
227
- h_msg = repo .messages .get (h_head , "" )
228
- b_msg = repo .messages .get (h_baseline , "" )
229
-
230
- print_report (totals ,h_head = h_head ,h_msg = h_msg ,
231
- h_baseline = h_baseline ,b_msg = b_msg )
232
-
233
- if args .outdf :
234
- prprint ("The results DataFrame was written to '%s'\n " % args .outdf )
235
- totals .save (args .outdf )
230
+
231
+ report_comparative (head_res ,baseline_res )
232
+
236
233
finally :
237
234
# print("Disposing of TMP_DIR: %s" % TMP_DIR)
238
235
shutil .rmtree (TMP_DIR )
239
236
237
def prep_pickle_for_total(df, agg_name='median'):
    """
    Reduce a timings DataFrame to the form expected by prep_totals.

    Accepts a DataFrame resulting from an invocation with `-H -d o.pickle`.
    If multiple data columns are present (-N was used), they are reduced
    row-wise to a single value per vbench.

    Parameters
    ----------
    df : DataFrame
        Timings data; the index is used as the vbench name.
    agg_name : str, default 'median'
        Name of the DataFrame method used to reduce the data columns.

    Returns
    -------
    DataFrame
        Same index as `df`, with a 'timing' column holding the reduced
        value and a 'name' column holding a copy of the index.
    """
    reducer = getattr(df, agg_name)
    # The axis must be passed by keyword: not every aggregator takes it as
    # its first positional parameter (e.g. `quantile` takes `q` first), so a
    # bare positional 1 could silently reduce along the wrong dimension.
    reduced = DataFrame(reducer(axis=1))

    # The reduced column carries an arbitrary label; normalise it.
    columns = list(reduced.columns)
    columns[0] = 'timing'
    reduced.columns = columns

    reduced['name'] = list(reduced.index)
    return reduced
257
+
258
def prep_totals(head_res, baseline_res):
    """
    Build the 'totals' DataFrame consumed by print_report.

    Parameters
    ----------
    head_res, baseline_res : DataFrame
        Each must have 'timing' and 'name' columns, where name is the
        name of the vbench.

    Returns
    -------
    DataFrame
        Indexed by vbench name, with the head timing, the baseline timing
        and their ratio (head / baseline), sorted by ascending ratio. Rows
        at or below the `args.min_duration` threshold and rows with missing
        values are dropped.
    """
    # Align first so that rows missing from either side become NaN and are
    # removed by dropna() below instead of being mismatched.
    head_res, baseline_res = head_res.align(baseline_res)
    ratio = head_res['timing'] / baseline_res['timing']
    totals = DataFrame({HEAD_COL: head_res['timing'],
                        BASE_COL: baseline_res['timing'],
                        'ratio': ratio,
                        'name': baseline_res['name']},
                       columns=[HEAD_COL, BASE_COL, "ratio", "name"])

    # ignore below threshold
    # NOTE(review): the --min-duration help text speaks of the *baseline*
    # duration, but the filter is applied to the head column -- confirm
    # which one is intended.
    totals = totals[totals[HEAD_COL] > args.min_duration]
    totals = totals.dropna()

    # sort in ascending order; DataFrame.sort is gone from current pandas,
    # so prefer sort_values and fall back only on old releases.
    if hasattr(totals, 'sort_values'):
        totals = totals.sort_values("ratio")
    else:
        totals = totals.sort("ratio")

    return totals.set_index('name')
277
+
278
def _commit_message(repo, rev, default="Unknown"):
    """Return the stripped commit message for `rev`, or `default` if the
    repository is unavailable or the commit cannot be found in it."""
    if repo is None:
        return default
    try:
        return repo.commit(rev).message.strip()
    except git.exc.BadObject:
        return default


def report_comparative(head_res, baseline_res):
    """
    Print the comparative report for two result sets.

    Parameters
    ----------
    head_res, baseline_res : DataFrame
        Timings for the target and the baseline commit, in the form
        expected by prep_totals ('timing' and 'name' columns).

    The commits are taken from `args.target_commit` / `args.base_commit`;
    their messages are looked up in the git repository at VB_DIR and fall
    back to "Unknown" when they cannot be resolved. If `args.outdf` is set,
    the totals DataFrame is also pickled to that path.
    """
    try:
        repo = git.Repo(VB_DIR)
    except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
        # The commit messages are purely informational: report without them
        # rather than failing (or dropping into a debugger) here.
        print("Warning: could not open a git repository at '%s', "
              "commit messages will be reported as Unknown" % VB_DIR)
        repo = None

    totals = prep_totals(head_res, baseline_res)

    h_head = args.target_commit
    h_baseline = args.base_commit
    h_msg = _commit_message(repo, h_head)
    b_msg = _commit_message(repo, h_baseline)

    print_report(totals, h_head=h_head, h_msg=h_msg,
                 h_baseline=h_baseline, b_msg=b_msg)

    if args.outdf:
        prprint("The results DataFrame was written to '%s'\n " % args.outdf)
        totals.to_pickle(args.outdf)
240
306
241
307
def profile_head_single (benchmark ):
242
308
import gc
@@ -312,7 +378,7 @@ def profile_head(benchmarks):
312
378
313
379
if args .outdf :
314
380
prprint ("The results DataFrame was written to '%s'\n " % args .outdf )
315
- DataFrame (results ).save (args .outdf )
381
+ DataFrame (results ).to_pickle (args .outdf )
316
382
317
383
def print_report (df ,h_head = None ,h_msg = "" ,h_baseline = None ,b_msg = "" ):
318
384
@@ -395,38 +461,22 @@ def main():
395
461
random .seed (args .seed )
396
462
np .random .seed (args .seed )
397
463
398
- affinity_set = False
464
+ if args .base_pickle and args .target_pickle :
465
+ baseline_res = prep_pickle_for_total (pd .read_pickle (args .base_pickle ))
466
+ target_res = prep_pickle_for_total (pd .read_pickle (args .target_pickle ))
399
467
400
- # try psutil first since it is more commonly present and better
401
- # maintained. Some people experienced problems with affinity package
402
- # (see https://code.google.com/p/psutil/issues/detail?id=238 for more references)
403
- try :
404
- import psutil
405
- if hasattr (psutil .Process , 'set_cpu_affinity' ):
406
- psutil .Process (os .getpid ()).set_cpu_affinity ([args .affinity ])
407
- affinity_set = True
408
- except ImportError :
409
- pass
468
+ report_comparative (target_res , baseline_res )
469
+ sys .exit (0 )
410
470
411
- if not affinity_set :
412
- try :
413
- import affinity
414
- affinity . set_process_affinity_mask ( 0 , args . affinity )
415
- assert affinity . get_process_affinity_mask ( 0 ) == args .affinity
416
- affinity_set = True
471
+ if args . affinity is not None :
472
+ try : # use psutil rather then stale affinity module. Thanks @yarikoptic
473
+ import psutil
474
+ if hasattr ( psutil . Process , 'set_cpu_affinity' ):
475
+ psutil . Process ( os . getpid ()). set_cpu_affinity ([ args .affinity ])
476
+ print ( "CPU affinity set to %d" % args . affinity )
417
477
except ImportError :
418
- pass
419
-
420
- if not affinity_set :
421
- import warnings
422
- warnings .warn ("\n \n "
423
- "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n "
424
- "The 'affinity' or 'psutil' >= 0.5.0 modules are not available, results may be unreliable\n "
425
- "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n \n "
426
- )
427
- time .sleep (2 )
428
- else :
429
- print ("CPU affinity set to %d" % args .affinity )
478
+ print ("-a/--affinity specified, but the 'psutil' module is not available, aborting.\n " )
479
+ sys .exit (1 )
430
480
431
481
print ("\n " )
432
482
prprint ("LOG_FILE = %s" % args .log_file )
@@ -508,10 +558,39 @@ def inner(repo_path):
508
558
509
559
def commit_from_pickle_name(path):
    """
    Return the commit hash encoded in a pickle path named <hash>-*.*.

    Only the file name is inspected, so directory components (which may
    themselves contain '-') never leak into the result.
    """
    return os.path.basename(path).split('-')[0]


if __name__ == '__main__':
    args = parser.parse_args()

    any_pickle = bool(args.base_pickle or args.target_pickle)
    both_pickles = bool(args.base_pickle and args.target_pickle)
    both_commits = bool(args.base_commit and args.target_commit)

    # Check the pickle pairing first so that a lone --base-pickle or
    # --target-pickle always gets the specific message, not the generic help.
    if any_pickle and not both_pickles:
        print("Must specify Both --base-pickle and --target-pickle.")
        sys.exit(1)

    if not args.head and not both_commits and not both_pickles:
        parser.print_help()
        sys.exit(1)

    # Pickle mode without explicit commits: derive them from the file names.
    if both_pickles and not both_commits:
        if not args.base_commit:
            print("base_commit not specified, Assuming base_pickle is named <commit>-foo.*")
            args.base_commit = commit_from_pickle_name(args.base_pickle)
        if not args.target_commit:
            print("target_commit not specified, Assuming target_pickle is named <commit>-foo.*")
            args.target_commit = commit_from_pickle_name(args.target_pickle)

    import warnings
    warnings.filterwarnings('ignore', category=FutureWarning)
    warnings.filterwarnings('ignore', category=DeprecationWarning)

    if args.base_commit and args.target_commit:
        print("Verifying specified commits exist in repo...")
        r = git.Repo(VB_DIR)
        for c in [args.base_commit, args.target_commit]:
            try:
                msg = r.commit(c).message.strip()
            except git.exc.BadObject:
                print("The commit '%s' was not found, aborting..." % c)
                sys.exit(1)
            else:
                print("%s: %s" % (c, msg))

    main()
0 commit comments