26
26
27
27
"""
28
28
29
+ # IMPORTANT NOTE
30
+ #
31
+ # This script should run on pandas versions at least as far back as 0.9.1.
32
+ # devs should be able to use the latest version of this script with
33
+ # any dusty old commit and expect it to "just work".
34
+ # One way in which this is useful is when collecting historical data,
35
+ # where writing some logic around this script may prove easier
36
+ # in some cases than running vbench directly (think perf bisection).
37
+ #
38
+ # *please*, when you modify this script for whatever reason,
39
+ # make sure you do not break its functionality when running under older
40
+ # pandas versions.
41
+ # Note that deprecation warnings are turned off in main(), so there's
42
+ # no need to change the actual code to suppress such warnings.
43
+
29
44
import shutil
30
45
import os
31
46
import sys
37
52
import random
38
53
import numpy as np
39
54
55
+ import pandas as pd
40
56
from pandas import DataFrame , Series
41
57
42
58
from suite import REPO_PATH
43
-
59
+ VB_DIR = os . path . dirname ( os . path . abspath ( __file__ ))
44
60
DEFAULT_MIN_DURATION = 0.01
45
61
HEAD_COL = "head[ms]"
46
62
BASE_COL = "base[ms]"
47
63
64
+ try :
65
+ import git # gitpython
66
+ except Exception :
67
+ print ("Error: Please install the `gitpython` package\n " )
68
+ sys .exit (1 )
48
69
49
70
class RevParseAction (argparse .Action ):
50
71
def __call__ (self , parser , namespace , values , option_string = None ):
@@ -66,6 +87,14 @@ def __call__(self, parser, namespace, values, option_string=None):
66
87
parser .add_argument ('-t' , '--target-commit' ,
67
88
help = 'The commit to compare against the baseline (default: HEAD).' ,
68
89
type = str , action = RevParseAction )
90
# Options for comparing two previously saved timing pickles instead of
# re-running the benchmarks. The commit hash of each side is either given
# explicitly (--base-commit / --target-commit) or taken from the leading
# "<hash>-" part of the pickle file name.
parser.add_argument('--base-pickle',
                    help='name of pickle file with timings data generated by a former `-H -d FILE` run. '
                         'filename must be of the form <hash>-*.* or specify --base-commit separately',
                    type=str)
parser.add_argument('--target-pickle',
                    help='name of pickle file with timings data generated by a former `-H -d FILE` run. '
                         'filename must be of the form <hash>-*.* or specify --target-commit separately',
                    type=str)
69
98
parser .add_argument ('-m' , '--min-duration' ,
70
99
help = 'Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION ,
71
100
type = float ,
@@ -213,30 +242,82 @@ def profile_comparative(benchmarks):
213
242
214
243
head_res = get_results_df (db , h_head )
215
244
baseline_res = get_results_df (db , h_baseline )
216
- ratio = head_res ['timing' ] / baseline_res ['timing' ]
217
- totals = DataFrame ({HEAD_COL :head_res ['timing' ],
218
- BASE_COL :baseline_res ['timing' ],
219
- 'ratio' :ratio ,
220
- 'name' :baseline_res .name },
221
- columns = [HEAD_COL , BASE_COL , "ratio" , "name" ])
222
- totals = totals .ix [totals [HEAD_COL ] > args .min_duration ]
223
- # ignore below threshold
224
- totals = totals .dropna (
225
- ).sort ("ratio" ).set_index ('name' ) # sort in ascending order
226
-
227
- h_msg = repo .messages .get (h_head , "" )
228
- b_msg = repo .messages .get (h_baseline , "" )
229
-
230
- print_report (totals ,h_head = h_head ,h_msg = h_msg ,
231
- h_baseline = h_baseline ,b_msg = b_msg )
232
-
233
- if args .outdf :
234
- prprint ("The results DataFrame was written to '%s'\n " % args .outdf )
235
- totals .save (args .outdf )
245
+
246
+ report_comparative (head_res ,baseline_res )
247
+
236
248
finally :
237
249
# print("Disposing of TMP_DIR: %s" % TMP_DIR)
238
250
shutil .rmtree (TMP_DIR )
239
251
252
def prep_pickle_for_total(df, agg_name='median'):
    """
    Reduce a timings dataframe (as saved by a `-H -d o.pickle` run) to the
    shape expected by prep_totals.

    df: dataframe indexed by vbench name. When it holds several data
        columns (-N was used), they are collapsed to one value per vbench.
    agg_name: name of the dataframe method used for that reduction; it is
        looked up on `df` and called with axis 1. Defaults to 'median'.

    Returns a dataframe whose first column is named 'timing' and which has
    an extra 'name' column holding a copy of the index.
    """
    reducer = getattr(df, agg_name)
    # reduce along axis 1, i.e. one aggregated value per vbench (row)
    timings = DataFrame(reducer(1))

    # the reduced column has no meaningful label; call it 'timing'
    labels = list(timings.columns)
    labels[0] = 'timing'
    timings.columns = labels

    timings['name'] = list(timings.index)
    return timings
272
+
273
def prep_totals(head_res, baseline_res):
    """
    Combine head and baseline timings into the 'totals' dataframe that
    print_report takes as input.

    head_res, baseline_res: dataframes with a 'timing' column and a 'name'
        column, where name is the name of the vbench.

    Returns a dataframe indexed by 'name' with the HEAD_COL, BASE_COL and
    'ratio' (head / baseline) columns, sorted by ratio.
    """
    # NOTE: .ix and .sort are kept deliberately; this script has to keep
    # working on very old pandas versions.
    aligned_head, aligned_base = head_res.align(baseline_res)
    head_timing = aligned_head['timing']
    base_timing = aligned_base['timing']

    column_order = [HEAD_COL, BASE_COL, "ratio", "name"]
    data = {HEAD_COL: head_timing,
            BASE_COL: base_timing,
            'ratio': head_timing / base_timing,
            'name': aligned_base.name}
    totals = DataFrame(data, columns=column_order)

    # ignore below threshold
    above_threshold = totals[HEAD_COL] > args.min_duration
    totals = totals.ix[above_threshold]

    # drop incomplete rows, then sort in ascending order
    totals = totals.dropna()
    totals = totals.sort("ratio")
    return totals.set_index('name')
292
+
293
def report_comparative(head_res, baseline_res):
    """
    Print the comparative report for two sets of timings and optionally
    save the resulting totals dataframe.

    head_res, baseline_res: dataframes with 'timing' and 'name' columns,
        passed straight to prep_totals.

    The commits being compared are read from args.target_commit and
    args.base_commit. Their commit messages are looked up in the git
    repository containing VB_DIR; when the repository cannot be opened or
    a commit is not found, the message is reported as "Unknown" instead of
    aborting the report.

    If args.outdf is set, the totals dataframe is saved to that path.
    """
    totals = prep_totals(head_res, baseline_res)

    h_head = args.target_commit
    h_baseline = args.base_commit

    try:
        r = git.Repo(VB_DIR)
    except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
        # Commit messages are only decoration for the report; carry on
        # without them rather than dropping into a debugger.
        print("Warning: no git repository found at '%s', "
              "commit messages will be reported as Unknown." % VB_DIR)
        r = None

    def commit_message(rev):
        # Message of `rev`, or "Unknown" when it cannot be resolved.
        if r is None:
            return "Unknown"
        try:
            return r.commit(rev).message.strip()
        except git.exc.BadObject:
            return "Unknown"

    h_msg = commit_message(h_head)
    b_msg = commit_message(h_baseline)

    print_report(totals, h_head=h_head, h_msg=h_msg,
                 h_baseline=h_baseline, b_msg=b_msg)

    if args.outdf:
        prprint("The results DataFrame was written to '%s'\n " % args.outdf)
        totals.save(args.outdf)
240
321
241
322
def profile_head_single (benchmark ):
242
323
import gc
@@ -395,38 +476,22 @@ def main():
395
476
random .seed (args .seed )
396
477
np .random .seed (args .seed )
397
478
398
- affinity_set = False
479
+ if args .base_pickle and args .target_pickle :
480
+ baseline_res = prep_pickle_for_total (pd .load (args .base_pickle ))
481
+ target_res = prep_pickle_for_total (pd .load (args .target_pickle ))
399
482
400
- # try psutil first since it is more commonly present and better
401
- # maintained. Some people experienced problems with affinity package
402
- # (see https://code.google.com/p/psutil/issues/detail?id=238 for more references)
403
- try :
404
- import psutil
405
- if hasattr (psutil .Process , 'set_cpu_affinity' ):
406
- psutil .Process (os .getpid ()).set_cpu_affinity ([args .affinity ])
407
- affinity_set = True
408
- except ImportError :
409
- pass
483
+ report_comparative (target_res , baseline_res )
484
+ sys .exit (0 )
410
485
411
- if not affinity_set :
412
- try :
413
- import affinity
414
- affinity . set_process_affinity_mask ( 0 , args . affinity )
415
- assert affinity . get_process_affinity_mask ( 0 ) == args .affinity
416
- affinity_set = True
486
+ if args . affinity is not None :
487
+ try : # use psutil rather then stale affinity module. Thanks @yarikoptic
488
+ import psutil
489
+ if hasattr ( psutil . Process , 'set_cpu_affinity' ):
490
+ psutil . Process ( os . getpid ()). set_cpu_affinity ([ args .affinity ])
491
+ print ( "CPU affinity set to %d" % args . affinity )
417
492
except ImportError :
418
- pass
419
-
420
- if not affinity_set :
421
- import warnings
422
- warnings .warn ("\n \n "
423
- "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n "
424
- "The 'affinity' or 'psutil' >= 0.5.0 modules are not available, results may be unreliable\n "
425
- "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n \n "
426
- )
427
- time .sleep (2 )
428
- else :
429
- print ("CPU affinity set to %d" % args .affinity )
493
+ print ("-a/--affinity specified, but the 'psutil' module is not available, aborting.\n " )
494
+ sys .exit (1 )
430
495
431
496
print ("\n " )
432
497
prprint ("LOG_FILE = %s" % args .log_file )
@@ -508,10 +573,39 @@ def inner(repo_path):
508
573
509
574
if __name__ == '__main__':
    # `args` is a module-level global read by the functions in this file.
    args = parser.parse_args()

    have_both_commits = args.base_commit and args.target_commit
    have_both_pickles = args.base_pickle and args.target_pickle
    have_any_pickle = args.base_pickle or args.target_pickle

    # Need one of: -H, a pair of commits, or a pair of pickles.
    if not args.head and not have_both_commits and not have_both_pickles:
        parser.print_help()
        sys.exit(1)
    elif have_any_pickle and not have_both_pickles:
        print("Must specify Both --base-pickle and --target-pickle.")
        sys.exit(1)

    if have_any_pickle and not have_both_commits:
        # Fall back to the "<commit>-..." naming convention of the pickles.
        # Only the file name is inspected, so a directory prefix (or a dash
        # in a directory name) does not end up in the commit hash.
        if not args.base_commit:
            print("base_commit not specified, Assuming base_pickle is named <commit>-foo.*")
            args.base_commit = os.path.basename(args.base_pickle).split('-')[0]
        if not args.target_commit:
            print("target_commit not specified, Assuming target_pickle is named <commit>-foo.*")
            args.target_commit = os.path.basename(args.target_pickle).split('-')[0]

    # Silence deprecation noise so the script keeps working unchanged
    # across old and new pandas versions.
    import warnings
    warnings.filterwarnings('ignore', category=FutureWarning)
    warnings.filterwarnings('ignore', category=DeprecationWarning)

    if args.base_commit and args.target_commit:
        print("Verifying specified commits exist in repo...")
        r = git.Repo(VB_DIR)
        for c in [args.base_commit, args.target_commit]:
            try:
                msg = r.commit(c).message.strip()
            except git.exc.BadObject:
                print("The commit '%s' was not found, aborting..." % c)
                sys.exit(1)
            else:
                print("%s: %s" % (c, msg))

    main()
0 commit comments