From c48db2f117a9d13072c90e77b1677d51cd2ae1fc Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 22 Aug 2013 21:05:18 +0300 Subject: [PATCH 1/2] BLD: test_perf, bring back pickle-compare code clobbered in 244d56751 --- vb_suite/test_perf.py | 193 +++++++++++++++++++++++++++++------------- 1 file changed, 136 insertions(+), 57 deletions(-) diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index ca98b94e4fbbd..b2887035bb9bf 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -37,14 +37,20 @@ import random import numpy as np +import pandas as pd from pandas import DataFrame, Series from suite import REPO_PATH - +VB_DIR = os.path.dirname(os.path.abspath(__file__)) DEFAULT_MIN_DURATION = 0.01 HEAD_COL="head[ms]" BASE_COL="base[ms]" +try: + import git # gitpython +except Exception: + print("Error: Please install the `gitpython` package\n") + sys.exit(1) class RevParseAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): @@ -66,6 +72,14 @@ def __call__(self, parser, namespace, values, option_string=None): parser.add_argument('-t', '--target-commit', help='The commit to compare against the baseline (default: HEAD).', type=str, action=RevParseAction) +parser.add_argument('--base-pickle', + help='name of pickle file with timings data generated by a former `-H -d FILE` run. '\ + 'filename must be of the form -*.* or specify --base-commit seperately', + type=str) +parser.add_argument('--target-pickle', + help='name of pickle file with timings data generated by a former `-H -d FILE` run '\ + 'filename must be of the form -*.* or specify --target-commit seperately', + type=str) parser.add_argument('-m', '--min-duration', help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' 
% DEFAULT_MIN_DURATION, type=float, @@ -213,30 +227,82 @@ def profile_comparative(benchmarks): head_res = get_results_df(db, h_head) baseline_res = get_results_df(db, h_baseline) - ratio = head_res['timing'] / baseline_res['timing'] - totals = DataFrame({HEAD_COL:head_res['timing'], - BASE_COL:baseline_res['timing'], - 'ratio':ratio, - 'name':baseline_res.name}, - columns=[HEAD_COL, BASE_COL, "ratio", "name"]) - totals = totals.ix[totals[HEAD_COL] > args.min_duration] - # ignore below threshold - totals = totals.dropna( - ).sort("ratio").set_index('name') # sort in ascending order - - h_msg = repo.messages.get(h_head, "") - b_msg = repo.messages.get(h_baseline, "") - - print_report(totals,h_head=h_head,h_msg=h_msg, - h_baseline=h_baseline,b_msg=b_msg) - - if args.outdf: - prprint("The results DataFrame was written to '%s'\n" % args.outdf) - totals.save(args.outdf) + + report_comparative(head_res,baseline_res) + finally: # print("Disposing of TMP_DIR: %s" % TMP_DIR) shutil.rmtree(TMP_DIR) +def prep_pickle_for_total(df, agg_name='median'): + """ + accepts a datafram resulting from invocation with -H -d o.pickle + If multiple data columns are present (-N was used), the + `agg_name` attr of the datafram will be used to reduce + them to a single value per vbench, df.median is used by defa + ult. + + Returns a datadrame of the form expected by prep_totals + """ + def prep(df): + agg = getattr(df,agg_name) + df = DataFrame(agg(1)) + cols = list(df.columns) + cols[0]='timing' + df.columns=cols + df['name'] = list(df.index) + return df + + return prep(df) + +def prep_totals(head_res, baseline_res): + """ + Each argument should be a dataframe with 'timing' and 'name' columns + where name is the name of the vbench. + + returns a 'totals' dataframe, suitable as input for print_report. 
+ """ + head_res, baseline_res = head_res.align(baseline_res) + ratio = head_res['timing'] / baseline_res['timing'] + totals = DataFrame({HEAD_COL:head_res['timing'], + BASE_COL:baseline_res['timing'], + 'ratio':ratio, + 'name':baseline_res.name}, + columns=[HEAD_COL, BASE_COL, "ratio", "name"]) + totals = totals.ix[totals[HEAD_COL] > args.min_duration] + # ignore below threshold + totals = totals.dropna( + ).sort("ratio").set_index('name') # sort in ascending order + return totals + +def report_comparative(head_res,baseline_res): + try: + r=git.Repo(VB_DIR) + except: + import pdb + pdb.set_trace() + + totals = prep_totals(head_res,baseline_res) + + h_head = args.target_commit + h_baseline = args.base_commit + h_msg = b_msg = "Unknown" + try: + h_msg = r.commit(h_head).message.strip() + except git.exc.BadObject: + pass + try: + b_msg = r.commit(h_baseline).message.strip() + except git.exc.BadObject: + pass + + + print_report(totals,h_head=h_head,h_msg=h_msg, + h_baseline=h_baseline,b_msg=b_msg) + + if args.outdf: + prprint("The results DataFrame was written to '%s'\n" % args.outdf) + totals.to_pickle(args.outdf) def profile_head_single(benchmark): import gc @@ -312,7 +378,7 @@ def profile_head(benchmarks): if args.outdf: prprint("The results DataFrame was written to '%s'\n" % args.outdf) - DataFrame(results).save(args.outdf) + DataFrame(results).to_pickle(args.outdf) def print_report(df,h_head=None,h_msg="",h_baseline=None,b_msg=""): @@ -395,38 +461,22 @@ def main(): random.seed(args.seed) np.random.seed(args.seed) - affinity_set = False + if args.base_pickle and args.target_pickle: + baseline_res = prep_pickle_for_total(pd.read_pickle(args.base_pickle)) + target_res = prep_pickle_for_total(pd.read_pickle(args.target_pickle)) - # try psutil first since it is more commonly present and better - # maintained. 
Some people experienced problems with affinity package - # (see https://code.google.com/p/psutil/issues/detail?id=238 for more references) - try: - import psutil - if hasattr(psutil.Process, 'set_cpu_affinity'): - psutil.Process(os.getpid()).set_cpu_affinity([args.affinity]) - affinity_set = True - except ImportError: - pass + report_comparative(target_res, baseline_res) + sys.exit(0) - if not affinity_set: - try: - import affinity - affinity.set_process_affinity_mask(0, args.affinity) - assert affinity.get_process_affinity_mask(0) == args.affinity - affinity_set = True + if args.affinity is not None: + try: # use psutil rather then stale affinity module. Thanks @yarikoptic + import psutil + if hasattr(psutil.Process, 'set_cpu_affinity'): + psutil.Process(os.getpid()).set_cpu_affinity([args.affinity]) + print("CPU affinity set to %d" % args.affinity) except ImportError: - pass - - if not affinity_set: - import warnings - warnings.warn("\n\n" - "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" - "The 'affinity' or 'psutil' >= 0.5.0 modules are not available, results may be unreliable\n" - "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n" - ) - time.sleep(2) - else: - print("CPU affinity set to %d" % args.affinity) + print("-a/--affinity specified, but the 'psutil' module is not available, aborting.\n") + sys.exit(1) print("\n") prprint("LOG_FILE = %s" % args.log_file) @@ -508,10 +558,39 @@ def inner(repo_path): if __name__ == '__main__': args = parser.parse_args() - if not args.head and (not args.base_commit and not args.target_commit): + if (not args.head + and not (args.base_commit and args.target_commit) + and not (args.base_pickle and args.target_pickle)): parser.print_help() - else: - import warnings - warnings.filterwarnings('ignore',category=FutureWarning) - warnings.filterwarnings('ignore',category=DeprecationWarning) - main() + sys.exit(1) + elif ((args.base_pickle or 
args.target_pickle) and not + (args.base_pickle and args.target_pickle)): + print("Must specify Both --base-pickle and --target-pickle.") + sys.exit(1) + + if ((args.base_pickle or args.target_pickle) and not + (args.base_commit and args.target_commit)): + if not args.base_commit: + print("base_commit not specified, Assuming base_pickle is named -foo.*") + args.base_commit = args.base_pickle.split('-')[0] + if not args.target_commit: + print("target_commit not specified, Assuming target_pickle is named -foo.*") + args.target_commit = args.target_pickle.split('-')[0] + + import warnings + warnings.filterwarnings('ignore',category=FutureWarning) + warnings.filterwarnings('ignore',category=DeprecationWarning) + + if args.base_commit and args.target_commit: + print("Verifying specified commits exist in repo...") + r=git.Repo(VB_DIR) + for c in [ args.base_commit, args.target_commit ]: + try: + msg = r.commit(c).message.strip() + except git.BadObject: + print("The commit '%s' was not found, aborting..." % c) + sys.exit(1) + else: + print("%s: %s" % (c,msg)) + + main() From d9cc058afe21675d789b5699739992aecd9f5a53 Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 22 Aug 2013 22:00:44 +0300 Subject: [PATCH 2/2] BLD: test_perf, revert s/save/to_pickle change, warn the unwary --- vb_suite/test_perf.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index b2887035bb9bf..b39dfdcaae94b 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -26,6 +26,21 @@ """ +# IMPORTANT NOTE +# +# This script should run on pandas versions at least as far back as 0.9.1. +# devs should be able to use the latest version of this script with +# any dusty old commit and expect it to "just work". +# One way in which this is useful is when collecting historical data, +# where writing some logic around this script may prove easier +# in some cases then running vbench directly (think perf bisection). 
+# +# *please*, when you modify this script for whatever reason, +# make sure you do not break its functionality when running under older +# pandas versions. +# Note that deprecation warnings are turned off in main(), so there's +# no need to change the actual code to suppress such warnings. + import shutil import os import sys @@ -302,7 +317,7 @@ def report_comparative(head_res,baseline_res): if args.outdf: prprint("The results DataFrame was written to '%s'\n" % args.outdf) - totals.to_pickle(args.outdf) + totals.save(args.outdf) def profile_head_single(benchmark): import gc @@ -378,7 +393,7 @@ def profile_head(benchmarks): if args.outdf: prprint("The results DataFrame was written to '%s'\n" % args.outdf) - DataFrame(results).to_pickle(args.outdf) + DataFrame(results).save(args.outdf) def print_report(df,h_head=None,h_msg="",h_baseline=None,b_msg=""): @@ -462,8 +477,8 @@ def main(): np.random.seed(args.seed) if args.base_pickle and args.target_pickle: - baseline_res = prep_pickle_for_total(pd.read_pickle(args.base_pickle)) - target_res = prep_pickle_for_total(pd.read_pickle(args.target_pickle)) + baseline_res = prep_pickle_for_total(pd.load(args.base_pickle)) + target_res = prep_pickle_for_total(pd.load(args.target_pickle)) report_comparative(target_res, baseline_res) sys.exit(0)