Skip to content

test_perf fixes, restore clobbered pickle compare cmdargs, restore back_compat #4644

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 commits merged into from Aug 22, 2013
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 150 additions & 56 deletions vb_suite/test_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,21 @@

"""

# IMPORTANT NOTE
#
# This script should run on pandas versions at least as far back as 0.9.1.
# Devs should be able to use the latest version of this script with
# any dusty old commit and expect it to "just work".
# One way in which this is useful is when collecting historical data,
# where writing some logic around this script may prove easier
# in some cases than running vbench directly (think perf bisection).
#
# *Please*, when you modify this script for whatever reason,
# make sure you do not break its functionality when running under older
# pandas versions.
# Note that deprecation warnings are turned off in main(), so there's
# no need to change the actual code to suppress such warnings.

import shutil
import os
import sys
Expand All @@ -37,14 +52,20 @@
import random
import numpy as np

import pandas as pd
from pandas import DataFrame, Series

from suite import REPO_PATH

# Directory containing this script; also passed to git.Repo() when
# looking up commit messages.
VB_DIR = os.path.dirname(os.path.abspath(__file__))

# Default for -m/--min-duration (in ms); interpolated into that option's
# help text.
DEFAULT_MIN_DURATION = 0.01

# Column labels used for the two timing columns of the "totals" frame.
HEAD_COL = "head[ms]"
BASE_COL = "base[ms]"

try:
import git # gitpython
except Exception:
print("Error: Please install the `gitpython` package\n")
sys.exit(1)

class RevParseAction(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
Expand All @@ -66,6 +87,14 @@ def __call__(self, parser, namespace, values, option_string=None):
# Commit whose timings are compared against the baseline; the revision is
# handled by RevParseAction.
parser.add_argument(
    '-t', '--target-commit',
    action=RevParseAction,
    type=str,
    help='The commit to compare against the baseline (default: HEAD).')

# Pre-recorded timings (pickles written by an earlier `-H -d FILE` run) can be
# compared directly instead of re-running the benchmarks.
parser.add_argument(
    '--base-pickle',
    type=str,
    help=('name of pickle file with timings data generated by a former `-H -d FILE` run. '
          'filename must be of the form <hash>-*.* or specify --base-commit seperately'))
parser.add_argument(
    '--target-pickle',
    type=str,
    help=('name of pickle file with timings data generated by a former `-H -d FILE` run '
          'filename must be of the form <hash>-*.* or specify --target-commit seperately'))
parser.add_argument('-m', '--min-duration',
help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION,
type=float,
Expand Down Expand Up @@ -213,30 +242,82 @@ def profile_comparative(benchmarks):

head_res = get_results_df(db, h_head)
baseline_res = get_results_df(db, h_baseline)
ratio = head_res['timing'] / baseline_res['timing']
totals = DataFrame({HEAD_COL:head_res['timing'],
BASE_COL:baseline_res['timing'],
'ratio':ratio,
'name':baseline_res.name},
columns=[HEAD_COL, BASE_COL, "ratio", "name"])
totals = totals.ix[totals[HEAD_COL] > args.min_duration]
# ignore below threshold
totals = totals.dropna(
).sort("ratio").set_index('name') # sort in ascending order

h_msg = repo.messages.get(h_head, "")
b_msg = repo.messages.get(h_baseline, "")

print_report(totals,h_head=h_head,h_msg=h_msg,
h_baseline=h_baseline,b_msg=b_msg)

if args.outdf:
prprint("The results DataFrame was written to '%s'\n" % args.outdf)
totals.save(args.outdf)

report_comparative(head_res,baseline_res)

finally:
# print("Disposing of TMP_DIR: %s" % TMP_DIR)
shutil.rmtree(TMP_DIR)

def prep_pickle_for_total(df, agg_name='median'):
    """
    Reduce a pickled timings frame to the shape expected by prep_totals.

    Accepts a DataFrame resulting from an invocation with `-H -d o.pickle`.
    If multiple data columns are present (-N was used), the method of the
    DataFrame named by `agg_name` is called with ``1`` as its only argument
    to reduce them to a single value per vbench; ``df.median`` is used by
    default.

    Returns a DataFrame whose first column is 'timing' and which carries a
    'name' column holding the index labels.
    """
    reducer = getattr(df, agg_name)
    reduced = DataFrame(reducer(1))

    # Only the first column is relabelled; any further columns keep their
    # existing labels.
    labels = list(reduced.columns)
    labels[0] = 'timing'
    reduced.columns = labels

    reduced['name'] = list(reduced.index)
    return reduced

def prep_totals(head_res, baseline_res):
    """
    Build the 'totals' frame consumed by print_report.

    Each argument should be a DataFrame with 'timing' and 'name' columns,
    where name is the name of the vbench. The two frames are aligned with
    each other before the head/baseline ratio is computed.

    Returns a DataFrame indexed by 'name' with the head timing, baseline
    timing and their ratio, sorted by ratio. Rows whose head timing does
    not exceed ``args.min_duration`` and rows containing NaN are dropped.
    """
    head_res, baseline_res = head_res.align(baseline_res)

    head_timing = head_res['timing']
    base_timing = baseline_res['timing']
    column_order = [HEAD_COL, BASE_COL, "ratio", "name"]

    totals = DataFrame({HEAD_COL: head_timing,
                        BASE_COL: base_timing,
                        'ratio': head_timing / base_timing,
                        'name': baseline_res.name},
                       columns=column_order)

    # Ignore benchmarks below the threshold (the test is on the head timing).
    above_threshold = totals[HEAD_COL] > args.min_duration
    totals = totals.ix[above_threshold]

    # .ix/.sort are kept as-is: this script is meant to run on old pandas.
    totals = totals.dropna()
    totals = totals.sort("ratio")  # sort in ascending order
    return totals.set_index('name')

def report_comparative(head_res, baseline_res):
    """
    Print the comparative report for two sets of timings.

    Both arguments are frames accepted by prep_totals. The commits being
    compared are read from ``args.target_commit`` and ``args.base_commit``;
    their commit messages are looked up in the git repository at VB_DIR and
    fall back to "Unknown" when the repository cannot be opened or a commit
    is not found.

    If ``args.outdf`` is set, the totals frame is also saved to that path.
    """
    totals = prep_totals(head_res, baseline_res)

    h_head = args.target_commit
    h_baseline = args.base_commit

    # Commit messages are purely informational, so a missing or unreadable
    # repository must not abort the report (and must not drop into a debugger).
    try:
        repo = git.Repo(VB_DIR)
    except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
        print("Warning: could not open git repo at '%s', "
              "commit messages will be reported as Unknown" % VB_DIR)
        repo = None

    def commit_message(rev):
        # Best-effort lookup of the commit message for `rev`.
        if repo is None:
            return "Unknown"
        try:
            return repo.commit(rev).message.strip()
        except git.exc.BadObject:
            return "Unknown"

    h_msg = commit_message(h_head)
    b_msg = commit_message(h_baseline)

    print_report(totals, h_head=h_head, h_msg=h_msg,
                 h_baseline=h_baseline, b_msg=b_msg)

    if args.outdf:
        prprint("The results DataFrame was written to '%s'\n" % args.outdf)
        # DataFrame.save is kept for compatibility with old pandas versions.
        totals.save(args.outdf)

def profile_head_single(benchmark):
import gc
Expand Down Expand Up @@ -395,38 +476,22 @@ def main():
random.seed(args.seed)
np.random.seed(args.seed)

affinity_set = False
if args.base_pickle and args.target_pickle:
baseline_res = prep_pickle_for_total(pd.load(args.base_pickle))
target_res = prep_pickle_for_total(pd.load(args.target_pickle))

# try psutil first since it is more commonly present and better
# maintained. Some people experienced problems with affinity package
# (see https://code.google.com/p/psutil/issues/detail?id=238 for more references)
try:
import psutil
if hasattr(psutil.Process, 'set_cpu_affinity'):
psutil.Process(os.getpid()).set_cpu_affinity([args.affinity])
affinity_set = True
except ImportError:
pass
report_comparative(target_res, baseline_res)
sys.exit(0)

if not affinity_set:
try:
import affinity
affinity.set_process_affinity_mask(0, args.affinity)
assert affinity.get_process_affinity_mask(0) == args.affinity
affinity_set = True
if args.affinity is not None:
try: # use psutil rather then stale affinity module. Thanks @yarikoptic
import psutil
if hasattr(psutil.Process, 'set_cpu_affinity'):
psutil.Process(os.getpid()).set_cpu_affinity([args.affinity])
print("CPU affinity set to %d" % args.affinity)
except ImportError:
pass

if not affinity_set:
import warnings
warnings.warn("\n\n"
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
"The 'affinity' or 'psutil' >= 0.5.0 modules are not available, results may be unreliable\n"
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n"
)
time.sleep(2)
else:
print("CPU affinity set to %d" % args.affinity)
print("-a/--affinity specified, but the 'psutil' module is not available, aborting.\n")
sys.exit(1)

print("\n")
prprint("LOG_FILE = %s" % args.log_file)
Expand Down Expand Up @@ -508,10 +573,39 @@ def inner(repo_path):

def commit_from_pickle_name(pickle_path):
    """
    Return the commit hash encoded in a timings pickle file name.

    Pickle files are expected to be named ``<hash>-*.*``; the part of the
    file name before the first '-' is returned. Directory components are
    stripped first, so ``results/abc123-run.pickle`` yields ``abc123``
    rather than ``results/abc123``.
    """
    return os.path.basename(pickle_path).split('-')[0]


if __name__ == '__main__':
    args = parser.parse_args()

    # At least one complete mode must be requested: -H, a pair of commits,
    # or a pair of pickles.
    if (not args.head
            and not (args.base_commit and args.target_commit)
            and not (args.base_pickle and args.target_pickle)):
        parser.print_help()
        sys.exit(1)
    elif ((args.base_pickle or args.target_pickle) and not
            (args.base_pickle and args.target_pickle)):
        print("Must specify Both --base-pickle and --target-pickle.")
        sys.exit(1)

    # When comparing pickles without explicit commits, derive the commit
    # hashes from the pickle file names.
    if ((args.base_pickle or args.target_pickle) and not
            (args.base_commit and args.target_commit)):
        if not args.base_commit:
            print("base_commit not specified, Assuming base_pickle is named <commit>-foo.*")
            args.base_commit = commit_from_pickle_name(args.base_pickle)
        if not args.target_commit:
            print("target_commit not specified, Assuming target_pickle is named <commit>-foo.*")
            args.target_commit = commit_from_pickle_name(args.target_pickle)

    # Silence warnings so the script keeps working unchanged across old and
    # new pandas versions.
    import warnings
    warnings.filterwarnings('ignore', category=FutureWarning)
    warnings.filterwarnings('ignore', category=DeprecationWarning)

    if args.base_commit and args.target_commit:
        print("Verifying specified commits exist in repo...")
        r = git.Repo(VB_DIR)
        for c in [args.base_commit, args.target_commit]:
            try:
                msg = r.commit(c).message.strip()
            except git.BadObject:
                print("The commit '%s' was not found, aborting..." % c)
                sys.exit(1)
            else:
                print("%s: %s" % (c, msg))

    main()