Skip to content

Commit a45206d

Browse files
author
y-p
committed
Merge pull request #4644 from y-p/PR_test_perf_fixes
test_perf fixes, restore clobbered pickle compare cmdargs, restore back_compat
2 parents b364f91 + d9cc058 commit a45206d

File tree

1 file changed

+150
-56
lines changed

1 file changed

+150
-56
lines changed

vb_suite/test_perf.py

+150-56
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,21 @@
2626
2727
"""
2828

29+
# IMPORTANT NOTE
30+
#
31+
# This script should run on pandas versions at least as far back as 0.9.1.
32+
# devs should be able to use the latest version of this script with
33+
# any dusty old commit and expect it to "just work".
34+
# One way in which this is useful is when collecting historical data,
35+
# where writing some logic around this script may prove easier
36+
# in some cases than running vbench directly (think perf bisection).
37+
#
38+
# *please*, when you modify this script for whatever reason,
39+
# make sure you do not break its functionality when running under older
40+
# pandas versions.
41+
# Note that deprecation warnings are turned off in main(), so there's
42+
# no need to change the actual code to suppress such warnings.
43+
2944
import shutil
3045
import os
3146
import sys
@@ -37,14 +52,20 @@
3752
import random
3853
import numpy as np
3954

55+
import pandas as pd
4056
from pandas import DataFrame, Series
4157

4258
from suite import REPO_PATH
43-
59+
VB_DIR = os.path.dirname(os.path.abspath(__file__))
4460
DEFAULT_MIN_DURATION = 0.01
4561
HEAD_COL="head[ms]"
4662
BASE_COL="base[ms]"
4763

64+
try:
65+
import git # gitpython
66+
except Exception:
67+
print("Error: Please install the `gitpython` package\n")
68+
sys.exit(1)
4869

4970
class RevParseAction(argparse.Action):
5071
def __call__(self, parser, namespace, values, option_string=None):
@@ -66,6 +87,14 @@ def __call__(self, parser, namespace, values, option_string=None):
6687
parser.add_argument('-t', '--target-commit',
6788
help='The commit to compare against the baseline (default: HEAD).',
6889
type=str, action=RevParseAction)
90+
parser.add_argument('--base-pickle',
91+
help='name of pickle file with timings data generated by a former `-H -d FILE` run. '\
92+
'filename must be of the form <hash>-*.* or specify --base-commit seperately',
93+
type=str)
94+
parser.add_argument('--target-pickle',
95+
help='name of pickle file with timings data generated by a former `-H -d FILE` run '\
96+
'filename must be of the form <hash>-*.* or specify --target-commit seperately',
97+
type=str)
6998
parser.add_argument('-m', '--min-duration',
7099
help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION,
71100
type=float,
@@ -213,30 +242,82 @@ def profile_comparative(benchmarks):
213242

214243
head_res = get_results_df(db, h_head)
215244
baseline_res = get_results_df(db, h_baseline)
216-
ratio = head_res['timing'] / baseline_res['timing']
217-
totals = DataFrame({HEAD_COL:head_res['timing'],
218-
BASE_COL:baseline_res['timing'],
219-
'ratio':ratio,
220-
'name':baseline_res.name},
221-
columns=[HEAD_COL, BASE_COL, "ratio", "name"])
222-
totals = totals.ix[totals[HEAD_COL] > args.min_duration]
223-
# ignore below threshold
224-
totals = totals.dropna(
225-
).sort("ratio").set_index('name') # sort in ascending order
226-
227-
h_msg = repo.messages.get(h_head, "")
228-
b_msg = repo.messages.get(h_baseline, "")
229-
230-
print_report(totals,h_head=h_head,h_msg=h_msg,
231-
h_baseline=h_baseline,b_msg=b_msg)
232-
233-
if args.outdf:
234-
prprint("The results DataFrame was written to '%s'\n" % args.outdf)
235-
totals.save(args.outdf)
245+
246+
report_comparative(head_res,baseline_res)
247+
236248
finally:
237249
# print("Disposing of TMP_DIR: %s" % TMP_DIR)
238250
shutil.rmtree(TMP_DIR)
239251

252+
def prep_pickle_for_total(df, agg_name='median'):
    """
    Reduce a pickled timings dataframe to one timing per vbench.

    Accepts a dataframe resulting from an invocation with `-H -d o.pickle`.
    If multiple data columns are present (-N was used), the `agg_name`
    method of the dataframe (e.g. 'median', 'mean', 'min') is applied
    across the columns of each row to reduce them to a single value
    per vbench. `DataFrame.median` is used by default.

    Returns a new dataframe, indexed like `df`, with a 'timing' column
    holding the reduced value and a 'name' column holding the index
    labels, which is the form expected by prep_totals. The input
    dataframe is not modified.

    Raises AttributeError if the dataframe has no attribute `agg_name`.
    """
    # look the reduction up by name so callers can pick any row-wise
    # DataFrame reduction without changing this function
    reduce_rows = getattr(df, agg_name)

    # axis=1: collapse the per-run columns, leaving one value per row
    result = DataFrame(reduce_rows(axis=1))

    # the single column produced above carries an arbitrary label;
    # rename it to the 'timing' label that prep_totals looks up
    labels = list(result.columns)
    labels[0] = 'timing'
    result.columns = labels

    result['name'] = list(result.index)
    return result
272+
273+
def prep_totals(head_res, baseline_res):
    """
    Build the 'totals' dataframe consumed by print_report.

    Both arguments must be dataframes carrying a 'timing' column and a
    'name' column, where 'name' is the name of the vbench.

    The two frames are first aligned with DataFrame.align, then combined
    into a frame holding the head timing, the baseline timing and their
    ratio (head / baseline). Rows whose head timing does not exceed
    args.min_duration are discarded, as are rows with any missing value.
    The result is sorted by ratio in ascending order and indexed by
    vbench name.
    """
    aligned_head, aligned_base = head_res.align(baseline_res)
    head_timing = aligned_head['timing']
    base_timing = aligned_base['timing']

    data = {HEAD_COL: head_timing,
            BASE_COL: base_timing,
            'ratio': head_timing / base_timing,
            'name': aligned_base.name}
    totals = DataFrame(data, columns=[HEAD_COL, BASE_COL, "ratio", "name"])

    # ignore below threshold
    # NOTE(review): the -m/--min-duration help text talks about the
    # baseline test, but the filter is applied to the head column - confirm
    # which one is intended.
    above_threshold = totals[HEAD_COL] > args.min_duration
    totals = totals.ix[above_threshold]

    totals = totals.dropna()
    totals = totals.sort("ratio")  # sort in ascending order
    return totals.set_index('name')
292+
293+
def _commit_message(repo, rev, default="Unknown"):
    """Return the stripped commit message of `rev`, or `default` if unavailable."""
    if repo is None:
        return default
    # BadObject is what this script has always caught. BadName is looked up
    # defensively because newer GitPython releases appear to raise it for
    # names that cannot be resolved - TODO confirm against the installed
    # version. Falls back to BadObject when the attribute does not exist.
    lookup_errors = (git.exc.BadObject,
                     getattr(git.exc, 'BadName', git.exc.BadObject))
    try:
        return repo.commit(rev).message.strip()
    except lookup_errors:
        return default


def report_comparative(head_res,baseline_res):
    """
    Print a comparative report for two sets of vbench timings.

    `head_res` and `baseline_res` are dataframes of the form accepted by
    prep_totals ('timing' and 'name' columns). The commits being compared
    are read from args.target_commit and args.base_commit; their commit
    messages are looked up in the git repo containing this script and
    passed to print_report alongside the totals.

    The commit messages are cosmetic, so failing to open the repo or to
    resolve a commit is not fatal: the message is reported as "Unknown".

    If args.outdf is set, the totals dataframe is also saved to that path.
    """
    try:
        repo = git.Repo(VB_DIR)
    except Exception as e:
        # Previously this dropped into pdb, which hangs unattended runs and
        # then fails on an unbound name. Degrade gracefully instead so the
        # (possibly expensive) timing results are still reported.
        print("Warning: could not open git repo at '%s' (%s), "
              "commit messages will be shown as 'Unknown'" % (VB_DIR, e))
        repo = None

    totals = prep_totals(head_res,baseline_res)

    h_head = args.target_commit
    h_baseline = args.base_commit
    h_msg = _commit_message(repo, h_head)
    b_msg = _commit_message(repo, h_baseline)

    print_report(totals,h_head=h_head,h_msg=h_msg,
                 h_baseline=h_baseline,b_msg=b_msg)

    if args.outdf:
        prprint("The results DataFrame was written to '%s'\n" % args.outdf)
        totals.save(args.outdf)
240321

241322
def profile_head_single(benchmark):
242323
import gc
@@ -395,38 +476,22 @@ def main():
395476
random.seed(args.seed)
396477
np.random.seed(args.seed)
397478

398-
affinity_set = False
479+
if args.base_pickle and args.target_pickle:
480+
baseline_res = prep_pickle_for_total(pd.load(args.base_pickle))
481+
target_res = prep_pickle_for_total(pd.load(args.target_pickle))
399482

400-
# try psutil first since it is more commonly present and better
401-
# maintained. Some people experienced problems with affinity package
402-
# (see https://code.google.com/p/psutil/issues/detail?id=238 for more references)
403-
try:
404-
import psutil
405-
if hasattr(psutil.Process, 'set_cpu_affinity'):
406-
psutil.Process(os.getpid()).set_cpu_affinity([args.affinity])
407-
affinity_set = True
408-
except ImportError:
409-
pass
483+
report_comparative(target_res, baseline_res)
484+
sys.exit(0)
410485

411-
if not affinity_set:
412-
try:
413-
import affinity
414-
affinity.set_process_affinity_mask(0, args.affinity)
415-
assert affinity.get_process_affinity_mask(0) == args.affinity
416-
affinity_set = True
486+
if args.affinity is not None:
487+
try: # use psutil rather then stale affinity module. Thanks @yarikoptic
488+
import psutil
489+
if hasattr(psutil.Process, 'set_cpu_affinity'):
490+
psutil.Process(os.getpid()).set_cpu_affinity([args.affinity])
491+
print("CPU affinity set to %d" % args.affinity)
417492
except ImportError:
418-
pass
419-
420-
if not affinity_set:
421-
import warnings
422-
warnings.warn("\n\n"
423-
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
424-
"The 'affinity' or 'psutil' >= 0.5.0 modules are not available, results may be unreliable\n"
425-
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n"
426-
)
427-
time.sleep(2)
428-
else:
429-
print("CPU affinity set to %d" % args.affinity)
493+
print("-a/--affinity specified, but the 'psutil' module is not available, aborting.\n")
494+
sys.exit(1)
430495

431496
print("\n")
432497
prprint("LOG_FILE = %s" % args.log_file)
@@ -508,10 +573,39 @@ def inner(repo_path):
508573

509574
if __name__ == '__main__':
510575
args = parser.parse_args()
511-
if not args.head and (not args.base_commit and not args.target_commit):
576+
if (not args.head
577+
and not (args.base_commit and args.target_commit)
578+
and not (args.base_pickle and args.target_pickle)):
512579
parser.print_help()
513-
else:
514-
import warnings
515-
warnings.filterwarnings('ignore',category=FutureWarning)
516-
warnings.filterwarnings('ignore',category=DeprecationWarning)
517-
main()
580+
sys.exit(1)
581+
elif ((args.base_pickle or args.target_pickle) and not
582+
(args.base_pickle and args.target_pickle)):
583+
print("Must specify Both --base-pickle and --target-pickle.")
584+
sys.exit(1)
585+
586+
if ((args.base_pickle or args.target_pickle) and not
587+
(args.base_commit and args.target_commit)):
588+
if not args.base_commit:
589+
print("base_commit not specified, Assuming base_pickle is named <commit>-foo.*")
590+
args.base_commit = args.base_pickle.split('-')[0]
591+
if not args.target_commit:
592+
print("target_commit not specified, Assuming target_pickle is named <commit>-foo.*")
593+
args.target_commit = args.target_pickle.split('-')[0]
594+
595+
import warnings
596+
warnings.filterwarnings('ignore',category=FutureWarning)
597+
warnings.filterwarnings('ignore',category=DeprecationWarning)
598+
599+
if args.base_commit and args.target_commit:
600+
print("Verifying specified commits exist in repo...")
601+
r=git.Repo(VB_DIR)
602+
for c in [ args.base_commit, args.target_commit ]:
603+
try:
604+
msg = r.commit(c).message.strip()
605+
except git.BadObject:
606+
print("The commit '%s' was not found, aborting..." % c)
607+
sys.exit(1)
608+
else:
609+
print("%s: %s" % (c,msg))
610+
611+
main()

0 commit comments

Comments
 (0)