Skip to content

Commit c48db2f

Browse files
author
y-p
committed
BLD: test_perf, bring back pickle-compare code clobbered in 244d567
1 parent b364f91 commit c48db2f

File tree

1 file changed

+136
-57
lines changed

1 file changed

+136
-57
lines changed

vb_suite/test_perf.py

+136-57
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,20 @@
3737
import random
3838
import numpy as np
3939

40+
import pandas as pd
4041
from pandas import DataFrame, Series
4142

4243
from suite import REPO_PATH
43-
44+
VB_DIR = os.path.dirname(os.path.abspath(__file__))
4445
DEFAULT_MIN_DURATION = 0.01
4546
HEAD_COL="head[ms]"
4647
BASE_COL="base[ms]"
4748

49+
try:
50+
import git # gitpython
51+
except Exception:
52+
print("Error: Please install the `gitpython` package\n")
53+
sys.exit(1)
4854

4955
class RevParseAction(argparse.Action):
5056
def __call__(self, parser, namespace, values, option_string=None):
@@ -66,6 +72,14 @@ def __call__(self, parser, namespace, values, option_string=None):
6672
parser.add_argument('-t', '--target-commit',
6773
help='The commit to compare against the baseline (default: HEAD).',
6874
type=str, action=RevParseAction)
75+
parser.add_argument('--base-pickle',
76+
help='name of pickle file with timings data generated by a former `-H -d FILE` run. '\
77+
'filename must be of the form <hash>-*.* or specify --base-commit seperately',
78+
type=str)
79+
parser.add_argument('--target-pickle',
80+
help='name of pickle file with timings data generated by a former `-H -d FILE` run '\
81+
'filename must be of the form <hash>-*.* or specify --target-commit seperately',
82+
type=str)
6983
parser.add_argument('-m', '--min-duration',
7084
help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION,
7185
type=float,
@@ -213,30 +227,82 @@ def profile_comparative(benchmarks):
213227

214228
head_res = get_results_df(db, h_head)
215229
baseline_res = get_results_df(db, h_baseline)
216-
ratio = head_res['timing'] / baseline_res['timing']
217-
totals = DataFrame({HEAD_COL:head_res['timing'],
218-
BASE_COL:baseline_res['timing'],
219-
'ratio':ratio,
220-
'name':baseline_res.name},
221-
columns=[HEAD_COL, BASE_COL, "ratio", "name"])
222-
totals = totals.ix[totals[HEAD_COL] > args.min_duration]
223-
# ignore below threshold
224-
totals = totals.dropna(
225-
).sort("ratio").set_index('name') # sort in ascending order
226-
227-
h_msg = repo.messages.get(h_head, "")
228-
b_msg = repo.messages.get(h_baseline, "")
229-
230-
print_report(totals,h_head=h_head,h_msg=h_msg,
231-
h_baseline=h_baseline,b_msg=b_msg)
232-
233-
if args.outdf:
234-
prprint("The results DataFrame was written to '%s'\n" % args.outdf)
235-
totals.save(args.outdf)
230+
231+
report_comparative(head_res,baseline_res)
232+
236233
finally:
237234
# print("Disposing of TMP_DIR: %s" % TMP_DIR)
238235
shutil.rmtree(TMP_DIR)
239236

237+
def prep_pickle_for_total(df, agg_name='median'):
    """
    Reduce a pickled timings frame to the shape expected by prep_totals.

    Accepts a dataframe resulting from invocation with -H -d o.pickle.
    If multiple data columns are present (-N was used), the `agg_name`
    method of the dataframe is called with ``axis=1`` to reduce them to a
    single value per vbench; df.median is used by default.

    Returns a dataframe with a 'timing' column holding the reduced value
    and a 'name' column holding a copy of the index labels.
    """
    # Pass the axis by keyword: the first positional parameter is not the
    # axis for every reducer (e.g. `quantile` takes `q` first), so a bare
    # positional 1 would silently mean something else for those.
    reduced = getattr(df, agg_name)(axis=1)

    result = DataFrame(reduced)
    # The single column is named after whatever the reducer produced;
    # rename it to the column name prep_totals reads.
    cols = list(result.columns)
    cols[0] = 'timing'
    result.columns = cols
    result['name'] = list(result.index)
    return result
257+
258+
def prep_totals(head_res, baseline_res):
    """
    Build the 'totals' dataframe consumed by print_report.

    Each argument should be a dataframe with 'timing' and 'name' columns
    where name is the name of the vbench.

    The two frames are aligned with each other, the head/baseline timing
    ratio is computed, rows whose head timing is not above
    `args.min_duration` are dropped, rows containing NaN are dropped, and
    the result is sorted by ratio and indexed by 'name'.
    """
    aligned_head, aligned_base = head_res.align(baseline_res)
    head_timing = aligned_head['timing']
    base_timing = aligned_base['timing']

    column_order = [HEAD_COL, BASE_COL, "ratio", "name"]
    data = {
        HEAD_COL: head_timing,
        BASE_COL: base_timing,
        'ratio': head_timing / base_timing,
        'name': aligned_base.name,
    }
    totals = DataFrame(data, columns=column_order)

    # ignore below threshold
    above_threshold = totals[HEAD_COL] > args.min_duration
    # NOTE(review): `.ix` and `.sort` are kept as in the original; later
    # pandas releases spell these `.loc` and `.sort_values`.
    totals = totals.ix[above_threshold]

    totals = totals.dropna()
    totals = totals.sort("ratio")  # sort in ascending order
    return totals.set_index('name')
277+
278+
def report_comparative(head_res,baseline_res):
    """
    Print the head-vs-baseline comparison report and optionally pickle it.

    `head_res` and `baseline_res` are passed straight to prep_totals. The
    commit hashes are read from `args.target_commit` / `args.base_commit`
    and their messages are looked up in the git repository at VB_DIR; a
    message that cannot be resolved is reported as "Unknown".

    If `args.outdf` is set, the totals dataframe is pickled to that path.
    """
    try:
        r = git.Repo(VB_DIR)
    except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
        # The commit messages are cosmetic; without a repository fall back
        # to "Unknown" instead of stopping in a debugger and then failing
        # on an unbound name.
        prprint("Could not open a git repository at '%s', "
                "commit messages will be reported as Unknown" % VB_DIR)
        r = None

    totals = prep_totals(head_res,baseline_res)

    h_head = args.target_commit
    h_baseline = args.base_commit
    h_msg = _commit_message(r, h_head)
    b_msg = _commit_message(r, h_baseline)

    print_report(totals,h_head=h_head,h_msg=h_msg,
                 h_baseline=h_baseline,b_msg=b_msg)

    if args.outdf:
        prprint("The results DataFrame was written to '%s'\n" % args.outdf)
        totals.to_pickle(args.outdf)


def _commit_message(repo, rev, default="Unknown"):
    """Return the stripped message of commit `rev` in `repo`, or `default`."""
    if repo is None:
        return default
    try:
        return repo.commit(rev).message.strip()
    except git.exc.BadObject:
        return default
240306

241307
def profile_head_single(benchmark):
242308
import gc
@@ -312,7 +378,7 @@ def profile_head(benchmarks):
312378

313379
if args.outdf:
314380
prprint("The results DataFrame was written to '%s'\n" % args.outdf)
315-
DataFrame(results).save(args.outdf)
381+
DataFrame(results).to_pickle(args.outdf)
316382

317383
def print_report(df,h_head=None,h_msg="",h_baseline=None,b_msg=""):
318384

@@ -395,38 +461,22 @@ def main():
395461
random.seed(args.seed)
396462
np.random.seed(args.seed)
397463

398-
affinity_set = False
464+
if args.base_pickle and args.target_pickle:
465+
baseline_res = prep_pickle_for_total(pd.read_pickle(args.base_pickle))
466+
target_res = prep_pickle_for_total(pd.read_pickle(args.target_pickle))
399467

400-
# try psutil first since it is more commonly present and better
401-
# maintained. Some people experienced problems with affinity package
402-
# (see https://code.google.com/p/psutil/issues/detail?id=238 for more references)
403-
try:
404-
import psutil
405-
if hasattr(psutil.Process, 'set_cpu_affinity'):
406-
psutil.Process(os.getpid()).set_cpu_affinity([args.affinity])
407-
affinity_set = True
408-
except ImportError:
409-
pass
468+
report_comparative(target_res, baseline_res)
469+
sys.exit(0)
410470

411-
if not affinity_set:
412-
try:
413-
import affinity
414-
affinity.set_process_affinity_mask(0, args.affinity)
415-
assert affinity.get_process_affinity_mask(0) == args.affinity
416-
affinity_set = True
471+
if args.affinity is not None:
472+
try: # use psutil rather than stale affinity module. Thanks @yarikoptic
473+
import psutil
474+
if hasattr(psutil.Process, 'set_cpu_affinity'):
475+
psutil.Process(os.getpid()).set_cpu_affinity([args.affinity])
476+
print("CPU affinity set to %d" % args.affinity)
417477
except ImportError:
418-
pass
419-
420-
if not affinity_set:
421-
import warnings
422-
warnings.warn("\n\n"
423-
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
424-
"The 'affinity' or 'psutil' >= 0.5.0 modules are not available, results may be unreliable\n"
425-
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n"
426-
)
427-
time.sleep(2)
428-
else:
429-
print("CPU affinity set to %d" % args.affinity)
478+
print("-a/--affinity specified, but the 'psutil' module is not available, aborting.\n")
479+
sys.exit(1)
430480

431481
print("\n")
432482
prprint("LOG_FILE = %s" % args.log_file)
@@ -508,10 +558,39 @@ def inner(repo_path):
508558

509559
if __name__ == '__main__':
510560
args = parser.parse_args()
511-
if not args.head and (not args.base_commit and not args.target_commit):
561+
if (not args.head
562+
and not (args.base_commit and args.target_commit)
563+
and not (args.base_pickle and args.target_pickle)):
512564
parser.print_help()
513-
else:
514-
import warnings
515-
warnings.filterwarnings('ignore',category=FutureWarning)
516-
warnings.filterwarnings('ignore',category=DeprecationWarning)
517-
main()
565+
sys.exit(1)
566+
elif ((args.base_pickle or args.target_pickle) and not
567+
(args.base_pickle and args.target_pickle)):
568+
print("Must specify Both --base-pickle and --target-pickle.")
569+
sys.exit(1)
570+
571+
if ((args.base_pickle or args.target_pickle) and not
572+
(args.base_commit and args.target_commit)):
573+
if not args.base_commit:
574+
print("base_commit not specified, Assuming base_pickle is named <commit>-foo.*")
575+
args.base_commit = args.base_pickle.split('-')[0]
576+
if not args.target_commit:
577+
print("target_commit not specified, Assuming target_pickle is named <commit>-foo.*")
578+
args.target_commit = args.target_pickle.split('-')[0]
579+
580+
import warnings
581+
warnings.filterwarnings('ignore',category=FutureWarning)
582+
warnings.filterwarnings('ignore',category=DeprecationWarning)
583+
584+
if args.base_commit and args.target_commit:
585+
print("Verifying specified commits exist in repo...")
586+
r=git.Repo(VB_DIR)
587+
for c in [ args.base_commit, args.target_commit ]:
588+
try:
589+
msg = r.commit(c).message.strip()
590+
except git.BadObject:
591+
print("The commit '%s' was not found, aborting..." % c)
592+
sys.exit(1)
593+
else:
594+
print("%s: %s" % (c,msg))
595+
596+
main()

0 commit comments

Comments
 (0)