Skip to content

BUG: revert 2/3 changes in vbsuite #4478

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 7, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions vb_suite/groupby.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from vbench.api import Benchmark
from datetime import datetime
from pandas.compat import map

common_setup = """from pandas_vb_common import *
"""
Expand Down Expand Up @@ -285,12 +284,12 @@ def f(g):
share_na = 0.1

dates = date_range('1997-12-31', periods=n_dates, freq='B')
dates = Index(lmap(lambda x: x.year * 10000 + x.month * 100 + x.day, dates))
dates = Index(map(lambda x: x.year * 10000 + x.month * 100 + x.day, dates))

secid_min = int('10000000', 16)
secid_max = int('F0000000', 16)
step = (secid_max - secid_min) // (n_securities - 1)
security_ids = lmap(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step))
security_ids = map(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step))

data_index = MultiIndex(levels=[dates.values, security_ids],
labels=[[i for i in xrange(n_dates) for _ in xrange(n_securities)], range(n_securities) * n_dates],
Expand Down
1 change: 0 additions & 1 deletion vb_suite/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@
start_date=datetime(2012, 1, 1))

setup = common_setup + """
from pandas.compat import range
import pandas.core.expressions as expr
df = DataFrame(np.random.randn(50000, 100))
df2 = DataFrame(np.random.randn(50000, 100))
Expand Down
4 changes: 2 additions & 2 deletions vb_suite/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def auto_update():
html()
upload()
sendmail()
except (Exception, SystemExit) as inst:
except (Exception, SystemExit), inst:
msg += str(inst) + '\n'
sendmail(msg)

Expand Down Expand Up @@ -159,7 +159,7 @@ def _get_config():
func = funcd.get(arg)
if func is None:
raise SystemExit('Do not know how to handle %s; valid args are %s' % (
arg, list(funcd.keys())))
arg, funcd.keys()))
func()
else:
small_docs = False
Expand Down
2 changes: 1 addition & 1 deletion vb_suite/measure_memory_consumption.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def main():

s = Series(results)
s.sort()
print(s)
print((s))

finally:
shutil.rmtree(TMP_DIR)
Expand Down
4 changes: 2 additions & 2 deletions vb_suite/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
start_date=datetime(2011, 11, 1))

setup = common_setup + """
from pandas.compat import cStringIO as StringIO
from cStringIO import StringIO
import os
N = 10000
K = 8
Expand All @@ -63,7 +63,7 @@
read_table_multiple_date = Benchmark(cmd, setup, start_date=sdate)

setup = common_setup + """
from pandas.compat import cStringIO as StringIO
from cStringIO import StringIO
import os
N = 10000
K = 8
Expand Down
23 changes: 12 additions & 11 deletions vb_suite/perf_HEAD.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@

"""

from pandas.io.common import urlopen
import urllib2
from contextlib import closing
from urllib2 import urlopen
import json

import pandas as pd
import pandas.compat as compat

WEB_TIMEOUT = 10

Expand All @@ -24,7 +25,7 @@ def get_travis_data():
if not jobid:
return None, None

with urlopen("https://api.travis-ci.org/workers/") as resp:
with closing(urlopen("https://api.travis-ci.org/workers/")) as resp:
workers = json.loads(resp.read())

host = njobs = None
Expand Down Expand Up @@ -71,7 +72,7 @@ def dump_as_gist(data, desc="The Commit", njobs=None):
print("\n\n" + "-" * 80)

gist = json.loads(r.read())
file_raw_url = list(gist['files'].items())[0][1]['raw_url']
file_raw_url = gist['files'].items()[0][1]['raw_url']
print("[vbench-gist-raw_url] %s" % file_raw_url)
print("[vbench-html-url] %s" % gist['html_url'])
print("[vbench-api-url] %s" % gist['url'])
Expand Down Expand Up @@ -103,7 +104,7 @@ def main():

except Exception as e:
exit_code = 1
if (isinstance(e, KeyboardInterrupt) or
if (type(e) == KeyboardInterrupt or
'KeyboardInterrupt' in str(d)):
raise KeyboardInterrupt()

Expand All @@ -113,7 +114,7 @@ def main():
if d['succeeded']:
print("\nException:\n%s\n" % str(e))
else:
for k, v in sorted(compat.iteritems(d)):
for k, v in sorted(d.iteritems()):
print("{k}: {v}".format(k=k, v=v))

print("------->\n")
Expand All @@ -132,7 +133,7 @@ def main():


def get_vbench_log(build_url):
with urlopen(build_url) as r:
with closing(urllib2.urlopen(build_url)) as r:
if not (200 <= r.getcode() < 300):
return

Expand All @@ -143,7 +144,7 @@ def get_vbench_log(build_url):
if not s:
return
id = s[0]['id'] # should be just one for now
with urlopen("https://api.travis-ci.org/jobs/%s" % id) as r2:
with closing(urllib2.urlopen("https://api.travis-ci.org/jobs/%s" % id)) as r2:
if not 200 <= r.getcode() < 300:
return
s2 = json.loads(r2.read())
Expand Down Expand Up @@ -171,7 +172,7 @@ def convert_json_to_df(results_url):
df contains timings for all successful vbenchmarks
"""

with urlopen(results_url) as resp:
with closing(urlopen(results_url)) as resp:
res = json.loads(resp.read())
timings = res.get("timings")
if not timings:
Expand Down Expand Up @@ -215,7 +216,7 @@ def get_results_from_builds(builds):
dfs = OrderedDict()

while True:
with urlopen(url) as r:
with closing(urlopen(url)) as r:
if not (200 <= r.getcode() < 300):
break
builds = json.loads(r.read())
Expand All @@ -237,6 +238,6 @@ def mk_unique(df):
dfs = get_all_results(repo_id)
for k in dfs:
dfs[k] = mk_unique(dfs[k])
ss = [pd.Series(v.timing, name=k) for k, v in compat.iteritems(dfs)]
ss = [pd.Series(v.timing, name=k) for k, v in dfs.iteritems()]
results = pd.concat(reversed(ss), 1)
return results
10 changes: 4 additions & 6 deletions vb_suite/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
import sys
import os

from pandas.compat import u

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
Expand Down Expand Up @@ -51,8 +49,8 @@
master_doc = 'index'

# General information about the project.
project = u('pandas')
copyright = u('2008-2011, the pandas development team')
project = u'pandas'
copyright = u'2008-2011, the pandas development team'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
Expand Down Expand Up @@ -199,8 +197,8 @@
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'performance.tex',
u('pandas vbench Performance Benchmarks'),
u('Wes McKinney'), 'manual'),
u'pandas vbench Performance Benchmarks',
u'Wes McKinney', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
Expand Down
21 changes: 10 additions & 11 deletions vb_suite/suite.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import print_function
from vbench.api import Benchmark, GitRepo
from datetime import datetime

Expand Down Expand Up @@ -91,15 +90,15 @@ def generate_rst_files(benchmarks):
fig_base_path = os.path.join(vb_path, 'figures')

if not os.path.exists(vb_path):
print('creating %s' % vb_path)
print 'creating %s' % vb_path
os.makedirs(vb_path)

if not os.path.exists(fig_base_path):
print('creating %s' % fig_base_path)
print 'creating %s' % fig_base_path
os.makedirs(fig_base_path)

for bmk in benchmarks:
print('Generating rst file for %s' % bmk.name)
print 'Generating rst file for %s' % bmk.name
rst_path = os.path.join(RST_BASE, 'vbench/%s.txt' % bmk.name)

fig_full_path = os.path.join(fig_base_path, '%s.png' % bmk.name)
Expand All @@ -121,7 +120,7 @@ def generate_rst_files(benchmarks):
f.write(rst_text)

with open(os.path.join(RST_BASE, 'index.rst'), 'w') as f:
print("""
print >> f, """
Performance Benchmarks
======================

Expand All @@ -142,15 +141,15 @@ def generate_rst_files(benchmarks):
.. toctree::
:hidden:
:maxdepth: 3
""", file=f)
"""
for modname, mod_bmks in sorted(by_module.items()):
print(' vb_%s' % modname, file=f)
print >> f, ' vb_%s' % modname
modpath = os.path.join(RST_BASE, 'vb_%s.rst' % modname)
with open(modpath, 'w') as mh:
header = '%s\n%s\n\n' % (modname, '=' * len(modname))
print(header, file=mh)
print >> mh, header

for bmk in mod_bmks:
print(bmk.name, file=mh)
print('-' * len(bmk.name), file=mh)
print('.. include:: vbench/%s.txt\n' % bmk.name, file=mh)
print >> mh, bmk.name
print >> mh, '-' * len(bmk.name)
print >> mh, '.. include:: vbench/%s.txt\n' % bmk.name
16 changes: 6 additions & 10 deletions vb_suite/test_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@
5) print the results to the log file and to stdout.

"""
from __future__ import print_function

from pandas.compat import range, lmap
import shutil
import os
import sys
Expand Down Expand Up @@ -139,11 +137,11 @@ def get_results_df(db, rev):
"""Takes a git commit hash and returns a Dataframe of benchmark results
"""
bench = DataFrame(db.get_benchmarks())
results = DataFrame(lmap(list,db.get_rev_results(rev).values()))
results = DataFrame(map(list,db.get_rev_results(rev).values()))

# Since vbench.db._reg_rev_results returns an unlabeled dict,
# we have to break encapsulation a bit.
results.columns = list(db._results.c.keys())
results.columns = db._results.c.keys()
results = results.join(bench['name'], on='checksum').set_index("checksum")
return results

Expand Down Expand Up @@ -277,8 +275,7 @@ def profile_head_single(benchmark):
err = str(e)
except:
pass
print("%s died with:\n%s\nSkipping...\n" % (benchmark.name,
err))
print("%s died with:\n%s\nSkipping...\n" % (benchmark.name, err))

results.append(d.get('timing',np.nan))
gc.enable()
Expand All @@ -299,8 +296,7 @@ def profile_head_single(benchmark):
# return df.set_index("name")[HEAD_COL]

def profile_head(benchmarks):
print("Performing %d benchmarks (%d runs each)" % (len(benchmarks),
args.hrepeats))
print( "Performing %d benchmarks (%d runs each)" % ( len(benchmarks), args.hrepeats))

ss= [profile_head_single(b) for b in benchmarks]
print("\n")
Expand Down Expand Up @@ -466,7 +462,7 @@ def main():
def _parse_commit_log(this,repo_path,base_commit=None):
from vbench.git import _convert_timezones
from pandas import Series
from pandas.compat import parse_date
from dateutil import parser as dparser

git_cmd = 'git --git-dir=%s/.git --work-tree=%s ' % (repo_path, repo_path)
githist = git_cmd + ('log --graph --pretty=format:'+
Expand All @@ -488,7 +484,7 @@ def _parse_commit_log(this,repo_path,base_commit=None):
_, sha, stamp, message, author = line.split('::', 4)

# parse timestamp into datetime object
stamp = parse_date(stamp)
stamp = dparser.parse(stamp)

shas.append(sha)
timestamps.append(stamp)
Expand Down