DOC: more vbench scaffolding, rst generation

wesm · wesm · commit 408e74249836 · 2011-12-20T22:56:47.000-05:00
diff --git a/doc/source/index.rst b/doc/source/index.rst
@@ -125,3 +125,4 @@ See the package overview for more detail about what's in the library.
     comparison_with_r
     benchmarks
     api
+    vbench
diff --git a/vb_suite/generate_rst_files.py b/vb_suite/generate_rst_files.py
@@ -0,0 +1,2 @@
+from suite import benchmarks, generate_rst_files
+generate_rst_files(benchmarks)
diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py
@@ -7,7 +7,7 @@
 setup = common_setup + """
 
 N = 100000
-ngroups = 5
+ngroups = 100
 
 def get_test_data(ngroups=100, n=N):
     unique_groups = range(ngroups)
@@ -28,12 +28,12 @@ def f():
 """
 
 stmt1 = "df.groupby(['key1', 'key2'])['data'].agg(lambda x: x.values.sum())"
-bm_groupby1 = Benchmark(stmt1, setup,
-                        name="groupby_multi_python",
-                        start_date=datetime(2011, 7, 1))
+groupby_multi_python = Benchmark(stmt1, setup,
+                                 name="groupby_multi_python",
+                                 start_date=datetime(2011, 7, 1))
 
 stmt3 = "df.groupby(['key1', 'key2']).sum()"
-bm_groupby3 = Benchmark(stmt3, setup,
-                        name="groupby_multi_cython",
-                        start_date=datetime(2011, 7, 1))
+groupby_multi_cython = Benchmark(stmt3, setup,
+                                 name="groupby_multi_cython",
+                                 start_date=datetime(2011, 7, 1))
 
diff --git a/vb_suite/index_object.py b/vb_suite/index_object.py
@@ -0,0 +1,5 @@
+from vbench.benchmark import Benchmark
+from datetime import datetime
+
+SECTION = "Index / MultiIndex objects"
+
diff --git a/vb_suite/run_suite.py b/vb_suite/run_suite.py
@@ -1,37 +1,12 @@
-from vbench.api import Benchmark, GitRepo, BenchmarkRunner
-from datetime import datetime
-
-modules = ['groupby', 'indexing', 'reindex', 'binary_ops',
-           'sparse', 'index_object']
-
-all_benchmarks = []
-for modname in modules:
-    ref = __import__(modname)
-    for k, v in ref.__dict__.iteritems():
-        if isinstance(v, Benchmark):
-            all_benchmarks.append(v)
-
-REPO_PATH = '/home/wesm/code/pandas'
-REPO_URL = 'git@github.com:wesm/pandas.git'
-DB_PATH = '/home/wesm/code/pandas/vb_suite/benchmarks.db'
-TMP_DIR = '/home/wesm/tmp/vb_pandas'
-PREPARE = """
-python setup.py clean
-"""
-BUILD = """
-python setup.py build_ext --inplace
-"""
-dependencies = ['pandas_vb_common.py']
-
-START_DATE = datetime(2011, 3, 1)
-
-repo = GitRepo(REPO_PATH)
-
-to_consider = repo.shas.truncate(START_DATE)
-
-runner = BenchmarkRunner(all_benchmarks, REPO_PATH, REPO_URL,
-                         BUILD, DB_PATH, TMP_DIR, PREPARE,
-                         run_option='eod', start_date=START_DATE,
-                         module_dependencies=dependencies)
-
-runner.run()
+from vbench.api import BenchmarkRunner
+from suite import *
+
+def run_process():
+    runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_URL,
+                             BUILD, DB_PATH, TMP_DIR, PREPARE,
+                             run_option='eod', start_date=START_DATE,
+                             module_dependencies=dependencies)
+    runner.run()
+
+if __name__ == '__main__':
+    run_process()
diff --git a/vb_suite/suite.py b/vb_suite/suite.py
@@ -0,0 +1,79 @@
+from vbench.api import Benchmark, GitRepo, BenchmarkRunner
+from datetime import datetime
+
+import os
+
+modules = ['groupby', 'indexing', 'reindex', 'binary_ops',
+           'sparse', 'index_object']
+
+benchmarks = []
+for modname in modules:
+    ref = __import__(modname)
+    for k, v in ref.__dict__.iteritems():
+        if isinstance(v, Benchmark):
+            benchmarks.append(v)
+
+REPO_PATH = '/home/wesm/code/pandas'
+REPO_URL = 'git@github.com:wesm/pandas.git'
+DB_PATH = '/home/wesm/code/pandas/vb_suite/benchmarks.db'
+TMP_DIR = '/home/wesm/tmp/vb_pandas'
+PREPARE = """
+python setup.py clean
+"""
+BUILD = """
+python setup.py build_ext --inplace
+"""
+dependencies = ['pandas_vb_common.py']
+
+START_DATE = datetime(2011, 3, 1)
+
+repo = GitRepo(REPO_PATH)
+
+RST_BASE = '../doc/source'
+
+# HACK: hard-coded date range, applied below as the x-axis limits of every benchmark plot
+
+timespan = [datetime(2011, 1, 1), datetime(2012, 1, 1)]
+
+def generate_rst_files(benchmarks):
+    import matplotlib as mpl
+    mpl.use('Agg')
+    import matplotlib.pyplot as plt
+
+    vb_path = os.path.join(RST_BASE, 'vbench')
+    fig_base_path = os.path.join(vb_path, 'figures')
+
+    if not os.path.exists(vb_path):
+        print 'creating %s' % vb_path
+        os.makedirs(vb_path)
+
+    if not os.path.exists(fig_base_path):
+        print 'creating %s' % fig_base_path
+        os.makedirs(fig_base_path)
+
+    for bmk in benchmarks:
+        print 'Generating rst file for %s' % bmk.name
+        rst_path = os.path.join(RST_BASE, 'vbench/%s.rst' % bmk.name)
+
+        fig_full_path = os.path.join(fig_base_path, '%s.png' % bmk.name)
+
+        # make the figure
+        plt.figure(figsize=(10, 6))
+        ax = plt.gca()
+        bmk.plot(DB_PATH, ax=ax)
+        plt.xlim(timespan)
+        plt.savefig(fig_full_path, bbox_inches='tight')
+        plt.close('all')
+
+        fig_rel_path = 'vbench/figures/%s.png' % bmk.name
+        rst_text = bmk.to_rst(image_path=fig_rel_path)
+        with open(rst_path, 'w') as f:
+            f.write(rst_text)
+
+    with open(os.path.join(RST_BASE, 'vbench.rst'), 'w') as f:
+        print >> f, """
+VBENCH
+------
+"""
+        for bmk in benchmarks:
+            print >> f, '.. include:: vbench/%s.rst' % bmk.name

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+from suite import benchmarks, generate_rst_files`
	`2`	`+generate_rst_files(benchmarks)`