PERF: vbenches for #8882, releasing the GIL

jreback · jreback · commit b08ab8eda52e · 2015-06-26T19:23:12.000-04:00
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -1817,3 +1817,36 @@ def use_numexpr(use, min_elements=expr._MIN_ELEMENTS):
 for name, obj in inspect.getmembers(sys.modules[__name__]):
     if inspect.isfunction(obj) and name.startswith('assert'):
         setattr(TestCase, name, staticmethod(obj))
+
+def test_parallel(num_threads=2):
+    """Decorator to run the same function multiple times in parallel.
+
+    Parameters
+    ----------
+    num_threads : int, optional
+        The number of threads started; each one calls the function once.
+
+    Notes
+    -----
+    The wrapper waits for every thread to finish and then returns None; the
+    return values of the decorated function are discarded.
+
+    Original from scikit-image: https://github.com/scikit-image/scikit-image/pull/1519
+    """
+    # Reject zero or negative counts, which would start no threads at all.
+    assert num_threads > 0
+    import threading
+
+    def wrapper(func):
+        @wraps(func)
+        def inner(*args, **kwargs):
+            threads = []
+            for i in range(num_threads):
+                thread = threading.Thread(target=func, args=args, kwargs=kwargs)
+                threads.append(thread)
+            for thread in threads:
+                thread.start()
+            for thread in threads:
+                thread.join()
+        return inner
+    return wrapper
diff --git a/vb_suite/gil.py b/vb_suite/gil.py
@@ -0,0 +1,75 @@
+from vbench.api import Benchmark
+from datetime import datetime
+
+common_setup = """from pandas_vb_common import *
+"""
+
+basic = common_setup + """
+from pandas.util.testing import test_parallel
+
+N = 1000000
+ngroups = 1000
+np.random.seed(1234)
+
+df = DataFrame({'key' : np.random.randint(0,ngroups,size=N),
+                'data' : np.random.randn(N) })
+"""
+
+setup = basic + """
+
+def f():
+    df.groupby('key')['data'].sum()
+
+# run consecutively
+def g2():
+    for i in range(2):
+        f()
+def g4():
+    for i in range(4):
+        f()
+def g8():
+    for i in range(8):
+        f()
+
+# run in parallel
+@test_parallel(num_threads=2)
+def pg2():
+    f()
+
+@test_parallel(num_threads=4)
+def pg4():
+    f()
+
+@test_parallel(num_threads=8)
+def pg8():
+    f()
+
+"""
+
+nogil_groupby_sum_4 = Benchmark(
+    'pg4()', setup,
+    start_date=datetime(2015, 1, 1))
+
+nogil_groupby_sum_8 = Benchmark(
+    'pg8()', setup,
+    start_date=datetime(2015, 1, 1))
+
+
+#### one 2-thread benchmark per listed groupby func ####
+
+setup = basic + """
+
+@test_parallel(num_threads=2)
+def pg2():
+    df.groupby('key')['data'].func()
+
+"""
+
+for f in ['sum','prod','var','count','min','max','mean','last']:
+    # every 'func' in the setup text is replaced by the groupby method name
+    name = "nogil_groupby_{f}_2".format(f=f)
+    bmark = Benchmark('pg2()', setup.replace('func',f), start_date=datetime(2015, 1, 1))
+    bmark.name = name
+    globals()[name] = bmark
+# NOTE(review): presumably drops the loop temporary so it is not seen as an extra benchmark - confirm
+del bmark
diff --git a/vb_suite/suite.py b/vb_suite/suite.py
@@ -16,6 +16,7 @@
            'inference',
            'hdfstore_bench',
            'join_merge',
+           'gil',
            'miscellaneous',
            'panel_ctor',
            'packers',