Skip to content

Commit 08bb64a

Browse files
committed
PERF: vbenches for pandas-dev#8882, releasing the GIL
1 parent 1d87174 commit 08bb64a

File tree

3 files changed

+109
-0
lines changed

3 files changed

+109
-0
lines changed

pandas/util/testing.py

+33
Original file line numberDiff line numberDiff line change
@@ -1782,3 +1782,36 @@ def use_numexpr(use, min_elements=expr._MIN_ELEMENTS):
17821782
for name, obj in inspect.getmembers(sys.modules[__name__]):
17831783
if inspect.isfunction(obj) and name.startswith('assert'):
17841784
setattr(TestCase, name, staticmethod(obj))
1785+
1786+
def test_parallel(num_threads=2):
    """Decorator to run the same function multiple times in parallel.

    Every call of the decorated function starts ``num_threads`` threads,
    each invoking the original function with the same positional and
    keyword arguments, and blocks until all of them have finished.

    Parameters
    ----------
    num_threads : int, optional
        The number of times the function is run in parallel.

    Raises
    ------
    Exception
        If the function raises in one or more threads, the first exception
        recorded is re-raised in the calling thread once every thread has
        been joined.

    Notes
    -----
    This decorator does not pass the return value of the decorated function.

    Original from scikit-image:
    https://github.com/scikit-image/scikit-image/pull/1519

    """

    assert num_threads > 0
    import threading

    def wrapper(func):
        @wraps(func)
        def inner(*args, **kwargs):
            # Exceptions raised inside a thread do not propagate to the
            # thread that started it, so collect them here and re-raise
            # after joining; otherwise a failing function looks like a
            # successful run.
            errors = []

            def run():
                try:
                    func(*args, **kwargs)
                except Exception as exc:
                    errors.append(exc)

            threads = [threading.Thread(target=run)
                       for _ in range(num_threads)]
            # Start every thread before joining any, so they overlap.
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()
            if errors:
                raise errors[0]
        return inner
    return wrapper

vb_suite/gil.py

+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from vbench.api import Benchmark
2+
from datetime import datetime
3+
4+
# Setup source shared by the benchmarks in this module. It is kept as text
# because it is handed to Benchmark as its setup argument, not executed here.
# NOTE(review): ``np`` and ``DataFrame`` used further down are presumably
# provided by this star import -- confirm against pandas_vb_common.
common_setup = """from pandas_vb_common import *
"""

# Extends common_setup: imports the test_parallel decorator and builds ``df``,
# an N-row frame with a 'key' column drawn by np.random.randint(0, ngroups)
# and a 'data' column drawn by np.random.randn. The seed is fixed before the
# data is generated.
basic = common_setup + """
from pandas.util.testing import test_parallel

N = 1000000
ngroups = 1000
np.random.seed(1234)

df = DataFrame({'key' : np.random.randint(0,ngroups,size=N),
                'data' : np.random.randn(N) })
"""
17+
18+
setup = basic + """
19+
20+
def f():
21+
df.groupby('key')['data'].sum()
22+
23+
# run consecutivily
24+
def g2():
25+
for i in range(2):
26+
f()
27+
def g4():
28+
for i in range(4):
29+
f()
30+
def g8():
31+
for i in range(8):
32+
f()
33+
34+
# run in parallel
35+
@test_parallel(num_threads=2)
36+
def pg2():
37+
f()
38+
39+
@test_parallel(num_threads=4)
40+
def pg4():
41+
f()
42+
43+
@test_parallel(num_threads=8)
44+
def pg8():
45+
f()
46+
47+
"""
48+
49+
# Threaded groupby-sum benchmarks. Each statement string calls a runner that
# the ``setup`` source defines: pg4 is decorated with
# test_parallel(num_threads=4) and pg8 with test_parallel(num_threads=8).
# No name is passed here; presumably vbench derives each benchmark's name
# from the variable it is assigned to -- TODO confirm against vbench before
# changing the layout of these assignments.
nogil_groupby_sum_4 = Benchmark(
    'pg4()', setup,
    start_date=datetime(2015, 1, 1))

nogil_groupby_sum_8 = Benchmark(
    'pg8()', setup,
    start_date=datetime(2015, 1, 1))
56+
57+
58+
#### test all groupby funcs ####

# Template appended to ``basic`` for each reduction below. The literal token
# ``func`` is a placeholder that is replaced by the reduction's method name.
# str.replace is used rather than str.format because the substitution is a
# plain token swap and needs no brace escaping.
groupby_template = """

@test_parallel(num_threads=2)
def pg2():
    df.groupby('key')['data'].func()

"""

# Create one two-thread benchmark per groupby reduction and publish it as a
# module-level name of the form ``nogil_groupby_<func>_2``.
for func_name in ['sum', 'prod', 'var', 'count', 'min', 'max', 'mean', 'last']:

    # Substitute only inside the template so that text coming from ``basic``
    # can never be rewritten by accident.
    func_setup = basic + groupby_template.replace('func', func_name)
    bmark = Benchmark('pg2()', func_setup, start_date=datetime(2015, 1, 1))
    bmark.name = "nogil_groupby_{f}_2".format(f=func_name)
    globals()[bmark.name] = bmark

# Remove the loop temporaries so that only the named benchmarks stay in the
# module namespace (``bmark`` would otherwise alias the last benchmark).
del bmark, func_name, func_setup

vb_suite/suite.py

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
'inference',
1717
'hdfstore_bench',
1818
'join_merge',
19+
'gil',
1920
'miscellaneous',
2021
'panel_ctor',
2122
'packers',

0 commit comments

Comments
 (0)