Skip to content

Commit b08ab8e

Browse files
committed
PERF: vbenches for #8882, releasing the GIL
1 parent 25fc49d commit b08ab8e

File tree

3 files changed

+109
-0
lines changed

3 files changed

+109
-0
lines changed

pandas/util/testing.py

+33
Original file line numberDiff line numberDiff line change
@@ -1817,3 +1817,36 @@ def use_numexpr(use, min_elements=expr._MIN_ELEMENTS):
18171817
for name, obj in inspect.getmembers(sys.modules[__name__]):
18181818
if inspect.isfunction(obj) and name.startswith('assert'):
18191819
setattr(TestCase, name, staticmethod(obj))
1820+
1821+
def test_parallel(num_threads=2):
    """Decorator to run the same function multiple times in parallel.

    Parameters
    ----------
    num_threads : int, optional
        The number of times the function is run in parallel. Must be
        positive.

    Returns
    -------
    callable
        A decorator. Calling the decorated function starts ``num_threads``
        threads that each invoke the original function with the same
        positional and keyword arguments, waits for all of them to finish
        and returns ``None``.

    Notes
    -----
    This decorator does not pass the return value of the decorated function.

    If the function raises in one or more threads, the first exception that
    was recorded is re-raised in the calling thread once every thread has
    been joined, so a failure is not silently lost in a worker thread.

    Original from scikit-image: https://github.com/scikit-image/scikit-image/pull/1519

    """

    assert num_threads > 0
    import threading

    def wrapper(func):
        @wraps(func)
        def inner(*args, **kwargs):
            # Exceptions raised by the workers; list.append is atomic in
            # CPython, so the workers can share the list without a lock.
            errors = []

            def run():
                # Thread entry point: an exception escaping here would only
                # be reported by the threading machinery and never reach the
                # caller, so record it for re-raising after the join.
                try:
                    func(*args, **kwargs)
                except Exception as exc:
                    errors.append(exc)

            threads = [threading.Thread(target=run)
                       for _ in range(num_threads)]
            # Start every thread before joining any, so they overlap.
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()
            if errors:
                raise errors[0]
        return inner
    return wrapper


# The ``test_`` prefix matches the naming pattern test runners use for
# collection; opt out so this helper is not run as a test where imported.
test_parallel.__test__ = False

vb_suite/gil.py

+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# Benchmarks for groupby reductions called from several threads at once
# (see #8882, releasing the GIL).
from vbench.api import Benchmark
from datetime import datetime

# Start date shared by every benchmark below; a temporary that is removed
# again at the end of the module.
since = datetime(2015, 1, 1)

common_setup = """from pandas_vb_common import *
"""

# Setup code shared by all benchmarks in this module: builds a two-column
# frame of N rows with ``ngroups`` distinct integer keys.
basic = common_setup + """
from pandas.util.testing import test_parallel

N = 1000000
ngroups = 1000
np.random.seed(1234)

df = DataFrame({'key' : np.random.randint(0,ngroups,size=N),
                'data' : np.random.randn(N) })
"""

# Setup for the groupby-sum benchmarks: ``g*`` call ``f`` in a plain loop,
# ``pg*`` call it once per thread through ``test_parallel``.
setup = basic + """

def f():
    df.groupby('key')['data'].sum()

# run consecutivily
def g2():
    for i in range(2):
        f()
def g4():
    for i in range(4):
        f()
def g8():
    for i in range(8):
        f()

# run in parallel
@test_parallel(num_threads=2)
def pg2():
    f()

@test_parallel(num_threads=4)
def pg4():
    f()

@test_parallel(num_threads=8)
def pg8():
    f()

"""

nogil_groupby_sum_4 = Benchmark('pg4()', setup, start_date=since)

nogil_groupby_sum_8 = Benchmark('pg8()', setup, start_date=since)


#### test all groupby funcs ####

# Template setup: the literal text ``func`` is substituted with the name of
# each groupby method in the loop below.
setup = basic + """

@test_parallel(num_threads=2)
def pg2():
    df.groupby('key')['data'].func()

"""

for f in ('sum', 'prod', 'var', 'count', 'min', 'max', 'mean', 'last'):

    name = "nogil_groupby_{f}_2".format(f=f)
    bmark = Benchmark('pg2()', setup.replace('func', f), start_date=since)
    # The benchmark is published under a generated module-level name, so
    # give it that name explicitly.
    bmark.name = name
    globals()[name] = bmark

# Drop the temporaries; ``bmark`` would otherwise remain as a second
# module-level reference to the last benchmark created in the loop.
del bmark, since

vb_suite/suite.py

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
'inference',
1717
'hdfstore_bench',
1818
'join_merge',
19+
'gil',
1920
'miscellaneous',
2021
'panel_ctor',
2122
'packers',

0 commit comments

Comments
 (0)