Skip to content

Commit fe13de0

Browse files
committed
wip on pandas-dev#8882, releasing the GIL
1 parent 0aceb38 commit fe13de0

File tree

2 files changed

+75
-0
lines changed

2 files changed

+75
-0
lines changed

pandas/util/testing.py

+33
Original file line numberDiff line numberDiff line change
@@ -1782,3 +1782,36 @@ def use_numexpr(use, min_elements=expr._MIN_ELEMENTS):
17821782
# Walk every member of the current module and attach each function whose
# name starts with 'assert' to TestCase as a static method under the same name.
# NOTE(review): indentation was lost in this diff view; the `if` presumably
# nests inside the `for`, and the `setattr` inside the `if` -- confirm.
for name, obj in inspect.getmembers(sys.modules[__name__]):
17831783
if inspect.isfunction(obj) and name.startswith('assert'):
17841784
setattr(TestCase, name, staticmethod(obj))
1785+
1786+
def test_parallel(num_threads=2):
    """Decorator to run the same function multiple times in parallel.

    Parameters
    ----------
    num_threads : int, optional
        The number of times the function is run in parallel.

    Returns
    -------
    callable
        A decorator. The decorated function starts ``num_threads`` threads
        that each call the original function with the same positional and
        keyword arguments, waits for all of them to finish, and returns
        ``None``.

    Raises
    ------
    ValueError
        If ``num_threads`` is not greater than zero.

    Notes
    -----
    This decorator does not pass the return value of the decorated function.

    An exception raised by the decorated function inside a worker thread is
    re-raised in the calling thread once every thread has finished. When
    several threads fail, the exception from the earliest-started failing
    thread is the one raised.

    Original from scikit-image: https://github.com/scikit-image/scikit-image/pull/1519

    """
    # Explicit check instead of ``assert`` so the validation is not stripped
    # when Python runs with -O.
    if not num_threads > 0:
        raise ValueError(
            "num_threads must be greater than 0, got %r" % (num_threads,))
    import threading

    def wrapper(func):
        @wraps(func)
        def inner(*args, **kwargs):
            # One slot per thread; each thread writes only its own slot, so
            # no locking is needed to collect failures.
            errors = [None] * num_threads

            def run(slot):
                try:
                    func(*args, **kwargs)
                except Exception as exc:
                    errors[slot] = exc

            threads = [threading.Thread(target=run, args=(slot,))
                       for slot in range(num_threads)]
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()

            # Surface worker failures to the caller instead of losing them.
            for exc in errors:
                if exc is not None:
                    raise exc
        return inner
    return wrapper


# The ``test_`` prefix makes this decorator factory look like a test to
# collectors that select by name; ``__test__ = False`` opts it out.
test_parallel.__test__ = False

vb_suite/gil.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from vbench.api import Benchmark
from datetime import datetime

# Import lines prepended to the benchmark setup code below.
# NOTE(review): the setup code uses np, random and DataFrame without importing
# them; presumably they come from the pandas_vb_common star import -- confirm.
common_setup = """from pandas_vb_common import *
from pandas.util.testing import test_parallel
"""

# Data construction: a 1,000,000-row frame with two object-dtype key columns
# (100 distinct values each, shuffled) and two random float columns.
setup = common_setup + """
N = 1000000
ngroups = 100

def get_test_data(ngroups=100, n=N):
    # Materialise as a list so slicing and list concatenation below work even
    # when ``range`` is a lazy sequence.
    unique_groups = list(range(ngroups))
    # Floor division: np.tile needs an integer repeat count.
    arr = np.asarray(np.tile(unique_groups, n // ngroups), dtype=object)

    if len(arr) < n:
        arr = np.asarray(list(arr) + unique_groups[:n - len(arr)],
                         dtype=object)

    random.shuffle(arr)
    return arr

df = DataFrame({'key1' : get_test_data(ngroups=ngroups),
                'key2' : get_test_data(ngroups=ngroups),
                'data1' : np.random.randn(N),
                'data2' : np.random.randn(N)})
"""

# Append the benchmarked functions to the data setup. Re-assigning ``setup``
# from ``common_setup`` here would discard the code that defines ``df``.
setup += """

def f():
    df.groupby('key1')['data1'].sum()

@test_parallel()
def pf():
    f()

"""

# NOTE(review): the benchmark name ends in ``_nogil`` but the timed statement
# is the serial 'f()'; the threaded 'pf()' is defined in the setup and never
# timed -- confirm which one is intended.
groupby_frame_cython_many_columns_nogil = Benchmark(
    'f()', setup,
    start_date=datetime(2011, 8, 1))

0 commit comments

Comments
 (0)