Skip to content

Commit 6db459a

Browse files
committed
PERF: GH10213 kth_smallest GIL release
1 parent 9bdae60 commit 6db459a

File tree

5 files changed

+62
-28
lines changed

5 files changed

+62
-28
lines changed

asv_bench/benchmarks/gil.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -298,4 +298,25 @@ def take_1d_pg2_int64(self):
298298

299299
@test_parallel(num_threads=2)
300300
def take_1d_pg2_float64(self):
301-
com.take_1d(self.df.float64.values, self.indexer)
301+
com.take_1d(self.df.float64.values, self.indexer)
302+
303+
304+
class nogil_kth_smallest(object):
305+
number = 1
306+
repeat = 5
307+
308+
def setup(self):
309+
if (not have_real_test_parallel):
310+
raise NotImplementedError
311+
np.random.seed(1234)
312+
self.N = 10000000
313+
self.k = 500000
314+
self.a = np.random.randn(self.N)
315+
self.b = self.a.copy()
316+
self.kwargs_list = [{'arr': self.a}, {'arr': self.b}]
317+
318+
def time_nogil_kth_smallest(self):
319+
@test_parallel(num_threads=2, kwargs_list=self.kwargs_list)
320+
def run(arr):
321+
algos.kth_smallest(arr, self.k)
322+
run()

asv_bench/benchmarks/pandas_vb_common.py

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import pandas.util.testing as tm
88
import random
99
import numpy as np
10+
import threading
1011
try:
1112
from pandas.compat import range
1213
except ImportError:

doc/source/whatsnew/v0.17.0.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,15 @@ Releasing the GIL
6969

7070
We are releasing the global-interpreter-lock (GIL) on some Cython operations.
7171
This will allow other threads to run simultaneously during computation, potentially allowing performance improvements
72-
from multi-threading. Notably ``groupby`` and some indexing operations are a benefit from this. (:issue:`8882`)
72+
from multi-threading. Notably ``groupby``, ``nsmallest`` and some indexing operations benefit from this. (:issue:`8882`)
7373

7474
For example, the groupby expression in the following code will have the GIL released during the factorization step, e.g. ``df.groupby('key')``
7575
as well as the ``.sum()`` operation.
7676

7777
.. code-block:: python
7878

79-
N = 1e6
79+
N = 1000000
80+
ngroups = 10
8081
df = DataFrame({'key' : np.random.randint(0,ngroups,size=N),
8182
'data' : np.random.randn(N) })
8283
df.groupby('key')['data'].sum()

pandas/algos.pyx

+22-22
Original file line numberDiff line numberDiff line change
@@ -740,7 +740,7 @@ ctypedef fused numeric:
740740
float64_t
741741

742742

743-
cdef inline Py_ssize_t swap(numeric *a, numeric *b) except -1:
743+
cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil except -1:
744744
cdef numeric t
745745

746746
# cython doesn't allow pointer dereference so use array syntax
@@ -756,27 +756,27 @@ cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k):
756756
cdef:
757757
Py_ssize_t i, j, l, m, n = a.size
758758
numeric x
759-
760-
l = 0
761-
m = n - 1
762-
763-
while l < m:
764-
x = a[k]
765-
i = l
766-
j = m
767-
768-
while 1:
769-
while a[i] < x: i += 1
770-
while x < a[j]: j -= 1
771-
if i <= j:
772-
swap(&a[i], &a[j])
773-
i += 1; j -= 1
774-
775-
if i > j: break
776-
777-
if j < k: l = i
778-
if k < i: m = j
779-
return a[k]
759+
with nogil:
760+
l = 0
761+
m = n - 1
762+
763+
while l < m:
764+
x = a[k]
765+
i = l
766+
j = m
767+
768+
while 1:
769+
while a[i] < x: i += 1
770+
while x < a[j]: j -= 1
771+
if i <= j:
772+
swap(&a[i], &a[j])
773+
i += 1; j -= 1
774+
775+
if i > j: break
776+
777+
if j < k: l = i
778+
if k < i: m = j
779+
return a[k]
780780

781781

782782
cdef inline kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n):

pandas/util/testing.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -2044,14 +2044,16 @@ def use_numexpr(use, min_elements=expr._MIN_ELEMENTS):
20442044
if inspect.isfunction(obj) and name.startswith('assert'):
20452045
setattr(TestCase, name, staticmethod(obj))
20462046

2047-
def test_parallel(num_threads=2):
2047+
2048+
def test_parallel(num_threads=2, kwargs_list=None):
20482049
"""Decorator to run the same function multiple times in parallel.
20492050
20502051
Parameters
20512052
----------
20522053
num_threads : int, optional
20532054
The number of times the function is run in parallel.
2054-
2055+
kwargs_list : list of dicts, optional
2056+
List with one dict per thread; the i-th dict is merged over the call's kwargs for the i-th thread.
20552057
Notes
20562058
-----
20572059
This decorator does not pass the return value of the decorated function.
@@ -2061,14 +2063,23 @@ def test_parallel(num_threads=2):
20612063
"""
20622064

20632065
assert num_threads > 0
2066+
has_kwargs_list = kwargs_list is not None
2067+
if has_kwargs_list:
2068+
assert len(kwargs_list) == num_threads
20642069
import threading
20652070

20662071
def wrapper(func):
20672072
@wraps(func)
20682073
def inner(*args, **kwargs):
2074+
if has_kwargs_list:
2075+
update_kwargs = lambda i: dict(kwargs, **kwargs_list[i])
2076+
else:
2077+
update_kwargs = lambda i: kwargs
20692078
threads = []
20702079
for i in range(num_threads):
2071-
thread = threading.Thread(target=func, args=args, kwargs=kwargs)
2080+
updated_kwargs = update_kwargs(i)
2081+
thread = threading.Thread(target=func, args=args,
2082+
kwargs=updated_kwargs)
20722083
threads.append(thread)
20732084
for thread in threads:
20742085
thread.start()

0 commit comments

Comments
 (0)