"""vbench benchmarks that run pandas groupby reductions from several threads.

Every benchmark here calls a function decorated with
``pandas.util.testing.test_parallel`` and is registered under a
``nogil_groupby_<reduction>_<num_threads>`` module-level name.
"""
from vbench.api import Benchmark
from datetime import datetime

# Prelude shared by every setup string in this module.
common_setup = """from pandas_vb_common import *
"""

# Shared fixture: a two-column frame with N rows whose integer 'key' column is
# drawn from [0, ngroups); the RNG is seeded so the data is reproducible.
basic = common_setup + """
from pandas.util.testing import test_parallel

N = 1000000
ngroups = 1000
np.random.seed(1234)

df = DataFrame({'key' : np.random.randint(0,ngroups,size=N),
                'data' : np.random.randn(N) })
"""

# Setup for the groupby-sum benchmarks.  It defines sequential runners
# (g2/g4/g8) next to the threaded ones (pg2/pg4/pg8); only pg4 and pg8 are
# benchmarked below.
# NOTE(review): the text inside the setup strings is runtime data handed to
# Benchmark, so it is reproduced as-is (including the comment typo) --
# presumably editing it would change how vbench identifies these benchmarks;
# confirm before touching it.
setup = basic + """

def f():
    df.groupby('key')['data'].sum()

# run consecutivily
def g2():
    for i in range(2):
        f()
def g4():
    for i in range(4):
        f()
def g8():
    for i in range(8):
        f()

# run in parallel
@test_parallel(num_threads=2)
def pg2():
    f()

@test_parallel(num_threads=4)
def pg4():
    f()

@test_parallel(num_threads=8)
def pg8():
    f()

"""

nogil_groupby_sum_4 = Benchmark(
    'pg4()', setup,
    start_date=datetime(2015, 1, 1))

nogil_groupby_sum_8 = Benchmark(
    'pg8()', setup,
    start_date=datetime(2015, 1, 1))


#### test all groupby funcs ####

# Template setup: the literal token 'func' is swapped for each reduction name
# in the loop below.  Note that this rebinds ``setup``; the two benchmarks
# above already hold the previous string.
setup = basic + """

@test_parallel(num_threads=2)
def pg2():
    df.groupby('key')['data'].func()

"""

for f in ['sum','prod','var','count','min','max','mean','last']:

    name = "nogil_groupby_{f}_2".format(f=f)
    # str.replace rather than str.format: the setup text contains literal
    # braces (the DataFrame dict literal in ``basic``), which format() would
    # try to interpret as replacement fields.
    bmark = Benchmark('pg2()', setup.replace('func',f), start_date=datetime(2015, 1, 1))
    bmark.name = name
    # Publish the benchmark as a module-level attribute under its own name.
    globals()[name] = bmark

# Drop the temporary binding so the last benchmark is reachable only through
# its generated name (presumably so a module scan does not see it twice).
del bmark