|
5 | 5 |
|
6 | 6 | # GH 8524
|
7 | 7 |
|
8 |
| -setup = """from pandas_vb_common import * |
| 8 | +common_setup = """from pandas_vb_common import * |
9 | 9 | from pandas import factorize
|
10 | 10 | SIZE = 1000000
|
11 |
| -
|
12 |
| -int_values_uniq = np.arange(SIZE) * 100 |
13 |
| -str_values_uniq = tm.makeStringIndex(SIZE) |
14 |
| -float_values_uniq = np.linspace(0., 1., num=SIZE) * 100 |
15 |
| -
|
16 | 11 | indices = np.random.randint(100, size=SIZE)
|
17 |
| -int_values_dup = int_values_uniq.take(indices) |
18 |
| -str_values_dup = str_values_uniq.take(indices) |
19 |
| -shortstr_values_dup = Index(np.take(['AA', 'BB', 'CC', 'DD'], |
20 |
| - np.random.randint(4, size=SIZE))) |
21 |
| -float_values_dup = float_values_uniq.take(indices) |
22 | 12 | """
|
23 | 13 |
|
24 | 14 |
|
| 15 | +# --- Integer array factorization |
| 16 | +setup = common_setup + """ |
| 17 | +int_values_uniq = np.arange(SIZE) * 100 |
| 18 | +""" |
25 | 19 | factorize_int_uniq = Benchmark("factorize(int_values_uniq)", setup,
|
26 | 20 | start_date=START_DATE)
|
| 21 | +setup = common_setup + """ |
| 22 | +int_values_dup = (np.arange(SIZE) * 100).take(indices) |
| 23 | +""" |
27 | 24 | factorize_int_dup = Benchmark("factorize(int_values_dup)", setup,
|
28 | 25 | start_date=START_DATE)
|
29 | 26 |
|
30 |
| -factorize_str_uniq = Benchmark("factorize(str_values_uniq)", setup, |
| 27 | + |
| 28 | +# --- String array factorization |
| 29 | +setup = common_setup + """ |
| 30 | +str_values_uniq = tm.makeStringIndex(SIZE) |
| 31 | +""" |
| 32 | +factorize_str_uniq = Benchmark("factorize(str_values_uniq)", setup=setup, |
31 | 33 | start_date=START_DATE)
|
32 |
| -factorize_str_dup = Benchmark("factorize(str_values_dup)", setup, |
| 34 | +setup = common_setup + """ |
| 35 | +str_values_dup = tm.makeStringIndex(SIZE).take(indices) |
| 36 | +""" |
| 37 | +factorize_str_dup = Benchmark("factorize(str_values_dup)", setup=setup, |
33 | 38 | start_date=START_DATE)
|
34 |
| -factorize_shortstr_dup = Benchmark("factorize(shortstr_values_dup)", setup, |
35 |
| - start_date=START_DATE) |
| 39 | +setup = common_setup + """ |
| 40 | +shortstr_4_dup = Index(np.take(['AA', 'BB', 'CC', 'DD'], |
| 41 | + np.random.randint(4, size=SIZE))) |
| 42 | +""" |
| 43 | +factorize_shortstr_4_dup = Benchmark("factorize(shortstr_4_dup)", |
| 44 | + setup=setup, start_date=START_DATE) |
| 45 | +setup = common_setup + """ |
| 46 | +shortstr_many_dup = tm.rands_array(2, SIZE) |
| 47 | +""" |
| 48 | +factorize_shortstr_many_dup = Benchmark("factorize(shortstr_many_dup)", |
| 49 | + setup=setup, start_date=START_DATE) |
36 | 50 |
|
37 |
| -factorize_float_uniq = Benchmark("factorize(float_values_uniq)", setup, |
| 51 | + |
| 52 | +# --- Float array factorization |
| 53 | +setup = common_setup + """ |
| 54 | +float_values_uniq = np.linspace(0., 1., num=SIZE) * 100 |
| 55 | +""" |
| 56 | +factorize_float_uniq = Benchmark("factorize(float_values_uniq)", setup=setup, |
38 | 57 | start_date=START_DATE)
|
| 58 | +setup = common_setup + """ |
| 59 | +float_values_dup = (np.linspace(0., 1., num=SIZE) * 100).take(indices) |
| 60 | +""" |
39 | 61 | factorize_float_dup = Benchmark("factorize(float_values_dup)", setup,
|
40 | 62 | start_date=START_DATE)
|
0 commit comments