|
"""vbench benchmarks timing ``pandas.factorize``.

Three kinds of input are covered (integers, strings and floats), each in
two flavours: one where every value is distinct ("uniq") and one built by
re-sampling the distinct values with indices drawn from ``range(100)``
("dup"), so the duplicated inputs hold at most 100 distinct values.
"""
from vbench.api import Benchmark
from datetime import datetime

# Passed to every benchmark below as its ``start_date``.
START_DATE = datetime(year=2014, month=10, day=13)

# GH 8524

# Setup source handed to each Benchmark.  It is a runtime string, so its
# text must stay exactly as written; ``np`` and ``tm`` are not imported here
# and presumably come from the ``pandas_vb_common`` star import -- confirm.
setup = """from pandas_vb_common import *
from pandas import factorize
SIZE = 1000000

int_values_uniq = np.arange(SIZE) * 100
str_values_uniq = tm.makeStringIndex(SIZE)
float_values_uniq = np.linspace(0., 1., num=SIZE) * 100

indices = np.random.randint(100, size=SIZE)
int_values_dup = int_values_uniq.take(indices)
str_values_dup = str_values_uniq.take(indices)
float_values_dup = float_values_uniq.take(indices)
"""

# NOTE(review): every Benchmark is deliberately bound by a plain
# module-level assignment.  vbench appears to derive a benchmark's name from
# the assigning statement -- confirm before folding these into a loop or a
# helper function.

# Integer input.
factorize_int_dup = Benchmark("factorize(int_values_dup)", setup,
                              start_date=START_DATE)
factorize_int_uniq = Benchmark("factorize(int_values_uniq)", setup,
                               start_date=START_DATE)

# String input.
factorize_str_dup = Benchmark("factorize(str_values_dup)", setup,
                              start_date=START_DATE)
factorize_str_uniq = Benchmark("factorize(str_values_uniq)", setup,
                               start_date=START_DATE)

# Float input.
factorize_float_dup = Benchmark("factorize(float_values_dup)", setup,
                                start_date=START_DATE)
factorize_float_uniq = Benchmark("factorize(float_values_uniq)", setup,
                                 start_date=START_DATE)
0 commit comments