|
3 | 3 | from pandas.types.concat import union_categoricals
|
4 | 4 | except ImportError:
|
5 | 5 | pass
|
6 |
| -import string |
7 | 6 |
|
8 | 7 |
|
9 |
| -class concat_categorical(object): |
| 8 | +class Categoricals(object): |
10 | 9 | goal_time = 0.2
|
11 | 10 |
|
12 | 11 | def setup(self):
|
13 |
| - self.s = pd.Series((list('aabbcd') * 1000000)).astype('category') |
| 12 | + N = 100000 |
| 13 | + self.s = pd.Series((list('aabbcd') * N)).astype('category') |
14 | 14 |
|
15 |
| - def time_concat_categorical(self): |
16 |
| - concat([self.s, self.s]) |
| 15 | + self.a = pd.Categorical((list('aabbcd') * N)) |
| 16 | + self.b = pd.Categorical((list('bbcdjk') * N)) |
17 | 17 |
|
| 18 | + self.categories = list('abcde') |
| 19 | + self.cat_idx = Index(self.categories) |
| 20 | + self.values = np.tile(self.categories, N) |
| 21 | + self.codes = np.tile(range(len(self.categories)), N) |
18 | 22 |
|
19 |
| -class union_categorical(object): |
20 |
| - goal_time = 0.2 |
| 23 | + self.datetimes = pd.Series(pd.date_range( |
| 24 | + '1995-01-01 00:00:00', periods=10000, freq='s')) |
21 | 25 |
|
22 |
| - def setup(self): |
23 |
| - self.a = pd.Categorical((list('aabbcd') * 1000000)) |
24 |
| - self.b = pd.Categorical((list('bbcdjk') * 1000000)) |
| 26 | + def time_concat(self): |
| 27 | + concat([self.s, self.s]) |
25 | 28 |
|
26 |
| - def time_union_categorical(self): |
| 29 | + def time_union(self): |
27 | 30 | union_categoricals([self.a, self.b])
|
28 | 31 |
|
29 |
| - |
30 |
| -class categorical_value_counts(object): |
31 |
| - goal_time = 1 |
32 |
| - |
33 |
| - def setup(self): |
34 |
| - n = 500000 |
35 |
| - np.random.seed(2718281) |
36 |
| - arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)] |
37 |
| - self.ts = Series(arr).astype('category') |
38 |
| - |
39 |
| - def time_value_counts(self): |
40 |
| - self.ts.value_counts(dropna=False) |
41 |
| - |
42 |
| - def time_value_counts_dropna(self): |
43 |
| - self.ts.value_counts(dropna=True) |
44 |
| - |
45 |
| - |
46 |
| -class categorical_constructor(object): |
47 |
| - goal_time = 0.2 |
48 |
| - |
49 |
| - def setup(self): |
50 |
| - n = 5 |
51 |
| - N = 1e6 |
52 |
| - self.categories = list(string.ascii_letters[:n]) |
53 |
| - self.cat_idx = Index(self.categories) |
54 |
| - self.values = np.tile(self.categories, N) |
55 |
| - self.codes = np.tile(range(n), N) |
56 |
| - |
57 |
| - def time_regular_constructor(self): |
| 32 | + def time_constructor_regular(self): |
58 | 33 | Categorical(self.values, self.categories)
|
59 | 34 |
|
60 |
| - def time_fastpath(self): |
| 35 | + def time_constructor_fastpath(self): |
61 | 36 | Categorical(self.codes, self.cat_idx, fastpath=True)
|
62 | 37 |
|
63 |
| - |
64 |
| -class categorical_constructor_with_datetimes(object): |
65 |
| - goal_time = 0.2 |
66 |
| - |
67 |
| - def setup(self): |
68 |
| - self.datetimes = pd.Series(pd.date_range( |
69 |
| - '1995-01-01 00:00:00', periods=10000, freq='s')) |
70 |
| - |
71 |
| - def time_datetimes(self): |
| 38 | + def time_constructor_datetimes(self): |
72 | 39 | Categorical(self.datetimes)
|
73 | 40 |
|
74 |
| - def time_datetimes_with_nat(self): |
| 41 | + def time_constructor_datetimes_with_nat(self): |
75 | 42 | t = self.datetimes
|
76 | 43 | t.iloc[-1] = pd.NaT
|
77 | 44 | Categorical(t)
|
78 | 45 |
|
79 | 46 |
|
80 |
| -class categorical_rendering(object): |
81 |
| - goal_time = 3e-3 |
| 47 | +class Categoricals2(object): |
| 48 | + goal_time = 0.2 |
82 | 49 |
|
83 | 50 | def setup(self):
|
84 |
| - n = 1000 |
85 |
| - items = [str(i) for i in range(n)] |
86 |
| - s = pd.Series(items, dtype='category') |
87 |
| - df = pd.DataFrame({'C': s, 'data': np.random.randn(n)}) |
88 |
| - self.data = df[df.C == '20'] |
| 51 | + n = 500000 |
| 52 | + np.random.seed(2718281) |
| 53 | + arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)] |
| 54 | + self.ts = Series(arr).astype('category') |
| 55 | + |
| 56 | + self.sel = self.ts.loc[[0]] |
| 57 | + |
| 58 | + def time_value_counts(self): |
| 59 | + self.ts.value_counts(dropna=False) |
| 60 | + |
| 61 | + def time_value_counts_dropna(self): |
| 62 | + self.ts.value_counts(dropna=True) |
89 | 63 |
|
90 | 64 | def time_rendering(self):
|
91 |
| - str(self.data.C) |
| 65 | + str(self.sel) |
0 commit comments