Skip to content

Commit 697279b

Browse files
Clean up categorical benchmarks
1 parent d7245f5 commit 697279b

File tree

1 file changed: 33 additions and 59 deletions.

asv_bench/benchmarks/categoricals.py

+33-59
Original file line numberDiff line numberDiff line change
@@ -3,89 +3,63 @@
33
from pandas.types.concat import union_categoricals
44
except ImportError:
55
pass
6-
import string
76

87

9-
class concat_categorical(object):
8+
class Categoricals(object):
109
goal_time = 0.2
1110

1211
def setup(self):
13-
self.s = pd.Series((list('aabbcd') * 1000000)).astype('category')
12+
N = 100000
13+
self.s = pd.Series((list('aabbcd') * N)).astype('category')
1414

15-
def time_concat_categorical(self):
16-
concat([self.s, self.s])
15+
self.a = pd.Categorical((list('aabbcd') * N))
16+
self.b = pd.Categorical((list('bbcdjk') * N))
1717

18+
self.categories = list('abcde')
19+
self.cat_idx = Index(self.categories)
20+
self.values = np.tile(self.categories, N)
21+
self.codes = np.tile(range(len(self.categories)), N)
1822

19-
class union_categorical(object):
20-
goal_time = 0.2
23+
self.datetimes = pd.Series(pd.date_range(
24+
'1995-01-01 00:00:00', periods=10000, freq='s'))
2125

22-
def setup(self):
23-
self.a = pd.Categorical((list('aabbcd') * 1000000))
24-
self.b = pd.Categorical((list('bbcdjk') * 1000000))
26+
def time_concat(self):
27+
concat([self.s, self.s])
2528

26-
def time_union_categorical(self):
29+
def time_union(self):
2730
union_categoricals([self.a, self.b])
2831

29-
30-
class categorical_value_counts(object):
31-
goal_time = 1
32-
33-
def setup(self):
34-
n = 500000
35-
np.random.seed(2718281)
36-
arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
37-
self.ts = Series(arr).astype('category')
38-
39-
def time_value_counts(self):
40-
self.ts.value_counts(dropna=False)
41-
42-
def time_value_counts_dropna(self):
43-
self.ts.value_counts(dropna=True)
44-
45-
46-
class categorical_constructor(object):
47-
goal_time = 0.2
48-
49-
def setup(self):
50-
n = 5
51-
N = 1e6
52-
self.categories = list(string.ascii_letters[:n])
53-
self.cat_idx = Index(self.categories)
54-
self.values = np.tile(self.categories, N)
55-
self.codes = np.tile(range(n), N)
56-
57-
def time_regular_constructor(self):
32+
def time_constructor_regular(self):
5833
Categorical(self.values, self.categories)
5934

60-
def time_fastpath(self):
35+
def time_constructor_fastpath(self):
6136
Categorical(self.codes, self.cat_idx, fastpath=True)
6237

63-
64-
class categorical_constructor_with_datetimes(object):
65-
goal_time = 0.2
66-
67-
def setup(self):
68-
self.datetimes = pd.Series(pd.date_range(
69-
'1995-01-01 00:00:00', periods=10000, freq='s'))
70-
71-
def time_datetimes(self):
38+
def time_constructor_datetimes(self):
7239
Categorical(self.datetimes)
7340

74-
def time_datetimes_with_nat(self):
41+
def time_constructor_datetimes_with_nat(self):
7542
t = self.datetimes
7643
t.iloc[-1] = pd.NaT
7744
Categorical(t)
7845

7946

80-
class categorical_rendering(object):
81-
goal_time = 3e-3
47+
class Categoricals2(object):
48+
goal_time = 0.2
8249

8350
def setup(self):
84-
n = 1000
85-
items = [str(i) for i in range(n)]
86-
s = pd.Series(items, dtype='category')
87-
df = pd.DataFrame({'C': s, 'data': np.random.randn(n)})
88-
self.data = df[df.C == '20']
51+
n = 500000
52+
np.random.seed(2718281)
53+
arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
54+
self.ts = Series(arr).astype('category')
55+
56+
self.sel = self.ts.loc[[0]]
57+
58+
def time_value_counts(self):
59+
self.ts.value_counts(dropna=False)
60+
61+
def time_value_counts_dropna(self):
62+
self.ts.value_counts(dropna=True)
8963

9064
def time_rendering(self):
91-
str(self.data.C)
65+
str(self.sel)

0 commit comments

Comments (0)