Skip to content

Commit 18d9d1c

Browse files
Clean-up inference/miscellaneous benchmarks
1 parent ab32803 commit 18d9d1c

File tree

4 files changed: +72 -166 lines changed

asv_bench/benchmarks/algorithms.py

+8
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,10 @@ def setup(self):
2323
self.arrneg = np.arange(-1000000, 0)
2424
self.arrmixed = np.array([1, -1]).repeat(500000)
2525

26+
# match
27+
self.uniques = tm.makeStringIndex(1000).values
28+
self.all = self.uniques.repeat(10)
29+
2630
def time_factorize_int(self):
2731
self.int.factorize()
2832

@@ -55,3 +59,7 @@ def time_add_overflow_neg_arr(self):
5559

5660
def time_add_overflow_mixed_arr(self):
5761
self.checked_add(self.arr, self.arrmixed)
62+
63+
def time_match_strings(self):
64+
pd.match(self.all, self.uniques)
65+

asv_bench/benchmarks/attrs_caching.py

+17
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from .pandas_vb_common import *
2+
from pandas.util.decorators import cache_readonly
23

34

45
class DataFrameAttributes(object):
@@ -13,3 +14,19 @@ def time_get_index(self):
1314

1415
def time_set_index(self):
1516
self.df.index = self.cur_index
17+
18+
19+
class CacheReadonly(object):
20+
goal_time = 0.2
21+
22+
def setup(self):
23+
24+
class Foo:
25+
26+
@cache_readonly
27+
def prop(self):
28+
return 5
29+
self.obj = Foo()
30+
31+
def time_cache_readonly(self):
32+
self.obj.prop

asv_bench/benchmarks/inference.py

+47 -114
Original file line number | Diff line number | Diff line change
@@ -2,143 +2,76 @@
22
import pandas as pd
33

44

5-
class dtype_infer_datetime64(object):
5+
class DtypeInfer(object):
66
goal_time = 0.2
77

8-
def setup(self):
9-
self.N = 500000
10-
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'), B=np.arange(self.N, dtype='int64')))
11-
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'), B=np.arange(self.N, dtype='int32')))
12-
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'), B=np.arange(self.N, dtype='uint32')))
13-
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'), B=np.arange(self.N, dtype='float64')))
14-
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'), B=np.arange(self.N, dtype='float32')))
15-
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'), B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms')))
16-
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']), B=self.df_datetime64['B']))
17-
18-
def time_dtype_infer_datetime64(self):
19-
(self.df_datetime64['A'] - self.df_datetime64['B'])
20-
21-
22-
class dtype_infer_float32(object):
23-
goal_time = 0.2
8+
# from GH 7332
249

2510
def setup(self):
2611
self.N = 500000
27-
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'), B=np.arange(self.N, dtype='int64')))
28-
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'), B=np.arange(self.N, dtype='int32')))
29-
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'), B=np.arange(self.N, dtype='uint32')))
30-
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'), B=np.arange(self.N, dtype='float64')))
31-
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'), B=np.arange(self.N, dtype='float32')))
32-
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'), B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms')))
33-
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']), B=self.df_datetime64['B']))
34-
35-
def time_dtype_infer_float32(self):
36-
(self.df_float32['A'] + self.df_float32['B'])
12+
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'),
13+
B=np.arange(self.N, dtype='int64')))
14+
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'),
15+
B=np.arange(self.N, dtype='int32')))
16+
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'),
17+
B=np.arange(self.N, dtype='uint32')))
18+
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'),
19+
B=np.arange(self.N, dtype='float64')))
20+
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'),
21+
B=np.arange(self.N, dtype='float32')))
22+
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'),
23+
B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms')))
24+
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']),
25+
B=self.df_datetime64['B']))
26+
27+
def time_int64(self):
28+
(self.df_int64['A'] + self.df_int64['B'])
3729

30+
def time_int32(self):
31+
(self.df_int32['A'] + self.df_int32['B'])
3832

39-
class dtype_infer_float64(object):
40-
goal_time = 0.2
33+
def time_uint32(self):
34+
(self.df_uint32['A'] + self.df_uint32['B'])
4135

42-
def setup(self):
43-
self.N = 500000
44-
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'), B=np.arange(self.N, dtype='int64')))
45-
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'), B=np.arange(self.N, dtype='int32')))
46-
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'), B=np.arange(self.N, dtype='uint32')))
47-
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'), B=np.arange(self.N, dtype='float64')))
48-
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'), B=np.arange(self.N, dtype='float32')))
49-
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'), B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms')))
50-
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']), B=self.df_datetime64['B']))
51-
52-
def time_dtype_infer_float64(self):
36+
def time_float64(self):
5337
(self.df_float64['A'] + self.df_float64['B'])
5438

39+
def time_float32(self):
40+
(self.df_float32['A'] + self.df_float32['B'])
5541

56-
class dtype_infer_int32(object):
57-
goal_time = 0.2
58-
59-
def setup(self):
60-
self.N = 500000
61-
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'), B=np.arange(self.N, dtype='int64')))
62-
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'), B=np.arange(self.N, dtype='int32')))
63-
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'), B=np.arange(self.N, dtype='uint32')))
64-
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'), B=np.arange(self.N, dtype='float64')))
65-
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'), B=np.arange(self.N, dtype='float32')))
66-
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'), B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms')))
67-
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']), B=self.df_datetime64['B']))
68-
69-
def time_dtype_infer_int32(self):
70-
(self.df_int32['A'] + self.df_int32['B'])
71-
42+
def time_datetime64(self):
43+
(self.df_datetime64['A'] - self.df_datetime64['B'])
7244

73-
class dtype_infer_int64(object):
74-
goal_time = 0.2
45+
def time_timedelta64_1(self):
46+
(self.df_timedelta64['A'] + self.df_timedelta64['B'])
7547

76-
def setup(self):
77-
self.N = 500000
78-
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'), B=np.arange(self.N, dtype='int64')))
79-
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'), B=np.arange(self.N, dtype='int32')))
80-
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'), B=np.arange(self.N, dtype='uint32')))
81-
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'), B=np.arange(self.N, dtype='float64')))
82-
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'), B=np.arange(self.N, dtype='float32')))
83-
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'), B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms')))
84-
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']), B=self.df_datetime64['B']))
85-
86-
def time_dtype_infer_int64(self):
87-
(self.df_int64['A'] + self.df_int64['B'])
48+
def time_timedelta64_2(self):
49+
(self.df_timedelta64['A'] + self.df_timedelta64['A'])
8850

8951

90-
class dtype_infer_timedelta64_1(object):
52+
class to_numeric(object):
9153
goal_time = 0.2
9254

9355
def setup(self):
94-
self.N = 500000
95-
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'), B=np.arange(self.N, dtype='int64')))
96-
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'), B=np.arange(self.N, dtype='int32')))
97-
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'), B=np.arange(self.N, dtype='uint32')))
98-
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'), B=np.arange(self.N, dtype='float64')))
99-
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'), B=np.arange(self.N, dtype='float32')))
100-
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'), B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms')))
101-
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']), B=self.df_datetime64['B']))
102-
103-
def time_dtype_infer_timedelta64_1(self):
104-
(self.df_timedelta64['A'] + self.df_timedelta64['B'])
105-
56+
self.n = 10000
57+
self.float = Series(np.random.randn(self.n * 100))
58+
self.numstr = self.float.astype('str')
59+
self.str = Series(tm.makeStringIndex(self.n))
10660

107-
class dtype_infer_timedelta64_2(object):
108-
goal_time = 0.2
109-
110-
def setup(self):
111-
self.N = 500000
112-
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'), B=np.arange(self.N, dtype='int64')))
113-
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'), B=np.arange(self.N, dtype='int32')))
114-
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'), B=np.arange(self.N, dtype='uint32')))
115-
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'), B=np.arange(self.N, dtype='float64')))
116-
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'), B=np.arange(self.N, dtype='float32')))
117-
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'), B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms')))
118-
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']), B=self.df_datetime64['B']))
119-
120-
def time_dtype_infer_timedelta64_2(self):
121-
(self.df_timedelta64['A'] + self.df_timedelta64['A'])
61+
def time_from_float(self):
62+
pd.to_numeric(self.float)
12263

64+
def time_from_numeric_str(self):
65+
pd.to_numeric(self.numstr)
12366

124-
class dtype_infer_uint32(object):
125-
goal_time = 0.2
67+
def time_from_str_ignore(self):
68+
pd.to_numeric(self.str, errors='ignore')
12669

127-
def setup(self):
128-
self.N = 500000
129-
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'), B=np.arange(self.N, dtype='int64')))
130-
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'), B=np.arange(self.N, dtype='int32')))
131-
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'), B=np.arange(self.N, dtype='uint32')))
132-
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'), B=np.arange(self.N, dtype='float64')))
133-
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'), B=np.arange(self.N, dtype='float32')))
134-
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'), B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms')))
135-
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']), B=self.df_datetime64['B']))
136-
137-
def time_dtype_infer_uint32(self):
138-
(self.df_uint32['A'] + self.df_uint32['B'])
70+
def time_from_str_coerce(self):
71+
pd.to_numeric(self.str, errors='coerce')
13972

14073

141-
class to_numeric(object):
74+
class to_numeric_downcast(object):
14275

14376
param_names = ['dtype', 'downcast']
14477
params = [['string-float', 'string-int', 'string-nint', 'datetime64',
@@ -162,4 +95,4 @@ def setup(self, dtype, downcast):
16295
self.data = self.data_dict[dtype]
16396

16497
def time_downcast(self, dtype, downcast):
165-
pd.to_numeric(self.data, downcast=downcast)
98+
pd.to_numeric(self.data, downcast=downcast)

asv_bench/benchmarks/miscellaneous.py

-52
This file was deleted.

0 commit comments

Comments
 (0)