Skip to content

Commit ad3eca1

Browse files
CLN/PERF: clean-up of the benchmarks (#14099)
1 parent 1dbc7be commit ad3eca1

28 files changed

+1940
-7140
lines changed

asv_bench/benchmarks/algorithms.py

+14 -7
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from pandas.util import testing as tm
44

55

6-
class algorithm(object):
6+
class Algorithms(object):
77
goal_time = 0.2
88

99
def setup(self):
@@ -24,21 +24,28 @@ def setup(self):
2424
self.arrneg = np.arange(-1000000, 0)
2525
self.arrmixed = np.array([1, -1]).repeat(500000)
2626

27-
def time_int_factorize(self):
27+
# match
28+
self.uniques = tm.makeStringIndex(1000).values
29+
self.all = self.uniques.repeat(10)
30+
31+
def time_factorize_int(self):
2832
self.int.factorize()
2933

30-
def time_float_factorize(self):
34+
def time_factorize_float(self):
3135
self.int.factorize()
3236

33-
def time_int_unique_duplicated(self):
37+
def time_duplicated_int_unique(self):
3438
self.int_unique.duplicated()
3539

36-
def time_int_duplicated(self):
40+
def time_duplicated_int(self):
3741
self.int.duplicated()
3842

39-
def time_float_duplicated(self):
43+
def time_duplicated_float(self):
4044
self.float.duplicated()
4145

46+
def time_match_strings(self):
47+
pd.match(self.all, self.uniques)
48+
4249
def time_add_overflow_pos_scalar(self):
4350
self.checked_add(self.arr, 1)
4451

@@ -58,7 +65,7 @@ def time_add_overflow_mixed_arr(self):
5865
self.checked_add(self.arr, self.arrmixed)
5966

6067

61-
class hashing(object):
68+
class Hashing(object):
6269
goal_time = 0.2
6370

6471
def setup(self):

asv_bench/benchmarks/attrs_caching.py

+16 -7
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,32 @@
11
from .pandas_vb_common import *
2+
from pandas.util.decorators import cache_readonly
23

34

4-
class getattr_dataframe_index(object):
5+
class DataFrameAttributes(object):
56
goal_time = 0.2
67

78
def setup(self):
89
self.df = DataFrame(np.random.randn(10, 6))
910
self.cur_index = self.df.index
1011

11-
def time_getattr_dataframe_index(self):
12+
def time_get_index(self):
1213
self.foo = self.df.index
1314

15+
def time_set_index(self):
16+
self.df.index = self.cur_index
17+
1418

15-
class setattr_dataframe_index(object):
19+
class CacheReadonly(object):
1620
goal_time = 0.2
1721

1822
def setup(self):
19-
self.df = DataFrame(np.random.randn(10, 6))
20-
self.cur_index = self.df.index
2123

22-
def time_setattr_dataframe_index(self):
23-
self.df.index = self.cur_index
24+
class Foo:
25+
26+
@cache_readonly
27+
def prop(self):
28+
return 5
29+
self.obj = Foo()
30+
31+
def time_cache_readonly(self):
32+
self.obj.prop

asv_bench/benchmarks/binary_ops.py

+44 -195
Original file line number | Diff line number | Diff line change
@@ -2,193 +2,79 @@
22
import pandas.computation.expressions as expr
33

44

5-
class frame_add(object):
5+
class Ops(object):
66
goal_time = 0.2
77

8-
def setup(self):
8+
params = [[True, False], ['default', 1]]
9+
param_names = ['use_numexpr', 'threads']
10+
11+
def setup(self, use_numexpr, threads):
912
self.df = DataFrame(np.random.randn(20000, 100))
1013
self.df2 = DataFrame(np.random.randn(20000, 100))
1114

12-
def time_frame_add(self):
13-
(self.df + self.df2)
15+
if threads != 'default':
16+
expr.set_numexpr_threads(threads)
17+
if not use_numexpr:
18+
expr.set_use_numexpr(False)
1419

1520

16-
class frame_add_no_ne(object):
17-
goal_time = 0.2
18-
19-
def setup(self):
20-
self.df = DataFrame(np.random.randn(20000, 100))
21-
self.df2 = DataFrame(np.random.randn(20000, 100))
22-
expr.set_use_numexpr(False)
23-
24-
def time_frame_add_no_ne(self):
21+
def time_frame_add(self, use_numexpr, threads):
2522
(self.df + self.df2)
2623

27-
def teardown(self):
28-
expr.set_use_numexpr(True)
24+
def time_frame_mult(self, use_numexpr, threads):
25+
(self.df * self.df2)
2926

27+
def time_frame_multi_and(self, use_numexpr, threads):
28+
self.df[((self.df > 0) & (self.df2 > 0))]
3029

31-
class frame_add_st(object):
32-
goal_time = 0.2
30+
def time_frame_comparison(self, use_numexpr, threads):
31+
(self.df > self.df2)
3332

34-
def setup(self):
35-
self.df = DataFrame(np.random.randn(20000, 100))
36-
self.df2 = DataFrame(np.random.randn(20000, 100))
37-
expr.set_numexpr_threads(1)
38-
39-
def time_frame_add_st(self):
40-
(self.df + self.df2)
41-
42-
def teardown(self):
33+
def teardown(self, use_numexpr, threads):
34+
expr.set_use_numexpr(True)
4335
expr.set_numexpr_threads()
4436

4537

46-
class frame_float_div(object):
38+
class Ops2(object):
4739
goal_time = 0.2
4840

4941
def setup(self):
5042
self.df = DataFrame(np.random.randn(1000, 1000))
5143
self.df2 = DataFrame(np.random.randn(1000, 1000))
5244

53-
def time_frame_float_div(self):
54-
(self.df // self.df2)
45+
self.df_int = DataFrame(
46+
np.random.random_integers(np.iinfo(np.int16).min,
47+
np.iinfo(np.int16).max,
48+
size=(1000, 1000)))
49+
self.df2_int = DataFrame(
50+
np.random.random_integers(np.iinfo(np.int16).min,
51+
np.iinfo(np.int16).max,
52+
size=(1000, 1000)))
5553

54+
## Division
5655

57-
class frame_float_div_by_zero(object):
58-
goal_time = 0.2
59-
60-
def setup(self):
61-
self.df = DataFrame(np.random.randn(1000, 1000))
56+
def time_frame_float_div(self):
57+
(self.df // self.df2)
6258

6359
def time_frame_float_div_by_zero(self):
6460
(self.df / 0)
6561

66-
67-
class frame_float_floor_by_zero(object):
68-
goal_time = 0.2
69-
70-
def setup(self):
71-
self.df = DataFrame(np.random.randn(1000, 1000))
72-
7362
def time_frame_float_floor_by_zero(self):
7463
(self.df // 0)
7564

76-
77-
class frame_float_mod(object):
78-
goal_time = 0.2
79-
80-
def setup(self):
81-
self.df = DataFrame(np.random.randn(1000, 1000))
82-
self.df2 = DataFrame(np.random.randn(1000, 1000))
83-
84-
def time_frame_float_mod(self):
85-
(self.df / self.df2)
86-
87-
88-
class frame_int_div_by_zero(object):
89-
goal_time = 0.2
90-
91-
def setup(self):
92-
self.df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
93-
9465
def time_frame_int_div_by_zero(self):
95-
(self.df / 0)
96-
66+
(self.df_int / 0)
9767

98-
class frame_int_mod(object):
99-
goal_time = 0.2
100-
101-
def setup(self):
102-
self.df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
103-
self.df2 = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
68+
## Modulo
10469

10570
def time_frame_int_mod(self):
10671
(self.df / self.df2)
10772

108-
109-
class frame_mult(object):
110-
goal_time = 0.2
111-
112-
def setup(self):
113-
self.df = DataFrame(np.random.randn(20000, 100))
114-
self.df2 = DataFrame(np.random.randn(20000, 100))
115-
116-
def time_frame_mult(self):
117-
(self.df * self.df2)
118-
119-
120-
class frame_mult_no_ne(object):
121-
goal_time = 0.2
122-
123-
def setup(self):
124-
self.df = DataFrame(np.random.randn(20000, 100))
125-
self.df2 = DataFrame(np.random.randn(20000, 100))
126-
expr.set_use_numexpr(False)
127-
128-
def time_frame_mult_no_ne(self):
129-
(self.df * self.df2)
130-
131-
def teardown(self):
132-
expr.set_use_numexpr(True)
133-
134-
135-
class frame_mult_st(object):
136-
goal_time = 0.2
137-
138-
def setup(self):
139-
self.df = DataFrame(np.random.randn(20000, 100))
140-
self.df2 = DataFrame(np.random.randn(20000, 100))
141-
expr.set_numexpr_threads(1)
142-
143-
def time_frame_mult_st(self):
144-
(self.df * self.df2)
145-
146-
def teardown(self):
147-
expr.set_numexpr_threads()
148-
149-
150-
class frame_multi_and(object):
151-
goal_time = 0.2
152-
153-
def setup(self):
154-
self.df = DataFrame(np.random.randn(20000, 100))
155-
self.df2 = DataFrame(np.random.randn(20000, 100))
156-
157-
def time_frame_multi_and(self):
158-
self.df[((self.df > 0) & (self.df2 > 0))]
159-
160-
161-
class frame_multi_and_no_ne(object):
162-
goal_time = 0.2
163-
164-
def setup(self):
165-
self.df = DataFrame(np.random.randn(20000, 100))
166-
self.df2 = DataFrame(np.random.randn(20000, 100))
167-
expr.set_use_numexpr(False)
168-
169-
def time_frame_multi_and_no_ne(self):
170-
self.df[((self.df > 0) & (self.df2 > 0))]
171-
172-
def teardown(self):
173-
expr.set_use_numexpr(True)
174-
175-
176-
class frame_multi_and_st(object):
177-
goal_time = 0.2
178-
179-
def setup(self):
180-
self.df = DataFrame(np.random.randn(20000, 100))
181-
self.df2 = DataFrame(np.random.randn(20000, 100))
182-
expr.set_numexpr_threads(1)
183-
184-
def time_frame_multi_and_st(self):
185-
self.df[((self.df > 0) & (self.df2 > 0))]
186-
187-
def teardown(self):
188-
expr.set_numexpr_threads()
73+
def time_frame_float_mod(self):
74+
(self.df / self.df2)
18975

19076

191-
class series_timestamp_compare(object):
77+
class Timeseries(object):
19278
goal_time = 0.2
19379

19480
def setup(self):
@@ -197,65 +83,28 @@ def setup(self):
19783
self.s = Series(date_range('20010101', periods=self.N, freq='T'))
19884
self.ts = self.s[self.halfway]
19985

86+
self.s2 = Series(date_range('20010101', periods=self.N, freq='s'))
87+
20088
def time_series_timestamp_compare(self):
20189
(self.s <= self.ts)
20290

203-
204-
class timestamp_ops_diff1(object):
205-
goal_time = 0.2
206-
N = 1000000
207-
208-
def setup(self):
209-
self.s = self.create()
210-
211-
def create(self):
212-
return Series(date_range('20010101', periods=self.N, freq='s'))
91+
def time_timestamp_series_compare(self):
92+
(self.ts >= self.s)
21393

21494
def time_timestamp_ops_diff1(self):
215-
self.s.diff()
216-
217-
class timestamp_tz_ops_diff1(timestamp_ops_diff1):
218-
N = 10000
219-
220-
def create(self):
221-
return Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern'))
222-
223-
class timestamp_ops_diff2(object):
224-
goal_time = 0.2
225-
N = 1000000
226-
227-
def setup(self):
228-
self.s = self.create()
229-
230-
def create(self):
231-
return Series(date_range('20010101', periods=self.N, freq='s'))
95+
self.s2.diff()
23296

23397
def time_timestamp_ops_diff2(self):
23498
(self.s - self.s.shift())
23599

236-
class timestamp_tz_ops_diff2(timestamp_ops_diff2):
237-
N = 10000
238100

239-
def create(self):
240-
return Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern'))
241101

242-
class timestamp_series_compare(object):
243-
goal_time = 0.2
244-
N = 1000000
102+
class TimeseriesTZ(Timeseries):
245103

246104
def setup(self):
105+
self.N = 1000000
247106
self.halfway = ((self.N // 2) - 1)
248-
self.s = self.create()
107+
self.s = Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern'))
249108
self.ts = self.s[self.halfway]
250109

251-
def create(self):
252-
return Series(date_range('20010101', periods=self.N, freq='T'))
253-
254-
def time_timestamp_series_compare(self):
255-
(self.ts >= self.s)
256-
257-
class timestamp_tz_series_compare(timestamp_series_compare):
258-
N = 10000
259-
260-
def create(self):
261-
return Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern'))
110+
self.s2 = Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern'))

0 commit comments

Comments
 (0)