Skip to content

Commit d7245f5

Browse files
Clean-up algos/attrs/binary_ops benchmarks
1 parent 36a6370 commit d7245f5

File tree

4 files changed: +56 -217 lines changed

asv_bench/benchmarks/algorithms.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import pandas as pd
33

44

5-
class algorithm(object):
5+
class Algorithms(object):
66
goal_time = 0.2
77

88
def setup(self):
@@ -15,17 +15,17 @@ def setup(self):
1515
self.int = pd.Int64Index(np.arange(N).repeat(5))
1616
self.float = pd.Float64Index(np.random.randn(N).repeat(5))
1717

18-
def time_int_factorize(self):
18+
def time_factorize_int(self):
1919
self.int.factorize()
2020

21-
def time_float_factorize(self):
21+
def time_factorize_float(self):
2222
self.int.factorize()
2323

24-
def time_int_unique_duplicated(self):
24+
def time_duplicated_int_unique(self):
2525
self.int_unique.duplicated()
2626

27-
def time_int_duplicated(self):
27+
def time_duplicated_int(self):
2828
self.int.duplicated()
2929

30-
def time_float_duplicated(self):
30+
def time_duplicated_float(self):
3131
self.float.duplicated()

asv_bench/benchmarks/attrs_caching.py

+3-11
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,15 @@
11
from .pandas_vb_common import *
22

33

4-
class getattr_dataframe_index(object):
4+
class DataFrameAttributes(object):
55
goal_time = 0.2
66

77
def setup(self):
88
self.df = DataFrame(np.random.randn(10, 6))
99
self.cur_index = self.df.index
1010

11-
def time_getattr_dataframe_index(self):
11+
def time_get_index(self):
1212
self.foo = self.df.index
1313

14-
15-
class setattr_dataframe_index(object):
16-
goal_time = 0.2
17-
18-
def setup(self):
19-
self.df = DataFrame(np.random.randn(10, 6))
20-
self.cur_index = self.df.index
21-
22-
def time_setattr_dataframe_index(self):
14+
def time_set_index(self):
2315
self.df.index = self.cur_index

asv_bench/benchmarks/binary_ops.py

+42-196
Original file line number | Diff line number | Diff line change
@@ -2,193 +2,76 @@
22
import pandas.computation.expressions as expr
33

44

5-
class frame_add(object):
5+
class Ops(object):
66
goal_time = 0.2
77

8-
def setup(self):
9-
self.df = DataFrame(np.random.randn(20000, 100))
10-
self.df2 = DataFrame(np.random.randn(20000, 100))
11-
12-
def time_frame_add(self):
13-
(self.df + self.df2)
8+
params = [[True, False], ['default', 1]]
9+
param_names = ['use_numexpr', 'threads']
1410

15-
16-
class frame_add_no_ne(object):
17-
goal_time = 0.2
18-
19-
def setup(self):
11+
def setup(self, use_numexpr, threads):
2012
self.df = DataFrame(np.random.randn(20000, 100))
2113
self.df2 = DataFrame(np.random.randn(20000, 100))
22-
expr.set_use_numexpr(False)
2314

24-
def time_frame_add_no_ne(self):
25-
(self.df + self.df2)
26-
27-
def teardown(self):
28-
expr.set_use_numexpr(True)
15+
if threads != 'default':
16+
expr.set_numexpr_threads(threads)
17+
if not use_numexpr:
18+
expr.set_use_numexpr(False)
2919

3020

31-
class frame_add_st(object):
32-
goal_time = 0.2
21+
def time_frame_add(self, use_numexpr, threads):
22+
(self.df + self.df2)
3323

34-
def setup(self):
35-
self.df = DataFrame(np.random.randn(20000, 100))
36-
self.df2 = DataFrame(np.random.randn(20000, 100))
37-
expr.set_numexpr_threads(1)
24+
def time_frame_mult(self, use_numexpr, threads):
25+
(self.df * self.df2)
3826

39-
def time_frame_add_st(self):
40-
(self.df + self.df2)
27+
def time_frame_multi_and(self, use_numexpr, threads):
28+
self.df[((self.df > 0) & (self.df2 > 0))]
4129

42-
def teardown(self):
30+
def teardown(self, use_numexpr, threads):
31+
expr.set_use_numexpr(True)
4332
expr.set_numexpr_threads()
4433

4534

46-
class frame_float_div(object):
35+
class Ops2(object):
4736
goal_time = 0.2
4837

4938
def setup(self):
5039
self.df = DataFrame(np.random.randn(1000, 1000))
5140
self.df2 = DataFrame(np.random.randn(1000, 1000))
5241

53-
def time_frame_float_div(self):
54-
(self.df // self.df2)
42+
self.df_int = DataFrame(
43+
np.random.random_integers(np.iinfo(np.int16).min,
44+
np.iinfo(np.int16).max,
45+
size=(1000, 1000)))
46+
self.df2_int = DataFrame(
47+
np.random.random_integers(np.iinfo(np.int16).min,
48+
np.iinfo(np.int16).max,
49+
size=(1000, 1000)))
5550

51+
## Division
5652

57-
class frame_float_div_by_zero(object):
58-
goal_time = 0.2
59-
60-
def setup(self):
61-
self.df = DataFrame(np.random.randn(1000, 1000))
53+
def time_frame_float_div(self):
54+
(self.df // self.df2)
6255

6356
def time_frame_float_div_by_zero(self):
6457
(self.df / 0)
6558

66-
67-
class frame_float_floor_by_zero(object):
68-
goal_time = 0.2
69-
70-
def setup(self):
71-
self.df = DataFrame(np.random.randn(1000, 1000))
72-
7359
def time_frame_float_floor_by_zero(self):
7460
(self.df // 0)
7561

76-
77-
class frame_float_mod(object):
78-
goal_time = 0.2
79-
80-
def setup(self):
81-
self.df = DataFrame(np.random.randn(1000, 1000))
82-
self.df2 = DataFrame(np.random.randn(1000, 1000))
83-
84-
def time_frame_float_mod(self):
85-
(self.df / self.df2)
86-
87-
88-
class frame_int_div_by_zero(object):
89-
goal_time = 0.2
90-
91-
def setup(self):
92-
self.df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
93-
9462
def time_frame_int_div_by_zero(self):
95-
(self.df / 0)
63+
(self.df_int / 0)
9664

97-
98-
class frame_int_mod(object):
99-
goal_time = 0.2
100-
101-
def setup(self):
102-
self.df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
103-
self.df2 = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
65+
## Modulo
10466

10567
def time_frame_int_mod(self):
10668
(self.df / self.df2)
10769

108-
109-
class frame_mult(object):
110-
goal_time = 0.2
111-
112-
def setup(self):
113-
self.df = DataFrame(np.random.randn(20000, 100))
114-
self.df2 = DataFrame(np.random.randn(20000, 100))
115-
116-
def time_frame_mult(self):
117-
(self.df * self.df2)
118-
119-
120-
class frame_mult_no_ne(object):
121-
goal_time = 0.2
122-
123-
def setup(self):
124-
self.df = DataFrame(np.random.randn(20000, 100))
125-
self.df2 = DataFrame(np.random.randn(20000, 100))
126-
expr.set_use_numexpr(False)
127-
128-
def time_frame_mult_no_ne(self):
129-
(self.df * self.df2)
130-
131-
def teardown(self):
132-
expr.set_use_numexpr(True)
133-
134-
135-
class frame_mult_st(object):
136-
goal_time = 0.2
137-
138-
def setup(self):
139-
self.df = DataFrame(np.random.randn(20000, 100))
140-
self.df2 = DataFrame(np.random.randn(20000, 100))
141-
expr.set_numexpr_threads(1)
142-
143-
def time_frame_mult_st(self):
144-
(self.df * self.df2)
145-
146-
def teardown(self):
147-
expr.set_numexpr_threads()
148-
149-
150-
class frame_multi_and(object):
151-
goal_time = 0.2
152-
153-
def setup(self):
154-
self.df = DataFrame(np.random.randn(20000, 100))
155-
self.df2 = DataFrame(np.random.randn(20000, 100))
156-
157-
def time_frame_multi_and(self):
158-
self.df[((self.df > 0) & (self.df2 > 0))]
159-
160-
161-
class frame_multi_and_no_ne(object):
162-
goal_time = 0.2
163-
164-
def setup(self):
165-
self.df = DataFrame(np.random.randn(20000, 100))
166-
self.df2 = DataFrame(np.random.randn(20000, 100))
167-
expr.set_use_numexpr(False)
168-
169-
def time_frame_multi_and_no_ne(self):
170-
self.df[((self.df > 0) & (self.df2 > 0))]
171-
172-
def teardown(self):
173-
expr.set_use_numexpr(True)
174-
175-
176-
class frame_multi_and_st(object):
177-
goal_time = 0.2
178-
179-
def setup(self):
180-
self.df = DataFrame(np.random.randn(20000, 100))
181-
self.df2 = DataFrame(np.random.randn(20000, 100))
182-
expr.set_numexpr_threads(1)
183-
184-
def time_frame_multi_and_st(self):
185-
self.df[((self.df > 0) & (self.df2 > 0))]
186-
187-
def teardown(self):
188-
expr.set_numexpr_threads()
70+
def time_frame_float_mod(self):
71+
(self.df / self.df2)
18972

19073

191-
class series_timestamp_compare(object):
74+
class Timeseries(object):
19275
goal_time = 0.2
19376

19477
def setup(self):
@@ -197,65 +80,28 @@ def setup(self):
19780
self.s = Series(date_range('20010101', periods=self.N, freq='T'))
19881
self.ts = self.s[self.halfway]
19982

83+
self.s2 = Series(date_range('20010101', periods=self.N, freq='s'))
84+
20085
def time_series_timestamp_compare(self):
20186
(self.s <= self.ts)
20287

203-
204-
class timestamp_ops_diff1(object):
205-
goal_time = 0.2
206-
N = 1000000
207-
208-
def setup(self):
209-
self.s = self.create()
210-
211-
def create(self):
212-
return Series(date_range('20010101', periods=self.N, freq='s'))
88+
def time_timestamp_series_compare(self):
89+
(self.ts >= self.s)
21390

21491
def time_timestamp_ops_diff1(self):
215-
self.s.diff()
216-
217-
class timestamp_tz_ops_diff1(timestamp_ops_diff1):
218-
N = 10000
219-
220-
def create(self):
221-
return Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern'))
222-
223-
class timestamp_ops_diff2(object):
224-
goal_time = 0.2
225-
N = 1000000
226-
227-
def setup(self):
228-
self.s = self.create()
229-
230-
def create(self):
231-
return Series(date_range('20010101', periods=self.N, freq='s'))
92+
self.s2.diff()
23293

23394
def time_timestamp_ops_diff2(self):
23495
(self.s - self.s.shift())
23596

236-
class timestamp_tz_ops_diff2(timestamp_ops_diff2):
237-
N = 10000
23897

239-
def create(self):
240-
return Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern'))
24198

242-
class timestamp_series_compare(object):
243-
goal_time = 0.2
244-
N = 1000000
99+
class TimeseriesTZ(Timeseries):
245100

246101
def setup(self):
102+
self.N = 1000000
247103
self.halfway = ((self.N // 2) - 1)
248-
self.s = self.create()
104+
self.s = Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern'))
249105
self.ts = self.s[self.halfway]
250106

251-
def create(self):
252-
return Series(date_range('20010101', periods=self.N, freq='T'))
253-
254-
def time_timestamp_series_compare(self):
255-
(self.ts >= self.s)
256-
257-
class timestamp_tz_series_compare(timestamp_series_compare):
258-
N = 10000
259-
260-
def create(self):
261-
return Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern'))
107+
self.s2 = Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern'))

asv_bench/benchmarks/inference.py

+5-4
Original file line number | Diff line number | Diff line change
@@ -146,14 +146,15 @@ class to_numeric(object):
146146
[None, 'integer', 'signed', 'unsigned', 'float']]
147147

148148
N = 500000
149+
N2 = int(N / 2)
149150

150151
data_dict = {
151-
'string-int': (['1'] * (N / 2)) + ([2] * (N / 2)),
152-
'string-nint': (['-1'] * (N / 2)) + ([2] * (N / 2)),
152+
'string-int': (['1'] * N2) + ([2] * N2),
153+
'string-nint': (['-1'] * N2) + ([2] * N2),
153154
'datetime64': np.repeat(np.array(['1970-01-01', '1970-01-02'],
154155
dtype='datetime64[D]'), N),
155-
'string-float': (['1.1'] * (N / 2)) + ([2] * (N / 2)),
156-
'int-list': ([1] * (N / 2)) + ([2] * (N / 2)),
156+
'string-float': (['1.1'] * N2) + ([2] * N2),
157+
'int-list': ([1] * N2) + ([2] * N2),
157158
'int32': np.repeat(np.int32(1), N)
158159
}
159160

0 commit comments

Comments
 (0)