Skip to content

Commit d539bdd

Browse files
mroeschke authored and jreback committed
CLN: ASV stat_ops (pandas-dev#19049)
1 parent 1d32264 commit d539bdd

File tree

1 file changed

+74
-165
lines changed

1 file changed

+74
-165
lines changed

asv_bench/benchmarks/stat_ops.py

+74 -165
Original file line number | Diff line number | Diff line change
@@ -1,205 +1,114 @@
1-
from .pandas_vb_common import *
1+
import numpy as np
2+
import pandas as pd
23

4+
from .pandas_vb_common import setup # noqa
35

4-
def _set_use_bottleneck_False():
5-
try:
6-
pd.options.compute.use_bottleneck = False
7-
except:
8-
from pandas.core import nanops
9-
nanops._USE_BOTTLENECK = False
106

7+
ops = ['mean', 'sum', 'median', 'std', 'skew', 'kurt', 'mad', 'prod', 'sem',
8+
'var']
119

12-
class FrameOps(object):
13-
goal_time = 0.2
14-
15-
param_names = ['op', 'use_bottleneck', 'dtype', 'axis']
16-
params = [['mean', 'sum', 'median'],
17-
[True, False],
18-
['float', 'int'],
19-
[0, 1]]
20-
21-
def setup(self, op, use_bottleneck, dtype, axis):
22-
if dtype == 'float':
23-
self.df = DataFrame(np.random.randn(100000, 4))
24-
elif dtype == 'int':
25-
self.df = DataFrame(np.random.randint(1000, size=(100000, 4)))
26-
27-
if not use_bottleneck:
28-
_set_use_bottleneck_False()
29-
30-
self.func = getattr(self.df, op)
31-
32-
def time_op(self, op, use_bottleneck, dtype, axis):
33-
self.func(axis=axis)
3410

11+
class FrameOps(object):
3512

36-
class stat_ops_level_frame_sum(object):
3713
goal_time = 0.2
14+
params = [ops, ['float', 'int'], [0, 1], [True, False]]
15+
param_names = ['op', 'dtype', 'axis', 'use_bottleneck']
3816

39-
def setup(self):
40-
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)])
41-
random.shuffle(self.index.values)
42-
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index)
43-
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1])
44-
45-
def time_stat_ops_level_frame_sum(self):
46-
self.df.sum(level=1)
47-
48-
49-
class stat_ops_level_frame_sum_multiple(object):
50-
goal_time = 0.2
17+
def setup(self, op, dtype, axis, use_bottleneck):
18+
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
19+
try:
20+
pd.options.compute.use_bottleneck = use_bottleneck
21+
except:
22+
from pandas.core import nanops
23+
nanops._USE_BOTTLENECK = use_bottleneck
24+
self.df_func = getattr(df, op)
5125

52-
def setup(self):
53-
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)])
54-
random.shuffle(self.index.values)
55-
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index)
56-
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1])
26+
def time_op(self, op, dtype, axis, use_bottleneck):
27+
self.df_func(axis=axis)
5728

58-
def time_stat_ops_level_frame_sum_multiple(self):
59-
self.df.sum(level=[0, 1])
6029

30+
class FrameMultiIndexOps(object):
6131

62-
class stat_ops_level_series_sum(object):
6332
goal_time = 0.2
33+
params = ([0, 1, [0, 1]], ops)
34+
param_names = ['level', 'op']
6435

65-
def setup(self):
66-
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)])
67-
random.shuffle(self.index.values)
68-
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index)
69-
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1])
36+
def setup(self, level, op):
37+
levels = [np.arange(10), np.arange(100), np.arange(100)]
38+
labels = [np.arange(10).repeat(10000),
39+
np.tile(np.arange(100).repeat(100), 10),
40+
np.tile(np.tile(np.arange(100), 100), 10)]
41+
index = pd.MultiIndex(levels=levels, labels=labels)
42+
df = pd.DataFrame(np.random.randn(len(index), 4), index=index)
43+
self.df_func = getattr(df, op)
7044

71-
def time_stat_ops_level_series_sum(self):
72-
self.df[1].sum(level=1)
45+
def time_op(self, level, op):
46+
self.df_func(level=level)
7347

7448

75-
class stat_ops_level_series_sum_multiple(object):
76-
goal_time = 0.2
77-
78-
def setup(self):
79-
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)])
80-
random.shuffle(self.index.values)
81-
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index)
82-
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1])
83-
84-
def time_stat_ops_level_series_sum_multiple(self):
85-
self.df[1].sum(level=[0, 1])
49+
class SeriesOps(object):
8650

87-
88-
class stat_ops_series_std(object):
8951
goal_time = 0.2
52+
params = [ops, ['float', 'int'], [True, False]]
53+
param_names = ['op', 'dtype', 'use_bottleneck']
9054

91-
def setup(self):
92-
self.s = Series(np.random.randn(100000), index=np.arange(100000))
93-
self.s[::2] = np.nan
94-
95-
def time_stat_ops_series_std(self):
96-
self.s.std()
55+
def setup(self, op, dtype, use_bottleneck):
56+
s = pd.Series(np.random.randn(100000)).astype(dtype)
57+
try:
58+
pd.options.compute.use_bottleneck = use_bottleneck
59+
except:
60+
from pandas.core import nanops
61+
nanops._USE_BOTTLENECK = use_bottleneck
62+
self.s_func = getattr(s, op)
9763

64+
def time_op(self, op, dtype, use_bottleneck):
65+
self.s_func()
9866

99-
class stats_corr_spearman(object):
100-
goal_time = 0.2
10167

102-
def setup(self):
103-
self.df = DataFrame(np.random.randn(1000, 30))
68+
class SeriesMultiIndexOps(object):
10469

105-
def time_stats_corr_spearman(self):
106-
self.df.corr(method='spearman')
107-
108-
109-
class stats_rank2d_axis0_average(object):
11070
goal_time = 0.2
71+
params = ([0, 1, [0, 1]], ops)
72+
param_names = ['level', 'op']
11173

112-
def setup(self):
113-
self.df = DataFrame(np.random.randn(5000, 50))
114-
115-
def time_stats_rank2d_axis0_average(self):
116-
self.df.rank()
74+
def setup(self, level, op):
75+
levels = [np.arange(10), np.arange(100), np.arange(100)]
76+
labels = [np.arange(10).repeat(10000),
77+
np.tile(np.arange(100).repeat(100), 10),
78+
np.tile(np.tile(np.arange(100), 100), 10)]
79+
index = pd.MultiIndex(levels=levels, labels=labels)
80+
s = pd.Series(np.random.randn(len(index)), index=index)
81+
self.s_func = getattr(s, op)
11782

83+
def time_op(self, level, op):
84+
self.s_func(level=level)
11885

119-
class stats_rank2d_axis1_average(object):
120-
goal_time = 0.2
121-
122-
def setup(self):
123-
self.df = DataFrame(np.random.randn(5000, 50))
12486

125-
def time_stats_rank2d_axis1_average(self):
126-
self.df.rank(1)
87+
class Rank(object):
12788

128-
129-
class stats_rank_average(object):
130-
goal_time = 0.2
131-
132-
def setup(self):
133-
self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)])
134-
self.s = Series(self.values)
135-
136-
def time_stats_rank_average(self):
137-
self.s.rank()
138-
139-
140-
class stats_rank_average_int(object):
141-
goal_time = 0.2
142-
143-
def setup(self):
144-
self.values = np.random.randint(0, 100000, size=200000)
145-
self.s = Series(self.values)
146-
147-
def time_stats_rank_average_int(self):
148-
self.s.rank()
149-
150-
151-
class stats_rank_pct_average(object):
15289
goal_time = 0.2
90+
params = [['DataFrame', 'Series'], [True, False]]
91+
param_names = ['constructor', 'pct']
15392

154-
def setup(self):
155-
self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)])
156-
self.s = Series(self.values)
93+
def setup(self, constructor, pct):
94+
values = np.random.randn(10**5)
95+
self.data = getattr(pd, constructor)(values)
15796

158-
def time_stats_rank_pct_average(self):
159-
self.s.rank(pct=True)
97+
def time_rank(self, constructor, pct):
98+
self.data.rank(pct=pct)
16099

100+
def time_average_old(self, constructor, pct):
101+
self.data.rank(pct=pct) / len(self.data)
161102

162-
class stats_rank_pct_average_old(object):
163-
goal_time = 0.2
164-
165-
def setup(self):
166-
self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)])
167-
self.s = Series(self.values)
168-
169-
def time_stats_rank_pct_average_old(self):
170-
(self.s.rank() / len(self.s))
171103

104+
class Correlation(object):
172105

173-
class stats_rolling_mean(object):
174106
goal_time = 0.2
107+
params = ['spearman', 'kendall', 'pearson']
108+
param_names = ['method']
175109

176-
def setup(self):
177-
self.arr = np.random.randn(100000)
178-
self.win = 100
179-
180-
def time_rolling_mean(self):
181-
rolling_mean(self.arr, self.win)
182-
183-
def time_rolling_median(self):
184-
rolling_median(self.arr, self.win)
185-
186-
def time_rolling_min(self):
187-
rolling_min(self.arr, self.win)
188-
189-
def time_rolling_max(self):
190-
rolling_max(self.arr, self.win)
191-
192-
def time_rolling_sum(self):
193-
rolling_sum(self.arr, self.win)
194-
195-
def time_rolling_std(self):
196-
rolling_std(self.arr, self.win)
197-
198-
def time_rolling_var(self):
199-
rolling_var(self.arr, self.win)
200-
201-
def time_rolling_skew(self):
202-
rolling_skew(self.arr, self.win)
110+
def setup(self, method):
111+
self.df = pd.DataFrame(np.random.randn(1000, 30))
203112

204-
def time_rolling_kurt(self):
205-
rolling_kurt(self.arr, self.win)
113+
def time_corr(self, method):
114+
self.df.corr(method=method)

0 commit comments

Comments
 (0)