Skip to content

Commit c9ec227

Browse files
committed
CLN: ASV stat_ops
1 parent cfa5ea6 commit c9ec227

File tree

2 files changed

+86
-161
lines changed

2 files changed

+86
-161
lines changed

asv_bench/benchmarks/rolling.py

+15
Original file line number | Diff line number | Diff line change
@@ -39,3 +39,18 @@ def setup(self, contructor, window, dtype, percentile):
3939

4040
def time_quantile(self, contructor, window, dtype, percentile):
4141
self.roll.quantile(percentile)
42+
43+
44+
class DepreciatedRolling(object):
    """Benchmark the top-level ``pd.rolling_*`` functions.

    Each case looks up one function by name on the ``pd`` namespace and
    calls it with a 1-D random array and a fixed window size.
    """

    sample_time = 0.2
    # One benchmark case per function name, run in this order.
    params = [
        'rolling_median',
        'rolling_mean',
        'rolling_min',
        'rolling_max',
        'rolling_var',
        'rolling_skew',
        'rolling_kurt',
        'rolling_std',
    ]
    param_names = ['method']

    def setup(self, method):
        """Create the input array and window size shared by every case."""
        self.win = 100
        self.arr = np.random.randn(100000)

    def time_method(self, method):
        """Call ``pd.<method>(arr, win)``; the result is discarded."""
        rolling_func = getattr(pd, method)
        rolling_func(self.arr, self.win)

asv_bench/benchmarks/stat_ops.py

+71-161
Original file line number | Diff line number | Diff line change
@@ -1,205 +1,115 @@
1-
from .pandas_vb_common import *
1+
import numpy as np
2+
import pandas as pd
23

3-
4-
def _set_use_bottleneck_False():
5-
try:
6-
pd.options.compute.use_bottleneck = False
7-
except:
8-
from pandas.core import nanops
9-
nanops._USE_BOTTLENECK = False
4+
from .pandas_vb_common import setup # noqa
105

116

127
class FrameOps(object):
13-
goal_time = 0.2
148

9+
goal_time = 0.2
1510
param_names = ['op', 'use_bottleneck', 'dtype', 'axis']
16-
params = [['mean', 'sum', 'median'],
11+
params = [['mean', 'sum', 'median', 'std'],
1712
[True, False],
1813
['float', 'int'],
1914
[0, 1]]
2015

2116
def setup(self, op, use_bottleneck, dtype, axis):
22-
if dtype == 'float':
23-
self.df = DataFrame(np.random.randn(100000, 4))
24-
elif dtype == 'int':
25-
self.df = DataFrame(np.random.randint(1000, size=(100000, 4)))
26-
27-
if not use_bottleneck:
28-
_set_use_bottleneck_False()
29-
30-
self.func = getattr(self.df, op)
17+
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
18+
try:
19+
pd.options.compute.use_bottleneck = use_bottleneck
20+
except:
21+
from pandas.core import nanops
22+
nanops._USE_BOTTLENECK = use_bottleneck
23+
self.df_func = getattr(df, op)
3124

3225
def time_op(self, op, use_bottleneck, dtype, axis):
33-
self.func(axis=axis)
34-
35-
36-
class stat_ops_level_frame_sum(object):
37-
goal_time = 0.2
38-
39-
def setup(self):
40-
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)])
41-
random.shuffle(self.index.values)
42-
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index)
43-
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1])
44-
45-
def time_stat_ops_level_frame_sum(self):
46-
self.df.sum(level=1)
47-
48-
49-
class stat_ops_level_frame_sum_multiple(object):
50-
goal_time = 0.2
51-
52-
def setup(self):
53-
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)])
54-
random.shuffle(self.index.values)
55-
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index)
56-
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1])
57-
58-
def time_stat_ops_level_frame_sum_multiple(self):
59-
self.df.sum(level=[0, 1])
60-
61-
62-
class stat_ops_level_series_sum(object):
63-
goal_time = 0.2
64-
65-
def setup(self):
66-
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)])
67-
random.shuffle(self.index.values)
68-
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index)
69-
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1])
70-
71-
def time_stat_ops_level_series_sum(self):
72-
self.df[1].sum(level=1)
73-
74-
75-
class stat_ops_level_series_sum_multiple(object):
76-
goal_time = 0.2
77-
78-
def setup(self):
79-
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)])
80-
random.shuffle(self.index.values)
81-
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index)
82-
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1])
26+
self.df_func(axis=axis)
8327

84-
def time_stat_ops_level_series_sum_multiple(self):
85-
self.df[1].sum(level=[0, 1])
8628

29+
class FrameMultiIndexOps(object):
8730

88-
class stat_ops_series_std(object):
8931
goal_time = 0.2
32+
params = ([0, 1, [0, 1]], ['mean', 'sum', 'median'])
33+
param_names = ['level', 'op']
9034

91-
def setup(self):
92-
self.s = Series(np.random.randn(100000), index=np.arange(100000))
93-
self.s[::2] = np.nan
35+
def setup(self, level, op):
36+
levels = [np.arange(10), np.arange(100), np.arange(100)]
37+
labels = [np.arange(10).repeat(10000),
38+
np.tile(np.arange(100).repeat(100), 10),
39+
np.tile(np.tile(np.arange(100), 100), 10)]
40+
index = pd.MultiIndex(levels=levels, labels=labels)
41+
df = pd.DataFrame(np.random.randn(len(index), 4), index=index)
42+
self.df_func = getattr(df, op)
9443

95-
def time_stat_ops_series_std(self):
96-
self.s.std()
44+
def time_op(self, level, op):
    """Call the bound DataFrame reduction with ``level=level``."""
    reduce_frame = self.df_func
    reduce_frame(level=level)
9746

9847

99-
class stats_corr_spearman(object):
100-
goal_time = 0.2
101-
102-
def setup(self):
103-
self.df = DataFrame(np.random.randn(1000, 30))
104-
105-
def time_stats_corr_spearman(self):
106-
self.df.corr(method='spearman')
48+
class SeriesOps(object):
10749

108-
109-
class stats_rank2d_axis0_average(object):
11050
goal_time = 0.2
51+
param_names = ['op', 'use_bottleneck', 'dtype']
52+
params = [['mean', 'sum', 'median', 'std'],
53+
[True, False],
54+
['float', 'int']]
11155

112-
def setup(self):
113-
self.df = DataFrame(np.random.randn(5000, 50))
114-
115-
def time_stats_rank2d_axis0_average(self):
116-
self.df.rank()
117-
118-
119-
class stats_rank2d_axis1_average(object):
120-
goal_time = 0.2
56+
def setup(self, op, use_bottleneck, dtype):
57+
s = pd.Series(np.random.randn(100000)).astype(dtype)
58+
try:
59+
pd.options.compute.use_bottleneck = use_bottleneck
60+
except:
61+
from pandas.core import nanops
62+
nanops._USE_BOTTLENECK = use_bottleneck
63+
self.s_func = getattr(s, op)
12164

122-
def setup(self):
123-
self.df = DataFrame(np.random.randn(5000, 50))
65+
def time_op(self, op, use_bottleneck, dtype):
    """Call the bound Series reduction with no arguments."""
    reduce_series = self.s_func
    reduce_series()
12467

125-
def time_stats_rank2d_axis1_average(self):
126-
self.df.rank(1)
12768

69+
class SeriesMultiIndexOps(object):
12870

129-
class stats_rank_average(object):
13071
goal_time = 0.2
72+
params = ([0, 1, [0, 1]], ['mean', 'sum', 'median'])
73+
param_names = ['level', 'op']
13174

132-
def setup(self):
133-
self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)])
134-
self.s = Series(self.values)
135-
136-
def time_stats_rank_average(self):
137-
self.s.rank()
138-
139-
140-
class stats_rank_average_int(object):
141-
goal_time = 0.2
75+
def setup(self, level, op):
76+
levels = [np.arange(10), np.arange(100), np.arange(100)]
77+
labels = [np.arange(10).repeat(10000),
78+
np.tile(np.arange(100).repeat(100), 10),
79+
np.tile(np.tile(np.arange(100), 100), 10)]
80+
index = pd.MultiIndex(levels=levels, labels=labels)
81+
s = pd.Series(np.random.randn(len(index)), index=index)
82+
self.s_func = getattr(s, op)
14283

143-
def setup(self):
144-
self.values = np.random.randint(0, 100000, size=200000)
145-
self.s = Series(self.values)
84+
def time_op(self, level, op):
    """Call the bound Series reduction with ``level=level``."""
    reduce_series = self.s_func
    reduce_series(level=level)
14686

147-
def time_stats_rank_average_int(self):
148-
self.s.rank()
14987

88+
class Rank(object):
15089

151-
class stats_rank_pct_average(object):
15290
goal_time = 0.2
91+
params = [['DataFrame', 'Series'], [True, False]]
92+
param_names = ['constructor', 'pct']
15393

154-
def setup(self):
155-
self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)])
156-
self.s = Series(self.values)
157-
158-
def time_stats_rank_pct_average(self):
159-
self.s.rank(pct=True)
160-
94+
def setup(self, constructor, pct):
95+
values = np.random.randn(10**5)
96+
self.data = getattr(pd, constructor)(values)
16197

162-
class stats_rank_pct_average_old(object):
163-
goal_time = 0.2
98+
def time_rank(self, constructor, pct):
    """Rank ``self.data``, forwarding the ``pct`` flag."""
    container = self.data
    container.rank(pct=pct)
164100

165-
def setup(self):
166-
self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)])
167-
self.s = Series(self.values)
101+
def time_average_old(self, constructor, pct):
    """Rank ``self.data`` and divide the result by its length."""
    # NOTE(review): with pct=True the rank is presumably already a
    # fraction and is divided by the length again - confirm intended.
    ranked = self.data.rank(pct=pct)
    ranked / len(self.data)
168103

169-
def time_stats_rank_pct_average_old(self):
170-
(self.s.rank() / len(self.s))
171104

105+
class Correlation(object):
172106

173-
class stats_rolling_mean(object):
174107
goal_time = 0.2
108+
params = ['spearman', 'kendall', 'pearson']
109+
param_names = ['method']
175110

176-
def setup(self):
177-
self.arr = np.random.randn(100000)
178-
self.win = 100
179-
180-
def time_rolling_mean(self):
181-
rolling_mean(self.arr, self.win)
182-
183-
def time_rolling_median(self):
184-
rolling_median(self.arr, self.win)
185-
186-
def time_rolling_min(self):
187-
rolling_min(self.arr, self.win)
188-
189-
def time_rolling_max(self):
190-
rolling_max(self.arr, self.win)
191-
192-
def time_rolling_sum(self):
193-
rolling_sum(self.arr, self.win)
194-
195-
def time_rolling_std(self):
196-
rolling_std(self.arr, self.win)
197-
198-
def time_rolling_var(self):
199-
rolling_var(self.arr, self.win)
200-
201-
def time_rolling_skew(self):
202-
rolling_skew(self.arr, self.win)
111+
def setup(self, method):
112+
self.df = pd.DataFrame(np.random.randn(1000, 30))
203113

204-
def time_rolling_kurt(self):
205-
rolling_kurt(self.arr, self.win)
114+
def time_corr(self, method):
    """Compute the frame's correlation matrix using ``method``."""
    frame = self.df
    frame.corr(method=method)

0 commit comments

Comments
 (0)