|
1 |
| -from .pandas_vb_common import * |
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
2 | 3 |
|
| 4 | +from .pandas_vb_common import setup # noqa |
3 | 5 |
|
4 |
| -def _set_use_bottleneck_False(): |
5 |
| - try: |
6 |
| - pd.options.compute.use_bottleneck = False |
7 |
| - except: |
8 |
| - from pandas.core import nanops |
9 |
| - nanops._USE_BOTTLENECK = False |
10 | 6 |
|
| 7 | +ops = ['mean', 'sum', 'median', 'std', 'skew', 'kurt', 'mad', 'prod', 'sem', |
| 8 | + 'var'] |
11 | 9 |
|
12 |
| -class FrameOps(object): |
13 |
| - goal_time = 0.2 |
14 |
| - |
15 |
| - param_names = ['op', 'use_bottleneck', 'dtype', 'axis'] |
16 |
| - params = [['mean', 'sum', 'median'], |
17 |
| - [True, False], |
18 |
| - ['float', 'int'], |
19 |
| - [0, 1]] |
20 |
| - |
21 |
| - def setup(self, op, use_bottleneck, dtype, axis): |
22 |
| - if dtype == 'float': |
23 |
| - self.df = DataFrame(np.random.randn(100000, 4)) |
24 |
| - elif dtype == 'int': |
25 |
| - self.df = DataFrame(np.random.randint(1000, size=(100000, 4))) |
26 |
| - |
27 |
| - if not use_bottleneck: |
28 |
| - _set_use_bottleneck_False() |
29 |
| - |
30 |
| - self.func = getattr(self.df, op) |
31 |
| - |
32 |
| - def time_op(self, op, use_bottleneck, dtype, axis): |
33 |
| - self.func(axis=axis) |
34 | 10 |
|
| 11 | +class FrameOps(object): |
35 | 12 |
|
36 |
| -class stat_ops_level_frame_sum(object): |
37 | 13 | goal_time = 0.2
|
| 14 | + params = [ops, ['float', 'int'], [0, 1], [True, False]] |
| 15 | + param_names = ['op', 'dtype', 'axis', 'use_bottleneck'] |
38 | 16 |
|
39 |
| - def setup(self): |
40 |
| - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) |
41 |
| - random.shuffle(self.index.values) |
42 |
| - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) |
43 |
| - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) |
44 |
| - |
45 |
| - def time_stat_ops_level_frame_sum(self): |
46 |
| - self.df.sum(level=1) |
47 |
| - |
48 |
| - |
49 |
| -class stat_ops_level_frame_sum_multiple(object): |
50 |
| - goal_time = 0.2 |
| 17 | + def setup(self, op, dtype, axis, use_bottleneck): |
| 18 | + df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype) |
| 19 | + try: |
| 20 | + pd.options.compute.use_bottleneck = use_bottleneck |
| 21 | + except: |
| 22 | + from pandas.core import nanops |
| 23 | + nanops._USE_BOTTLENECK = use_bottleneck |
| 24 | + self.df_func = getattr(df, op) |
51 | 25 |
|
52 |
| - def setup(self): |
53 |
| - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) |
54 |
| - random.shuffle(self.index.values) |
55 |
| - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) |
56 |
| - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) |
| 26 | + def time_op(self, op, dtype, axis, use_bottleneck): |
| 27 | + self.df_func(axis=axis) |
57 | 28 |
|
58 |
| - def time_stat_ops_level_frame_sum_multiple(self): |
59 |
| - self.df.sum(level=[0, 1]) |
60 | 29 |
|
| 30 | +class FrameMultiIndexOps(object): |
61 | 31 |
|
62 |
| -class stat_ops_level_series_sum(object): |
63 | 32 | goal_time = 0.2
|
| 33 | + params = ([0, 1, [0, 1]], ops) |
| 34 | + param_names = ['level', 'op'] |
64 | 35 |
|
65 |
| - def setup(self): |
66 |
| - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) |
67 |
| - random.shuffle(self.index.values) |
68 |
| - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) |
69 |
| - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) |
| 36 | + def setup(self, level, op): |
| 37 | + levels = [np.arange(10), np.arange(100), np.arange(100)] |
| 38 | + labels = [np.arange(10).repeat(10000), |
| 39 | + np.tile(np.arange(100).repeat(100), 10), |
| 40 | + np.tile(np.tile(np.arange(100), 100), 10)] |
| 41 | + index = pd.MultiIndex(levels=levels, labels=labels) |
| 42 | + df = pd.DataFrame(np.random.randn(len(index), 4), index=index) |
| 43 | + self.df_func = getattr(df, op) |
70 | 44 |
|
71 |
| - def time_stat_ops_level_series_sum(self): |
72 |
| - self.df[1].sum(level=1) |
| 45 | + def time_op(self, level, op): |
| 46 | + self.df_func(level=level) |
73 | 47 |
|
74 | 48 |
|
75 |
| -class stat_ops_level_series_sum_multiple(object): |
76 |
| - goal_time = 0.2 |
77 |
| - |
78 |
| - def setup(self): |
79 |
| - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) |
80 |
| - random.shuffle(self.index.values) |
81 |
| - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) |
82 |
| - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) |
83 |
| - |
84 |
| - def time_stat_ops_level_series_sum_multiple(self): |
85 |
| - self.df[1].sum(level=[0, 1]) |
| 49 | +class SeriesOps(object): |
86 | 50 |
|
87 |
| - |
88 |
| -class stat_ops_series_std(object): |
89 | 51 | goal_time = 0.2
|
| 52 | + params = [ops, ['float', 'int'], [True, False]] |
| 53 | + param_names = ['op', 'dtype', 'use_bottleneck'] |
90 | 54 |
|
91 |
| - def setup(self): |
92 |
| - self.s = Series(np.random.randn(100000), index=np.arange(100000)) |
93 |
| - self.s[::2] = np.nan |
94 |
| - |
95 |
| - def time_stat_ops_series_std(self): |
96 |
| - self.s.std() |
| 55 | + def setup(self, op, dtype, use_bottleneck): |
| 56 | + s = pd.Series(np.random.randn(100000)).astype(dtype) |
| 57 | + try: |
| 58 | + pd.options.compute.use_bottleneck = use_bottleneck |
| 59 | + except: |
| 60 | + from pandas.core import nanops |
| 61 | + nanops._USE_BOTTLENECK = use_bottleneck |
| 62 | + self.s_func = getattr(s, op) |
97 | 63 |
|
| 64 | + def time_op(self, op, dtype, use_bottleneck): |
| 65 | + self.s_func() |
98 | 66 |
|
99 |
| -class stats_corr_spearman(object): |
100 |
| - goal_time = 0.2 |
101 | 67 |
|
102 |
| - def setup(self): |
103 |
| - self.df = DataFrame(np.random.randn(1000, 30)) |
| 68 | +class SeriesMultiIndexOps(object): |
104 | 69 |
|
105 |
| - def time_stats_corr_spearman(self): |
106 |
| - self.df.corr(method='spearman') |
107 |
| - |
108 |
| - |
109 |
| -class stats_rank2d_axis0_average(object): |
110 | 70 | goal_time = 0.2
|
| 71 | + params = ([0, 1, [0, 1]], ops) |
| 72 | + param_names = ['level', 'op'] |
111 | 73 |
|
112 |
| - def setup(self): |
113 |
| - self.df = DataFrame(np.random.randn(5000, 50)) |
114 |
| - |
115 |
| - def time_stats_rank2d_axis0_average(self): |
116 |
| - self.df.rank() |
| 74 | + def setup(self, level, op): |
| 75 | + levels = [np.arange(10), np.arange(100), np.arange(100)] |
| 76 | + labels = [np.arange(10).repeat(10000), |
| 77 | + np.tile(np.arange(100).repeat(100), 10), |
| 78 | + np.tile(np.tile(np.arange(100), 100), 10)] |
| 79 | + index = pd.MultiIndex(levels=levels, labels=labels) |
| 80 | + s = pd.Series(np.random.randn(len(index)), index=index) |
| 81 | + self.s_func = getattr(s, op) |
117 | 82 |
|
| 83 | + def time_op(self, level, op): |
| 84 | + self.s_func(level=level) |
118 | 85 |
|
119 |
| -class stats_rank2d_axis1_average(object): |
120 |
| - goal_time = 0.2 |
121 |
| - |
122 |
| - def setup(self): |
123 |
| - self.df = DataFrame(np.random.randn(5000, 50)) |
124 | 86 |
|
125 |
| - def time_stats_rank2d_axis1_average(self): |
126 |
| - self.df.rank(1) |
| 87 | +class Rank(object): |
127 | 88 |
|
128 |
| - |
129 |
| -class stats_rank_average(object): |
130 |
| - goal_time = 0.2 |
131 |
| - |
132 |
| - def setup(self): |
133 |
| - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) |
134 |
| - self.s = Series(self.values) |
135 |
| - |
136 |
| - def time_stats_rank_average(self): |
137 |
| - self.s.rank() |
138 |
| - |
139 |
| - |
140 |
| -class stats_rank_average_int(object): |
141 |
| - goal_time = 0.2 |
142 |
| - |
143 |
| - def setup(self): |
144 |
| - self.values = np.random.randint(0, 100000, size=200000) |
145 |
| - self.s = Series(self.values) |
146 |
| - |
147 |
| - def time_stats_rank_average_int(self): |
148 |
| - self.s.rank() |
149 |
| - |
150 |
| - |
151 |
| -class stats_rank_pct_average(object): |
152 | 89 | goal_time = 0.2
|
| 90 | + params = [['DataFrame', 'Series'], [True, False]] |
| 91 | + param_names = ['constructor', 'pct'] |
153 | 92 |
|
154 |
| - def setup(self): |
155 |
| - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) |
156 |
| - self.s = Series(self.values) |
| 93 | + def setup(self, constructor, pct): |
| 94 | + values = np.random.randn(10**5) |
| 95 | + self.data = getattr(pd, constructor)(values) |
157 | 96 |
|
158 |
| - def time_stats_rank_pct_average(self): |
159 |
| - self.s.rank(pct=True) |
| 97 | + def time_rank(self, constructor, pct): |
| 98 | + self.data.rank(pct=pct) |
160 | 99 |
|
| 100 | + def time_average_old(self, constructor, pct): |
| 101 | + self.data.rank(pct=pct) / len(self.data) |
161 | 102 |
|
162 |
| -class stats_rank_pct_average_old(object): |
163 |
| - goal_time = 0.2 |
164 |
| - |
165 |
| - def setup(self): |
166 |
| - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) |
167 |
| - self.s = Series(self.values) |
168 |
| - |
169 |
| - def time_stats_rank_pct_average_old(self): |
170 |
| - (self.s.rank() / len(self.s)) |
171 | 103 |
|
| 104 | +class Correlation(object): |
172 | 105 |
|
173 |
| -class stats_rolling_mean(object): |
174 | 106 | goal_time = 0.2
|
| 107 | + params = ['spearman', 'kendall', 'pearson'] |
| 108 | + param_names = ['method'] |
175 | 109 |
|
176 |
| - def setup(self): |
177 |
| - self.arr = np.random.randn(100000) |
178 |
| - self.win = 100 |
179 |
| - |
180 |
| - def time_rolling_mean(self): |
181 |
| - rolling_mean(self.arr, self.win) |
182 |
| - |
183 |
| - def time_rolling_median(self): |
184 |
| - rolling_median(self.arr, self.win) |
185 |
| - |
186 |
| - def time_rolling_min(self): |
187 |
| - rolling_min(self.arr, self.win) |
188 |
| - |
189 |
| - def time_rolling_max(self): |
190 |
| - rolling_max(self.arr, self.win) |
191 |
| - |
192 |
| - def time_rolling_sum(self): |
193 |
| - rolling_sum(self.arr, self.win) |
194 |
| - |
195 |
| - def time_rolling_std(self): |
196 |
| - rolling_std(self.arr, self.win) |
197 |
| - |
198 |
| - def time_rolling_var(self): |
199 |
| - rolling_var(self.arr, self.win) |
200 |
| - |
201 |
| - def time_rolling_skew(self): |
202 |
| - rolling_skew(self.arr, self.win) |
| 110 | + def setup(self, method): |
| 111 | + self.df = pd.DataFrame(np.random.randn(1000, 30)) |
203 | 112 |
|
204 |
| - def time_rolling_kurt(self): |
205 |
| - rolling_kurt(self.arr, self.win) |
| 113 | + def time_corr(self, method): |
| 114 | + self.df.corr(method=method) |
0 commit comments