@@ -9,22 +9,24 @@ class Methods:
9
9
10
10
# Benchmark parameter grid: one list per axis, in the same order as
# ``param_names`` and as the positional arguments of ``setup`` / the
# benchmark methods.
params = (
    ["DataFrame", "Series"],
    # (window-constructor method name, keyword arguments passed to it)
    [("rolling", {"window": 10}), ("rolling", {"window": 1000}), ("expanding", {})],
    ["int", "float"],
    ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum", "sem"],
)
param_names = ["constructor", "window_kwargs", "dtype", "method"]
17
17
18
- def setup (self , constructor , window , dtype , method ):
18
+ def setup (self , constructor , window_kwargs , dtype , method ):
19
19
N = 10 ** 5
20
+ window , kwargs = window_kwargs
20
21
arr = (100 * np .random .random (N )).astype (dtype )
21
- self .roll = getattr (pd , constructor )(arr ).rolling (window )
22
+ obj = getattr (pd , constructor )(arr )
23
+ self .window = getattr (obj , window )(** kwargs )
22
24
23
def time_method(self, constructor, window_kwargs, dtype, method):
    """Time the aggregation named ``method`` on the window built in ``setup``.

    Only ``method`` is used; the remaining parameters are accepted because
    the full parameter tuple is passed to every benchmark method.
    """
    getattr(self.window, method)()
25
27
26
def peakmem_method(self, constructor, window_kwargs, dtype, method):
    """Run the aggregation named ``method`` on the window built in ``setup``.

    Same body as the timing benchmark; the ``peakmem_`` prefix is what
    distinguishes it. Only ``method`` is used from the parameters.
    """
    getattr(self.window, method)()
28
30
29
31
30
32
class Apply :
@@ -46,148 +48,160 @@ def time_rolling(self, constructor, window, dtype, function, raw):
46
48
self .roll .apply (function , raw = raw )
47
49
48
50
49
class NumbaEngineMethods:
    """Benchmark built-in window aggregations run with ``engine="numba"``."""

    params = (
        ["DataFrame", "Series"],
        ["int", "float"],
        # (window-constructor method name, keyword arguments passed to it)
        [("rolling", {"window": 10}), ("expanding", {})],
        ["sum", "max", "min", "median", "mean"],
        [True, False],
        [None, 100],
    )
    param_names = [
        "constructor",
        "dtype",
        "window_kwargs",
        "method",
        "parallel",
        "cols",
    ]

    def setup(self, constructor, dtype, window_kwargs, method, parallel, cols):
        """Build the window object and run the aggregation once up front.

        ``cols`` only widens the input to 2-D when it is not ``None`` and
        the constructor is not ``"Series"``; otherwise the data is 1-D of
        length ``N``.
        """
        N = 10**3
        window, kwargs = window_kwargs
        shape = (N, cols) if cols is not None and constructor != "Series" else N
        arr = (100 * np.random.random(shape)).astype(dtype)
        data = getattr(pd, constructor)(arr)

        # Warm the cache
        with warnings.catch_warnings(record=True):
            # Catch parallel=True not being applicable e.g. 1D data
            self.window = getattr(data, window)(**kwargs)
            getattr(self.window, method)(
                engine="numba", engine_kwargs={"parallel": parallel}
            )

    # Named ``time_*`` (not ``test_*``) to match the prefix used by every
    # other timing benchmark in this file; a ``test_`` name would not be
    # collected as a benchmark.
    def time_method(self, constructor, dtype, window_kwargs, method, parallel, cols):
        """Time the numba-engine aggregation named ``method``."""
        with warnings.catch_warnings(record=True):
            getattr(self.window, method)(
                engine="numba", engine_kwargs={"parallel": parallel}
            )
99
89
100
90
101
class NumbaEngineApply:
    """Benchmark ``apply`` on window objects run with ``engine="numba"``."""

    params = (
        ["DataFrame", "Series"],
        ["int", "float"],
        # (window-constructor method name, keyword arguments passed to it)
        [("rolling", {"window": 10}), ("expanding", {})],
        [np.sum, lambda x: np.sum(x) + 5],
        [True, False],
        [None, 100],
    )
    param_names = [
        "constructor",
        "dtype",
        "window_kwargs",
        "function",
        "parallel",
        "cols",
    ]

    def setup(self, constructor, dtype, window_kwargs, function, parallel, cols):
        """Build the window object and apply ``function`` once up front.

        ``cols`` only widens the input to 2-D when it is not ``None`` and
        the constructor is not ``"Series"``; otherwise the data is 1-D of
        length ``N``.
        """
        N = 10**3
        window, kwargs = window_kwargs
        shape = (N, cols) if cols is not None and constructor != "Series" else N
        arr = (100 * np.random.random(shape)).astype(dtype)
        data = getattr(pd, constructor)(arr)

        # Warm the cache
        with warnings.catch_warnings(record=True):
            # Catch parallel=True not being applicable e.g. 1D data
            self.window = getattr(data, window)(**kwargs)
            self.window.apply(
                function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
            )

    # Named ``time_*`` (not ``test_*``) to match the prefix used by every
    # other timing benchmark in this file; a ``test_`` name would not be
    # collected as a benchmark.
    def time_method(self, constructor, dtype, window_kwargs, function, parallel, cols):
        """Time ``apply(function)`` with the numba engine on raw arrays."""
        with warnings.catch_warnings(record=True):
            self.window.apply(
                function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
            )
126
129
127
130
128
131
class EWMMethods:
    """Benchmark aggregations on exponentially weighted windows."""

    params = (
        ["DataFrame", "Series"],
        # (keyword arguments for ``.ewm``, aggregation method name)
        [
            ({"halflife": 10}, "mean"),
            ({"halflife": 10}, "std"),
            ({"halflife": 1000}, "mean"),
            ({"halflife": 1000}, "std"),
            (
                {
                    "halflife": "1 Day",
                    # ``periods`` is the same value as ``N`` in ``setup``, so
                    # ``times`` has one entry per row of the benchmark data.
                    "times": pd.date_range("1900", periods=10**5, freq="23s"),
                },
                "mean",
            ),
        ],
        ["int", "float"],
    )
    param_names = ["constructor", "kwargs_method", "dtype"]

    def setup(self, constructor, kwargs_method, dtype):
        """Create the ewm object and remember which aggregation to time.

        Parameters
        ----------
        constructor : str
            Name of the pandas container to build, looked up on ``pd``.
        kwargs_method : tuple
            ``(kwargs, method)`` pair: keyword arguments for ``.ewm`` and
            the name of the aggregation to call on the result.
        dtype : str
            dtype the random input array is cast to.
        """
        N = 10**5
        kwargs, method = kwargs_method
        arr = (100 * np.random.random(N)).astype(dtype)
        self.method = method
        self.ewm = getattr(pd, constructor)(arr).ewm(**kwargs)

    def time_ewm(self, constructor, kwargs_method, dtype):
        """Time the aggregation selected in ``setup`` on the ewm object."""
        getattr(self.ewm, self.method)()
147
161
148
162
149
163
class VariableWindowMethods(Methods):
    """Variant of ``Methods`` using offset-string rolling windows.

    Only the parameter grid and ``setup`` are defined here; ``setup``
    stores the window object on ``self.window``.
    """

    params = (
        ["DataFrame", "Series"],
        ["50s", "1h", "1d"],
        ["int", "float"],
        ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum", "sem"],
    )
    param_names = ["constructor", "window", "dtype", "method"]

    def setup(self, constructor, window, dtype, method):
        """Build a rolling window over data indexed at a 5-second frequency.

        ``window`` is an offset string taken from ``params`` and passed
        straight to ``.rolling``; ``method`` is unused here.
        """
        N = 10**5
        arr = (100 * np.random.random(N)).astype(dtype)
        index = pd.date_range("2017-01-01", periods=N, freq="5s")
        self.window = getattr(pd, constructor)(arr, index=index).rolling(window)
163
177
164
178
165
179
class Pairwise:
    """Benchmark pairwise ``corr``/``cov`` on plain and grouped windows."""

    params = (
        # (keyword arguments for the constructor, window-constructor name)
        [({"window": 10}, "rolling"), ({"window": 1000}, "rolling"), ({}, "expanding")],
        ["corr", "cov"],
        [True, False],
    )
    # The first label follows the tuple order (kwargs first, window second)
    # and the parameter name used by the methods below.
    param_names = ["kwargs_window", "method", "pairwise"]

    def setup(self, kwargs_window, method, pairwise):
        """Create the frame plus its plain and grouped window objects.

        Parameters
        ----------
        kwargs_window : tuple
            ``(kwargs, window)`` pair: keyword arguments and the name of
            the window-constructor method they are passed to.
        method, pairwise
            Unused here; present because every benchmark parameter is
            passed to ``setup``.
        """
        N = 10**4
        n_groups = 20
        kwargs, window = kwargs_window
        # Group labels 0..n_groups-1 repeated until there are N of them.
        groups = [i for _ in range(N // n_groups) for i in range(n_groups)]
        arr = np.random.random(N)
        self.df = pd.DataFrame(arr)
        self.window = getattr(self.df, window)(**kwargs)
        self.window_group = getattr(
            pd.DataFrame({"A": groups, "B": arr}).groupby("A"), window
        )(**kwargs)

    def time_pairwise(self, kwargs_window, method, pairwise):
        """Time ``method`` of the plain window against ``self.df``."""
        getattr(self.window, method)(self.df, pairwise=pairwise)

    def time_groupby(self, kwargs_window, method, pairwise):
        """Time ``method`` of the grouped window against ``self.df``."""
        getattr(self.window_group, method)(self.df, pairwise=pairwise)
191
205
192
206
193
207
class Quantile :
@@ -274,25 +288,29 @@ def peakmem_rolling(self, constructor, window_size, dtype, method):
274
288
275
289
class Groupby:
    """Benchmark aggregations on grouped rolling/expanding windows."""

    params = (
        # NOTE(review): "sum" appears twice in this list, so that case is
        # run twice — confirm whether another method was intended.
        ["sum", "median", "mean", "max", "min", "kurt", "sum"],
        # (window-constructor method name, keyword arguments passed to it)
        [
            ("rolling", {"window": 2}),
            ("rolling", {"window": "30s", "on": "C"}),
            ("expanding", {}),
        ],
    )
    # Labels for the two parameter axes, in the order ``setup`` and
    # ``time_method`` receive them.
    param_names = ["method", "window_kwargs"]

    def setup(self, method, window_kwargs):
        """Build the grouped window object that ``time_method`` aggregates.

        Parameters
        ----------
        method : str
            Aggregation name; unused here.
        window_kwargs : tuple
            ``(window, kwargs)`` pair: the name of the window-constructor
            method to call on the groupby and its keyword arguments.
        """
        N = 1000
        window, kwargs = window_kwargs
        df = pd.DataFrame(
            {
                "A": [str(i) for i in range(N)] * 10,
                "B": list(range(N)) * 10,
                "C": pd.date_range(start="1900-01-01", freq="1min", periods=N * 10),
            }
        )
        self.groupby_window = getattr(df.groupby("A"), window)(**kwargs)

    def time_method(self, method, window_kwargs):
        """Time the aggregation named ``method`` on the grouped window."""
        getattr(self.groupby_window, method)()
296
314
297
315
298
316
class GroupbyLargeGroups :
0 commit comments