@@ -159,7 +159,7 @@ def time_series_nth(self, df):
159
159
df [1 ].groupby (df [0 ]).nth (0 )
160
160
161
161
162
- class Incidies (object ):
162
+ class DateAttributes (object ):
163
163
164
164
goal_time = 0.2
165
165
@@ -168,8 +168,8 @@ def setup(self):
168
168
self .year , self .month , self .day = rng .year , rng .month , rng .day
169
169
self .ts = Series (np .random .randn (len (rng )), index = rng )
170
170
171
- def time_datetime_indicies (self ):
172
- self .ts .groupby ([self .year , self .month , self .day ])
171
+ def time_len_groupby_object (self ):
172
+ len ( self .ts .groupby ([self .year , self .month , self .day ]) )
173
173
174
174
175
175
class Int64 (object ):
@@ -194,7 +194,7 @@ class CountMultiDtype(object):
194
194
195
195
goal_time = 0.2
196
196
197
- def setup (self ):
197
+ def setup_cache (self ):
198
198
n = 10000
199
199
offsets = np .random .randint (n , size = n ).astype ('timedelta64[ns]' )
200
200
dates = np .datetime64 ('now' ) + offsets
@@ -203,18 +203,19 @@ def setup(self):
203
203
value2 = np .random .randn (n )
204
204
value2 [np .random .rand (n ) > 0.5 ] = np .nan
205
205
obj = np .random .choice (list ('ab' ), size = n ).astype (object )
206
- obj [(np .random .randn (n ) > 0.5 )] = np .nan
207
- self .df = DataFrame ({'key1' : np .random .randint (0 , 500 , size = n ),
208
- 'key2' : np .random .randint (0 , 100 , size = n ),
209
- 'dates' : dates ,
210
- 'value2' : value2 ,
211
- 'value3' : np .random .randn (n ),
212
- 'ints' : np .random .randint (0 , 1000 , size = n ),
213
- 'obj' : obj ,
214
- 'offsets' : offsets })
206
+ obj [np .random .randn (n ) > 0.5 ] = np .nan
207
+ df = DataFrame ({'key1' : np .random .randint (0 , 500 , size = n ),
208
+ 'key2' : np .random .randint (0 , 100 , size = n ),
209
+ 'dates' : dates ,
210
+ 'value2' : value2 ,
211
+ 'value3' : np .random .randn (n ),
212
+ 'ints' : np .random .randint (0 , 1000 , size = n ),
213
+ 'obj' : obj ,
214
+ 'offsets' : offsets })
215
+ return df
215
216
216
- def time_multi_count (self ):
217
- self . df .groupby (['key1' , 'key2' ]).count ()
217
+ def time_multi_count (self , df ):
218
+ df .groupby (['key1' , 'key2' ]).count ()
218
219
219
220
220
221
class CountInt (object ):
@@ -236,7 +237,7 @@ def time_int_nunique(self, df):
236
237
df .groupby (['key1' , 'key2' ]).nunique ()
237
238
238
239
239
- class AggMultiColFuncs (object ):
240
+ class AggFunctions (object ):
240
241
241
242
goal_time = 0.2
242
243
@@ -261,22 +262,11 @@ def time_different_numpy_functions(self, df):
261
262
'value2' : np .var ,
262
263
'value3' : np .sum })
263
264
265
+ def time_different_python_functions_multicol (self , df ):
266
+ df .groupby (['key1' , 'key2' ]).agg ([sum , min , max ])
264
267
265
- class AggBuiltins (object ):
266
-
267
- goal_time = 0.2
268
-
269
- def setup_cache (self ):
270
- n = 10 ** 5
271
- df = DataFrame (np .random .randint (1 , n / 100 , (n , 3 )),
272
- columns = ['jim' , 'joe' , 'jolie' ])
273
- return df
274
-
275
- def time_agg_builtin_single_col (self , df ):
276
- df .groupby ('jim' ).agg ([sum , min , max ])
277
-
278
- def time_agg_builtins_multi_col (self , df ):
279
- df .groupby (['jim' , 'joe' ]).agg ([sum , min , max ])
268
+ def time_different_python_functions_singlecol (self , df ):
269
+ df .groupby ('key1' ).agg ([sum , min , max ])
280
270
281
271
282
272
class GroupStrings (object ):
@@ -532,38 +522,21 @@ def time_groupby_extra_cat_nosort(self):
532
522
class Datelike (object ):
533
523
# GH 14338
534
524
goal_time = 0.2
535
- params = [period_range , date_range , partial ( date_range , tz = 'US/Central' ) ]
525
+ params = ['period_range' , 'date_range' , 'date_range_tz' ]
536
526
param_names = ['grouper' ]
537
527
538
528
def setup (self , grouper ):
539
529
N = 10 ** 4
540
- self .grouper = grouper ('1900-01-01' , freq = 'D' , periods = N )
530
+ rng_map = {'period_range' : period_range ,
531
+ 'date_range' : date_range ,
532
+ 'date_range_tz' : partial (date_range , tz = 'US/Central' )}
533
+ self .grouper = rng_map [grouper ]('1900-01-01' , freq = 'D' , periods = N )
541
534
self .df = DataFrame (np .random .randn (10 ** 4 , 2 ))
542
535
543
536
def time_sum (self , grouper ):
544
537
self .df .groupby (self .grouper ).sum ()
545
538
546
539
547
- class PivotTable (object ):
548
- goal_time = 0.2
549
-
550
- def setup (self ):
551
- N = 100000
552
- fac1 = np .array (['A' , 'B' , 'C' ], dtype = 'O' )
553
- fac2 = np .array (['one' , 'two' ], dtype = 'O' )
554
- ind1 = np .random .randint (0 , 3 , size = N )
555
- ind2 = np .random .randint (0 , 2 , size = N )
556
- self .df = DataFrame ({'key1' : fac1 .take (ind1 ),
557
- 'key2' : fac2 .take (ind2 ),
558
- 'key3' : fac2 .take (ind2 ),
559
- 'value1' : np .random .randn (N ),
560
- 'value2' : np .random .randn (N ),
561
- 'value3' : np .random .randn (N )})
562
-
563
- def time_pivot_table (self ):
564
- self .df .pivot_table (index = 'key1' , columns = ['key2' , 'key3' ])
565
-
566
-
567
540
class SumBools (object ):
568
541
# GH 2692
569
542
goal_time = 0.2
0 commit comments