@@ -379,15 +379,24 @@ def time_groupby_dt_timegrouper_size(self):
379
379
#----------------------------------------------------------------------
380
380
# groupby with a variable value for ngroups
381
381
382
- class groupby_ngroups_10000 (object ):
382
+ class groupby_ngroups_int_10000 (object ):
383
383
goal_time = 0.2
384
+ dtype = 'int'
385
+ ngroups = 10000
384
386
385
387
def setup (self ):
386
388
np .random .seed (1234 )
387
- self .ngroups = 10000
388
- self .size = (self .ngroups * 2 )
389
- self .rng = np .arange (self .ngroups )
390
- self .df = DataFrame (dict (timestamp = self .rng .take (np .random .randint (0 , self .ngroups , size = self .size )), value = np .random .randint (0 , self .size , size = self .size )))
389
+ size = self .ngroups * 2
390
+ rng = np .arange (self .ngroups )
391
+ ts = rng .take (np .random .randint (0 , self .ngroups , size = size ))
392
+ if self .dtype == 'int' :
393
+ value = np .random .randint (0 , size , size = size )
394
+ else :
395
+ value = np .concatenate ([np .random .random (self .ngroups ) * 0.1 ,
396
+ np .random .random (self .ngroups ) * 10.0 ])
397
+
398
+ self .df = DataFrame ({'timestamp' : ts ,
399
+ 'value' : value })
391
400
392
401
def time_all(self):
    """Time ``all()`` on the ``timestamp`` column grouped by ``value``."""
    by_value = self.df.groupby('value')
    by_value['timestamp'].all()
@@ -482,109 +491,35 @@ def time_value_counts(self):
482
491
def time_var(self):
    """Time ``var()`` on the ``timestamp`` column grouped by ``value``."""
    by_value = self.df.groupby('value')
    by_value['timestamp'].var()
484
493
485
-
486
- class groupby_ngroups_100 (object ):
494
class groupby_ngroups_int_100(groupby_ngroups_int_10000):
    """Integer-``value`` variant of the ngroups benchmark with ``ngroups = 100``.

    Only the configuration attributes are set here; ``setup`` and the
    ``time_*`` methods are inherited from ``groupby_ngroups_int_10000``.
    """

    # Configuration read by the inherited ``setup``.
    ngroups = 100
    dtype = 'int'

    goal_time = 0.2
488
498
489
- def setup (self ):
490
- np .random .seed (1234 )
491
- self .ngroups = 100
492
- self .size = (self .ngroups * 2 )
493
- self .rng = np .arange (self .ngroups )
494
- self .df = DataFrame (dict (timestamp = self .rng .take (np .random .randint (0 , self .ngroups , size = self .size )), value = np .random .randint (0 , self .size , size = self .size )))
495
-
496
- def time_all (self ):
497
- self .df .groupby ('value' )['timestamp' ].all ()
498
-
499
- def time_any (self ):
500
- self .df .groupby ('value' )['timestamp' ].any ()
501
-
502
- def time_count (self ):
503
- self .df .groupby ('value' )['timestamp' ].count ()
504
-
505
- def time_cumcount (self ):
506
- self .df .groupby ('value' )['timestamp' ].cumcount ()
507
-
508
- def time_cummax (self ):
509
- self .df .groupby ('value' )['timestamp' ].cummax ()
510
-
511
- def time_cummin (self ):
512
- self .df .groupby ('value' )['timestamp' ].cummin ()
513
-
514
- def time_cumprod (self ):
515
- self .df .groupby ('value' )['timestamp' ].cumprod ()
516
-
517
- def time_cumsum (self ):
518
- self .df .groupby ('value' )['timestamp' ].cumsum ()
519
-
520
- def time_describe (self ):
521
- self .df .groupby ('value' )['timestamp' ].describe ()
522
-
523
- def time_diff (self ):
524
- self .df .groupby ('value' )['timestamp' ].diff ()
525
-
526
- def time_first (self ):
527
- self .df .groupby ('value' )['timestamp' ].first ()
528
-
529
- def time_head (self ):
530
- self .df .groupby ('value' )['timestamp' ].head ()
531
-
532
- def time_last (self ):
533
- self .df .groupby ('value' )['timestamp' ].last ()
534
-
535
- def time_mad (self ):
536
- self .df .groupby ('value' )['timestamp' ].mad ()
537
-
538
- def time_max (self ):
539
- self .df .groupby ('value' )['timestamp' ].max ()
540
-
541
- def time_mean (self ):
542
- self .df .groupby ('value' )['timestamp' ].mean ()
543
-
544
- def time_median (self ):
545
- self .df .groupby ('value' )['timestamp' ].median ()
546
-
547
- def time_min (self ):
548
- self .df .groupby ('value' )['timestamp' ].min ()
549
-
550
- def time_nunique (self ):
551
- self .df .groupby ('value' )['timestamp' ].nunique ()
552
-
553
- def time_pct_change (self ):
554
- self .df .groupby ('value' )['timestamp' ].pct_change ()
555
-
556
- def time_prod (self ):
557
- self .df .groupby ('value' )['timestamp' ].prod ()
558
-
559
- def time_rank (self ):
560
- self .df .groupby ('value' )['timestamp' ].rank ()
561
-
562
- def time_sem (self ):
563
- self .df .groupby ('value' )['timestamp' ].sem ()
564
-
565
- def time_size (self ):
566
- self .df .groupby ('value' )['timestamp' ].size ()
567
-
568
- def time_skew (self ):
569
- self .df .groupby ('value' )['timestamp' ].skew ()
570
-
571
- def time_std (self ):
572
- self .df .groupby ('value' )['timestamp' ].std ()
499
class groupby_ngroups_float_100(groupby_ngroups_int_10000):
    """Float-``value`` variant of the ngroups benchmark with ``ngroups = 100``.

    Only the configuration attributes are set here; ``setup`` and the
    ``time_*`` methods are inherited from ``groupby_ngroups_int_10000``.
    """

    # Configuration read by the inherited ``setup``.
    ngroups = 100
    dtype = 'float'

    goal_time = 0.2
573
503
574
- def time_sum (self ):
575
- self .df .groupby ('value' )['timestamp' ].sum ()
504
class groupby_ngroups_float_10000(groupby_ngroups_int_10000):
    """Float-``value`` variant of the ngroups benchmark with ``ngroups = 10000``.

    Only the configuration attributes are set here; ``setup`` and the
    ``time_*`` methods are inherited from ``groupby_ngroups_int_10000``.
    """

    # Configuration read by the inherited ``setup``.
    ngroups = 10000
    dtype = 'float'

    goal_time = 0.2
576
508
577
- def time_tail (self ):
578
- self .df .groupby ('value' )['timestamp' ].tail ()
579
509
580
- def time_unique (self ):
581
- self .df .groupby ('value' )['timestamp' ].unique ()
510
class groupby_float32(object):
    """Benchmark a groupby-sum keyed on a float32 column (GH 13335)."""

    goal_time = 0.2

    def setup(self):
        """Build ``self.df`` with two identical float32 columns ``a`` and ``b``.

        The data is 10000 values scaled by 0.1 followed by 10000 values
        scaled by 10.0, each cast to float32 and then repeated 10 times
        in place, giving 200000 rows.
        """
        # Seed the generator so every run times the same data, matching
        # the other groupby benchmarks in this file.
        np.random.seed(1234)
        small = (np.random.random(10000) * 0.1).astype(np.float32)
        large = (np.random.random(10000) * 10.0).astype(np.float32)
        arr = np.repeat(np.concatenate((small, large)), 10)
        self.df = DataFrame({'a': arr, 'b': arr})

    def time_groupby_sum(self):
        """Time ``sum()`` of column ``b`` grouped by column ``a``."""
        # The selected column must be exactly 'b' (the name created in
        # setup); a key with stray whitespace raises KeyError.
        self.df.groupby(['a'])['b'].sum()
588
523
589
524
590
525
#----------------------------------------------------------------------
0 commit comments