Skip to content

Commit bafd99d

Browse files
committed
PERF: add asv for categorical grouping
1 parent 61c62ec commit bafd99d

File tree

1 file changed

+37
-0
lines changed

1 file changed

+37
-0
lines changed

asv_bench/benchmarks/groupby.py

+37
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,43 @@ def time_groupby_sum(self):
492492
self.df.groupby(['a'])['b'].sum()
493493

494494

495+
class groupby_categorical(object):
    """Benchmarks for ``groupby(...).count()`` keyed on a Categorical column.

    Three frames are built in ``setup``, each with a categorical key column
    ``a`` and a float value column ``b``:

    * ``df``           -- unordered categorical, categories inferred from data.
    * ``df_ordered``   -- same construction, but with ``ordered=True``.
    * ``df_extra_cat`` -- values drawn from ``[0, 100)`` while the declared
      categories are ``np.arange(1000)``, so most categories never occur.

    Each frame is timed with the default ``sort`` and with ``sort=False``.
    """

    goal_time = 0.2

    def setup(self):
        """Build the three benchmark frames; they share the same ``b`` values."""
        N = 100000
        arr = np.random.random(N)

        self.df = DataFrame(dict(
            a=Categorical(np.random.randint(1000, size=N)),
            b=arr))
        self.df_ordered = DataFrame(dict(
            a=Categorical(np.random.randint(1000, size=N), ordered=True),
            b=arr))
        # Codes only cover 0..99 but 1000 categories are declared, so at
        # least 900 categories are unused in the data.
        self.df_extra_cat = DataFrame(dict(
            a=Categorical(np.random.randint(100, size=N),
                          categories=np.arange(1000)),
            b=arr))

    def time_groupby_sort(self):
        """Unordered categorical key, default ``sort``."""
        self.df.groupby('a')['b'].count()

    def time_groupby_nosort(self):
        """Unordered categorical key, ``sort=False``."""
        self.df.groupby('a', sort=False)['b'].count()

    def time_groupby_ordered_sort(self):
        """Ordered categorical key, default ``sort``."""
        self.df_ordered.groupby('a')['b'].count()

    def time_groupby_ordered_nosort(self):
        """Ordered categorical key, ``sort=False``."""
        self.df_ordered.groupby('a', sort=False)['b'].count()

    def time_groupby_extra_cat_sort(self):
        """Categorical key with unused categories, default ``sort``."""
        # Was ``self.df_ordered``: a copy-paste slip that left
        # ``df_extra_cat`` unbenchmarked.
        self.df_extra_cat.groupby('a')['b'].count()

    def time_groupby_extra_cat_nosort(self):
        """Categorical key with unused categories, ``sort=False``."""
        # Was ``self.df_ordered``: a copy-paste slip that left
        # ``df_extra_cat`` unbenchmarked.
        self.df_extra_cat.groupby('a', sort=False)['b'].count()
530+
531+
495532
class groupby_period(object):
496533
# GH 14338
497534
goal_time = 0.2

0 commit comments

Comments
 (0)