Skip to content

Commit e75a2a1

Browse files
authored
ASV: Add benchmarks for groupby with multiple categories (#56030)
1 parent 517973e commit e75a2a1

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

asv_bench/benchmarks/groupby.py

+45
Original file line number | Diff line number | Diff line change
@@ -802,6 +802,51 @@ def time_groupby_extra_cat_nosort(self, observed):
802802
self.df_extra_cat.groupby("a", observed=observed, sort=False)["b"].count()
803803

804804

805+
class MultipleCategories:
    """Benchmarks for ``groupby`` keyed on two categorical columns at once.

    Every ``time_*`` method groups one of the frames built in ``setup`` by
    ``["a1", "a2"]`` with ``observed=False`` and reduces (or transforms)
    the float column ``"b"``.
    """

    def setup(self):
        """Create the three 1000-row frames used by the timing methods.

        * ``self.df``           -- unordered categorical keys drawn from
          ``[0, 10000)``.
        * ``self.df_ordered``   -- same construction, ``ordered=True``.
        * ``self.df_extra_cat`` -- keys drawn from ``[0, 100)`` but declared
          with ``np.arange(1000)`` as categories, so most declared
          categories never appear in the data.

        All three frames share the same ``"b"`` values.
        """
        n_rows = 10**3
        values = np.random.random(n_rows)

        # Unordered categoricals.
        self.df = DataFrame(
            {
                "a1": Categorical(np.random.randint(10000, size=n_rows)),
                "a2": Categorical(np.random.randint(10000, size=n_rows)),
                "b": values,
            }
        )

        # Ordered categoricals.
        self.df_ordered = DataFrame(
            {
                "a1": Categorical(
                    np.random.randint(10000, size=n_rows), ordered=True
                ),
                "a2": Categorical(
                    np.random.randint(10000, size=n_rows), ordered=True
                ),
                "b": values,
            }
        )

        # Categoricals whose declared categories exceed the values present.
        self.df_extra_cat = DataFrame(
            {
                "a1": Categorical(
                    np.random.randint(100, size=n_rows),
                    categories=np.arange(n_rows),
                ),
                "a2": Categorical(
                    np.random.randint(100, size=n_rows),
                    categories=np.arange(n_rows),
                ),
                "b": values,
            }
        )

    def time_groupby_sort(self):
        """Count ``"b"`` per key pair on the unordered frame (``sort`` left at its default)."""
        grouped = self.df.groupby(["a1", "a2"], observed=False)
        grouped["b"].count()

    def time_groupby_nosort(self):
        """Count ``"b"`` per key pair on the unordered frame with ``sort=False``."""
        grouped = self.df.groupby(["a1", "a2"], observed=False, sort=False)
        grouped["b"].count()

    def time_groupby_ordered_sort(self):
        """Count ``"b"`` per key pair on the ordered frame (``sort`` left at its default)."""
        grouped = self.df_ordered.groupby(["a1", "a2"], observed=False)
        grouped["b"].count()

    def time_groupby_ordered_nosort(self):
        """Count ``"b"`` per key pair on the ordered frame with ``sort=False``."""
        grouped = self.df_ordered.groupby(
            ["a1", "a2"], observed=False, sort=False
        )
        grouped["b"].count()

    def time_groupby_extra_cat_sort(self):
        """Count ``"b"`` per key pair on the extra-categories frame (``sort`` left at its default)."""
        grouped = self.df_extra_cat.groupby(["a1", "a2"], observed=False)
        grouped["b"].count()

    def time_groupby_extra_cat_nosort(self):
        """Count ``"b"`` per key pair on the extra-categories frame with ``sort=False``."""
        grouped = self.df_extra_cat.groupby(
            ["a1", "a2"], observed=False, sort=False
        )
        grouped["b"].count()

    def time_groupby_transform(self):
        """Cumulative sum of ``"b"`` within each key pair on the extra-categories frame."""
        grouped = self.df_extra_cat.groupby(["a1", "a2"], observed=False)
        grouped["b"].cumsum()
848+
849+
805850
class Datelike:
806851
# GH 14338
807852
params = ["period_range", "date_range", "date_range_tz"]

0 commit comments

Comments
 (0)