From f192105cfcaad5f160eb8ddc0726152475d990ae Mon Sep 17 00:00:00 2001 From: richard Date: Sun, 30 Jul 2023 09:33:05 -0400 Subject: [PATCH 1/3] CLN/ASV: Remove NumPy/built-in aliases in ASVs --- asv_bench/benchmarks/frame_methods.py | 2 +- asv_bench/benchmarks/groupby.py | 18 +++++------------- asv_bench/benchmarks/reshape.py | 4 ++-- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index e3176830c23fb..9288f4fc727f7 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -512,7 +512,7 @@ def time_apply_lambda_mean(self): self.df.apply(lambda x: x.mean()) def time_apply_np_mean(self): - self.df.apply(np.mean) + self.df.apply("mean") def time_apply_pass_thru(self): self.df.apply(lambda x: x) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index e63e66f441afe..c55126a8315a8 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -324,22 +324,14 @@ def setup_cache(self): ) return df + def time_different_str_functions_singlecol(self, df): + df.groupby("key1").agg({"value1": "mean", "value2": "var", "value3": "sum"}) + def time_different_str_functions(self, df): df.groupby(["key1", "key2"]).agg( {"value1": "mean", "value2": "var", "value3": "sum"} ) - def time_different_numpy_functions(self, df): - df.groupby(["key1", "key2"]).agg( - {"value1": np.mean, "value2": np.var, "value3": np.sum} - ) - - def time_different_python_functions_multicol(self, df): - df.groupby(["key1", "key2"]).agg([sum, min, max]) - - def time_different_python_functions_singlecol(self, df): - df.groupby("key1")[["value1", "value2", "value3"]].agg([sum, min, max]) - class GroupStrings: def setup(self): @@ -382,7 +374,7 @@ def time_col_select_lambda_sum(self, df): df.groupby(["key1", "key2"])["data1"].agg(lambda x: x.values.sum()) def time_col_select_numpy_sum(self, df): - df.groupby(["key1", "key2"])["data1"].agg(np.sum) + df.groupby(["key1", "key2"])["data1"].agg("sum") class Size: @@ -926,7 +918,7 @@ def setup(self): self.df = DataFrame({"signal": np.random.rand(N)}) def time_transform_mean(self): - self.df["signal"].groupby(self.g).transform(np.mean) + self.df["signal"].groupby(self.g).transform("mean") class TransformNaN: diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index f80e9fd9ff256..54326a4433756 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -218,7 +218,7 @@ def time_pivot_table_margins(self): def time_pivot_table_categorical(self): self.df2.pivot_table( - index="col1", values="col3", columns="col2", aggfunc=np.sum, fill_value=0 + index="col1", values="col3", columns="col2", aggfunc="sum", fill_value=0 ) def time_pivot_table_categorical_observed(self): @@ -226,7 +226,7 @@ def time_pivot_table_categorical_observed(self): index="col1", values="col3", columns="col2", - aggfunc=np.sum, + aggfunc="sum", fill_value=0, observed=True, ) From 3180c21bb19dc080dc2b6073e50d8af1822a7315 Mon Sep 17 00:00:00 2001 From: richard Date: Sun, 30 Jul 2023 19:39:34 -0400 Subject: [PATCH 2/3] one more --- asv_bench/benchmarks/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index c55126a8315a8..015abcfbb405a 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -887,7 +887,7 @@ def time_transform_lambda_max(self): self.df.groupby(level="lev1").transform(lambda x: max(x)) def time_transform_ufunc_max(self): - self.df.groupby(level="lev1").transform(np.max) + self.df.groupby(level="lev1").transform("max") def time_transform_lambda_max_tall(self): self.df_tall.groupby(level=0).transform(lambda x: np.max(x, axis=0)) From b4cea90d64dd286eea9171df67f7760f3a3f80fd Mon Sep 17 00:00:00 2001 From: richard Date: Sun, 30 Jul 2023 19:58:35 -0400 Subject: [PATCH 3/3] Refinements --- asv_bench/benchmarks/frame_methods.py | 2 +- asv_bench/benchmarks/groupby.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 9288f4fc727f7..70ea19d6af2c9 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -511,7 +511,7 @@ def time_apply_axis_1(self): def time_apply_lambda_mean(self): self.df.apply(lambda x: x.mean()) - def time_apply_np_mean(self): + def time_apply_str_mean(self): self.df.apply("mean") def time_apply_pass_thru(self): diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 015abcfbb405a..334fcdd1d45df 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -324,14 +324,17 @@ def setup_cache(self): ) return df - def time_different_str_functions_singlecol(self, df): - df.groupby("key1").agg({"value1": "mean", "value2": "var", "value3": "sum"}) - def time_different_str_functions(self, df): df.groupby(["key1", "key2"]).agg( {"value1": "mean", "value2": "var", "value3": "sum"} ) + def time_different_str_functions_multicol(self, df): + df.groupby(["key1", "key2"]).agg(["sum", "min", "max"]) + + def time_different_str_functions_singlecol(self, df): + df.groupby("key1").agg({"value1": "mean", "value2": "var", "value3": "sum"}) + class GroupStrings: def setup(self): @@ -373,7 +376,7 @@ def time_cython_sum(self, df): def time_col_select_lambda_sum(self, df): df.groupby(["key1", "key2"])["data1"].agg(lambda x: x.values.sum()) - def time_col_select_numpy_sum(self, df): + def time_col_select_str_sum(self, df): df.groupby(["key1", "key2"])["data1"].agg("sum") @@ -886,7 +889,7 @@ def setup(self): def time_transform_lambda_max(self): self.df.groupby(level="lev1").transform(lambda x: max(x)) - def time_transform_ufunc_max(self): + def time_transform_str_max(self): self.df.groupby(level="lev1").transform("max") def time_transform_lambda_max_tall(self):