From e4cb3a91d0831f8aca1ffb9d9f7026e0f64af931 Mon Sep 17 00:00:00 2001
From: Nathan Goldbaum
Date: Tue, 4 Apr 2023 09:44:14 -0600
Subject: [PATCH 1/2] PERF: refactor string construction benchmark

---
 asv_bench/benchmarks/strings.py | 52 ++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index f270f1a83af39..dba6aedbdffa5 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -25,33 +25,31 @@ def setup(self, dtype):
 
 
 class Construction:
-    params = ["str", "string"]
-    param_names = ["dtype"]
-
-    def setup(self, dtype):
-        self.series_arr = tm.rands_array(nchars=10, size=10**5)
-        self.frame_arr = self.series_arr.reshape((50_000, 2)).copy()
-
-        # GH37371. Testing construction of string series/frames from ExtensionArrays
-        self.series_cat_arr = Categorical(self.series_arr)
-
-    def time_series_construction(self, dtype):
-        Series(self.series_arr, dtype=dtype)
-
-    def peakmem_series_construction(self, dtype):
-        Series(self.series_arr, dtype=dtype)
-
-    def time_frame_construction(self, dtype):
-        DataFrame(self.frame_arr, dtype=dtype)
-
-    def peakmem_frame_construction(self, dtype):
-        DataFrame(self.frame_arr, dtype=dtype)
-
-    def time_cat_series_construction(self, dtype):
-        Series(self.series_cat_arr, dtype=dtype)
-
-    def peakmem_cat_series_construction(self, dtype):
-        Series(self.series_cat_arr, dtype=dtype)
+    params = (
+        ["series", "frame", "categorical_series"],
+        ["str", "string[python]", "string[pyarrow]"],
+    )
+    param_names = ["pd_type", "dtype"]
+    pd_mapping = {"series": Series, "frame": DataFrame, "categorical_series": Series}
+    dtype_mapping = {"str": "str", "string[python]": object, "string[pyarrow]": object}
+
+    def setup(self, pd_type, dtype):
+        series_arr = tm.rands_array(
+            nchars=10, size=10**5, dtype=self.dtype_mapping[dtype]
+        )
+        if pd_type == "series":
+            self.arr = series_arr
+        if pd_type == "frame":
+            self.arr = series_arr.reshape((50_000, 2)).copy()
+        elif pd_type == "categorical_series":
+            # GH37371. Testing construction of string series/frames from ExtensionArrays
+            self.arr = Categorical(series_arr)
+
+    def time_construction(self, pd_type, dtype):
+        self.pd_mapping[pd_type](self.arr, dtype=dtype)
+
+    def peakmem_construction(self, pd_type, dtype):
+        self.pd_mapping[pd_type](self.arr, dtype=dtype)
 
 
 class Methods(Dtypes):

From 47f9a32da3b82fedaa7d52891dcf86acc9490fa5 Mon Sep 17 00:00:00 2001
From: Nathan Goldbaum
Date: Tue, 4 Apr 2023 10:50:24 -0600
Subject: [PATCH 2/2] CLN: respond to review comments

---
 asv_bench/benchmarks/strings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index dba6aedbdffa5..9f1aeb7670628 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -39,7 +39,7 @@ def setup(self, pd_type, dtype):
         )
         if pd_type == "series":
             self.arr = series_arr
-        if pd_type == "frame":
+        elif pd_type == "frame":
             self.arr = series_arr.reshape((50_000, 2)).copy()
         elif pd_type == "categorical_series":
             # GH37371. Testing construction of string series/frames from ExtensionArrays