From a13ed9fb1fdf858e58dd26da8d2e762600c95b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Thu, 11 Apr 2024 21:24:45 -0300 Subject: [PATCH 1/4] ASV: Add benchmark for DataFrame.Update --- asv_bench/benchmarks/frame_methods.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index ce31d63f0b70f..b663353517523 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -862,4 +862,26 @@ def time_last_valid_index(self, dtype): self.df.last_valid_index() +class Update: + def setup(self): + rng = np.random.default_rng() + self.df = DataFrame(rng.uniform(size=(1000, 10))) + + idx = rng.choice(range(1000), size=1000, replace=False) + self.df_random = DataFrame(self.df, index=idx) + + idx = rng.choice(range(1000), size=100, replace=False) + cols = rng.choice(range(10), size=2, replace=False) + self.df_sample = DataFrame(rng.uniform(size=(100, 2)), index=idx, columns=cols) + + def time_to_update_big_frame_small_arg(self): + self.df.update(self.df_sample) + + def time_to_update_random_indices(self): + self.df_random.update(self.df_sample) + + def time_to_update_small_frame_big_arg(self): + self.df_sample.update(self.df) + + from .pandas_vb_common import setup # noqa: F401 isort:skip From 877e6fe99da6061b97c5e768b16e74c7973292ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sat, 13 Apr 2024 15:19:31 -0300 Subject: [PATCH 2/4] ASV: increase runtime of frame_methods.Update --- asv_bench/benchmarks/frame_methods.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index b663353517523..f030427049f5e 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -865,14 +865,16 @@ def time_last_valid_index(self, dtype): class Update: def setup(self): rng = np.random.default_rng() - self.df = DataFrame(rng.uniform(size=(1000, 10))) + self.df = DataFrame(rng.uniform(size=(10_000, 10))) - idx = rng.choice(range(1000), size=1000, replace=False) + idx = rng.choice(range(10_000), size=10_000, replace=False) self.df_random = DataFrame(self.df, index=idx) - idx = rng.choice(range(1000), size=100, replace=False) + idx = rng.choice(range(10_000), size=1_000, replace=False) cols = rng.choice(range(10), size=2, replace=False) - self.df_sample = DataFrame(rng.uniform(size=(100, 2)), index=idx, columns=cols) + self.df_sample = DataFrame( + rng.uniform(size=(1_000, 2)), index=idx, columns=cols + ) def time_to_update_big_frame_small_arg(self): self.df.update(self.df_sample) From 7766488f64b6083baef490a5a6c562c69cf7c2e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sat, 13 Apr 2024 16:08:50 -0300 Subject: [PATCH 3/4] ASV: increase runtime of frame_methods.Update (again) --- asv_bench/benchmarks/frame_methods.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index f030427049f5e..330dfc6f675a7 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -865,15 +865,15 @@ def time_last_valid_index(self, dtype): class Update: def setup(self): rng = np.random.default_rng() - self.df = DataFrame(rng.uniform(size=(10_000, 10))) + self.df = DataFrame(rng.uniform(size=(100_000, 10))) - idx = rng.choice(range(10_000), size=10_000, replace=False) + idx = rng.choice(range(100_000), size=100_000, replace=False) self.df_random = DataFrame(self.df, index=idx) - idx = rng.choice(range(10_000), size=1_000, replace=False) + idx = rng.choice(range(100_000), size=10_000, replace=False) cols = rng.choice(range(10), size=2, replace=False) self.df_sample = DataFrame( - rng.uniform(size=(1_000, 2)), index=idx, columns=cols + rng.uniform(size=(10_000, 2)), index=idx, columns=cols ) def time_to_update_big_frame_small_arg(self): From e80cc59d2a5ad25debdffb47f2dfb37d46ead0a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sun, 14 Apr 2024 11:08:12 -0300 Subject: [PATCH 4/4] ASV: increase runtime of frame_methods.Update (again) --- asv_bench/benchmarks/frame_methods.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 330dfc6f675a7..6a2ab24df26fe 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -865,15 +865,15 @@ def time_last_valid_index(self, dtype): class Update: def setup(self): rng = np.random.default_rng() - self.df = DataFrame(rng.uniform(size=(100_000, 10))) + self.df = DataFrame(rng.uniform(size=(1_000_000, 10))) - idx = rng.choice(range(100_000), size=100_000, replace=False) + idx = rng.choice(range(1_000_000), size=1_000_000, replace=False) self.df_random = DataFrame(self.df, index=idx) - idx = rng.choice(range(100_000), size=10_000, replace=False) + idx = rng.choice(range(1_000_000), size=100_000, replace=False) cols = rng.choice(range(10), size=2, replace=False) self.df_sample = DataFrame( - rng.uniform(size=(10_000, 2)), index=idx, columns=cols + rng.uniform(size=(100_000, 2)), index=idx, columns=cols ) def time_to_update_big_frame_small_arg(self):