From 5f28727f49078c853e2c882c83c054ffca037cde Mon Sep 17 00:00:00 2001 From: Sonal Prabhu Date: Fri, 8 Oct 2021 22:46:12 +0530 Subject: [PATCH 1/5] TST : Added benchmark for indexing with .loc --- asv_bench/benchmarks/indexing.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 58f2a73d82842..5571672c02067 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -247,8 +247,15 @@ class DatetimeIndexIndexing: def setup(self): dti = date_range("2016-01-01", periods=10000, tz="US/Pacific") dti2 = dti.tz_convert("UTC") + dates = pd.date_range('2011-1-1', periods=500000, freq='min') + index = np.random.choice(dates, 500000, replace=True) + df = pd.DataFrame(index=index, data={'a': 1}) + df_sorted = df.sort_index() self.dti = dti self.dti2 = dti2 + self.df = df + self.df_sorted = df_sorted + def time_get_indexer_mismatched_tz(self): # reached via e.g. @@ -256,6 +263,12 @@ def time_get_indexer_mismatched_tz(self): # ser[dti2] self.dti.get_indexer(self.dti2) + def time_loc_unsorted(self): + self.df.loc['2011-6-11'] + + def time_loc_sorted(self): + self.df_sorted.loc['2011-6-11'] + class CategoricalIndexIndexing: From a5022ad93b106559a2a57bf79f92b641edc2d445 Mon Sep 17 00:00:00 2001 From: Sonal Prabhu Date: Fri, 8 Oct 2021 22:53:43 +0530 Subject: [PATCH 2/5] Removed whitespace --- asv_bench/benchmarks/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 5571672c02067..0e4e8f5044aa7 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -265,7 +265,7 @@ def time_get_indexer_mismatched_tz(self): def time_loc_unsorted(self): self.df.loc['2011-6-11'] - + def time_loc_sorted(self): self.df_sorted.loc['2011-6-11'] From d7f5f51d659c248997ecc3ce742485326edb9e21 Mon Sep 17 00:00:00 2001 From: Sonal Prabhu Date: Fri, 8 Oct 2021 23:19:28 +0530 Subject: [PATCH 3/5] Removed pd prefix --- asv_bench/benchmarks/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 0e4e8f5044aa7..54106168da389 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -247,9 +247,9 @@ class DatetimeIndexIndexing: def setup(self): dti = date_range("2016-01-01", periods=10000, tz="US/Pacific") dti2 = dti.tz_convert("UTC") - dates = pd.date_range('2011-1-1', periods=500000, freq='min') + dates = date_range('2011-1-1', periods=500000, freq='min') index = np.random.choice(dates, 500000, replace=True) - df = pd.DataFrame(index=index, data={'a': 1}) + df = DataFrame(index=index, data={'a': 1}) df_sorted = df.sort_index() self.dti = dti self.dti2 = dti2 From b1a04496b240f3010b16418e271204b202b1cce9 Mon Sep 17 00:00:00 2001 From: Sonal Prabhu Date: Sat, 9 Oct 2021 05:13:57 +0000 Subject: [PATCH 4/5] Fixes from pre-commit [automated commit] --- asv_bench/benchmarks/indexing.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 54106168da389..ed1d7da2dfc64 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -247,16 +247,15 @@ class DatetimeIndexIndexing: def setup(self): dti = date_range("2016-01-01", periods=10000, tz="US/Pacific") dti2 = dti.tz_convert("UTC") - dates = date_range('2011-1-1', periods=500000, freq='min') + dates = date_range("2011-1-1", periods=500000, freq="min") index = np.random.choice(dates, 500000, replace=True) - df = DataFrame(index=index, data={'a': 1}) + df = DataFrame(index=index, data={"a": 1}) df_sorted = df.sort_index() self.dti = dti self.dti2 = dti2 self.df = df self.df_sorted = df_sorted - def time_get_indexer_mismatched_tz(self): # reached via e.g. # ser = Series(range(len(dti)), index=dti) @@ -264,10 +263,10 @@ def time_get_indexer_mismatched_tz(self): self.dti.get_indexer(self.dti2) def time_loc_unsorted(self): - self.df.loc['2011-6-11'] + self.df.loc["2011-6-11"] def time_loc_sorted(self): - self.df_sorted.loc['2011-6-11'] + self.df_sorted.loc["2011-6-11"] class CategoricalIndexIndexing: From ea481c263f8257d94f15bb67e694e4042535db6e Mon Sep 17 00:00:00 2001 From: Sonal Prabhu Date: Sun, 14 Nov 2021 10:21:34 +0530 Subject: [PATCH 5/5] Adhered to naming conventions and removed unnecessary variables --- asv_bench/benchmarks/indexing.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index ed1d7da2dfc64..7401cfe77031d 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -247,14 +247,11 @@ class DatetimeIndexIndexing: def setup(self): dti = date_range("2016-01-01", periods=10000, tz="US/Pacific") dti2 = dti.tz_convert("UTC") - dates = date_range("2011-1-1", periods=500000, freq="min") - index = np.random.choice(dates, 500000, replace=True) - df = DataFrame(index=index, data={"a": 1}) - df_sorted = df.sort_index() + index = np.random.choice(dti, 10000, replace=True) + dti_sorted = DataFrame(index=index, data={"a": 1}).sort_index() self.dti = dti self.dti2 = dti2 - self.df = df - self.df_sorted = df_sorted + self.dti_sorted = dti_sorted def time_get_indexer_mismatched_tz(self): # reached via e.g. @@ -263,10 +260,10 @@ def time_get_indexer_mismatched_tz(self): self.dti.get_indexer(self.dti2) def time_loc_unsorted(self): - self.df.loc["2011-6-11"] + self.dti.loc["2016-6-11"] def time_loc_sorted(self): - self.df_sorted.loc["2011-6-11"] + self.dti_sorted.loc["2016-6-11"] class CategoricalIndexIndexing: