From cd59acf50286a90f917a31143fa39314fbef86dd Mon Sep 17 00:00:00 2001 From: Deepan Das Date: Mon, 4 Nov 2019 22:43:05 +0100 Subject: [PATCH 1/6] removing kendall tests --- asv_bench/benchmarks/stat_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index ed5ebfa61594e..e2aa4765f7b0a 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -101,7 +101,7 @@ def time_average_old(self, constructor, pct): class Correlation: - params = [["spearman", "kendall", "pearson"], [True, False]] + params = [["spearman", "pearson"], [True, False]] param_names = ["method", "use_bottleneck"] def setup(self, method, use_bottleneck): From ec842d611802d84e23eb9ea02e0fcb131b3fec7c Mon Sep 17 00:00:00 2001 From: Deepan Das Date: Tue, 5 Nov 2019 09:13:54 +0100 Subject: [PATCH 2/6] Removing benchmark flags and adding back kendall with change in dataframe/series dimensions --- asv_bench/benchmarks/stat_ops.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index e2aa4765f7b0a..3cac7a3da66c0 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -101,8 +101,8 @@ def time_average_old(self, constructor, pct): class Correlation: - params = [["spearman", "pearson"], [True, False]] - param_names = ["method", "use_bottleneck"] + params = [["spearman", "kendall", "pearson"], [True, False]] + param_names = ["method"] def setup(self, method, use_bottleneck): try: @@ -111,12 +111,12 @@ def setup(self, method, use_bottleneck): from pandas.core import nanops nanops._USE_BOTTLENECK = use_bottleneck - self.df = pd.DataFrame(np.random.randn(1000, 30)) - self.df2 = pd.DataFrame(np.random.randn(1000, 30)) - self.df_wide = pd.DataFrame(np.random.randn(1000, 200)) - self.df_wide_nans = self.df_wide.where(np.random.random((1000, 200)) < 0.9) - self.s = pd.Series(np.random.randn(1000)) - self.s2 = pd.Series(np.random.randn(1000)) + self.df = pd.DataFrame(np.random.randn(500, 15)) + self.df2 = pd.DataFrame(np.random.randn(500, 15)) + self.df_wide = pd.DataFrame(np.random.randn(500, 100)) + self.df_wide_nans = self.df_wide.where(np.random.random((500, 100)) < 0.9) + self.s = pd.Series(np.random.randn(500)) + self.s2 = pd.Series(np.random.randn(500)) def time_corr(self, method, use_bottleneck): self.df.corr(method=method) From 131266a57b24017604636b4112299771a3ac5d08 Mon Sep 17 00:00:00 2001 From: Deepan Das Date: Tue, 5 Nov 2019 09:13:54 +0100 Subject: [PATCH 3/6] Removing bottleneck flags and adding back kendall with change in dataframe/series dimensions --- asv_bench/benchmarks/stat_ops.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index e2aa4765f7b0a..3cac7a3da66c0 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -101,8 +101,8 @@ def time_average_old(self, constructor, pct): class Correlation: - params = [["spearman", "pearson"], [True, False]] - param_names = ["method", "use_bottleneck"] + params = [["spearman", "kendall", "pearson"], [True, False]] + param_names = ["method"] def setup(self, method, use_bottleneck): try: @@ -111,12 +111,12 @@ def setup(self, method, use_bottleneck): from pandas.core import nanops nanops._USE_BOTTLENECK = use_bottleneck - self.df = pd.DataFrame(np.random.randn(1000, 30)) - self.df2 = pd.DataFrame(np.random.randn(1000, 30)) - self.df_wide = pd.DataFrame(np.random.randn(1000, 200)) - self.df_wide_nans = self.df_wide.where(np.random.random((1000, 200)) < 0.9) - self.s = pd.Series(np.random.randn(1000)) - self.s2 = pd.Series(np.random.randn(1000)) + self.df = pd.DataFrame(np.random.randn(500, 15)) + self.df2 = pd.DataFrame(np.random.randn(500, 15)) + self.df_wide = pd.DataFrame(np.random.randn(500, 100)) + self.df_wide_nans = self.df_wide.where(np.random.random((500, 100)) < 0.9) + self.s = pd.Series(np.random.randn(500)) + self.s2 = pd.Series(np.random.randn(500)) def time_corr(self, method, use_bottleneck): self.df.corr(method=method) From fcd57a0331efdbd2bee84d19d1b0f8ea8d709552 Mon Sep 17 00:00:00 2001 From: Deepan Das Date: Thu, 7 Nov 2019 00:43:41 +0100 Subject: [PATCH 4/6] Remove bottleneck parameterization and assignment in setup --- asv_bench/benchmarks/stat_ops.py | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 3cac7a3da66c0..d1d86f64e1773 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -12,12 +12,6 @@ class FrameOps: def setup(self, op, dtype, axis, use_bottleneck): df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype) - try: - pd.options.compute.use_bottleneck = use_bottleneck - except TypeError: - from pandas.core import nanops - - nanops._USE_BOTTLENECK = use_bottleneck self.df_func = getattr(df, op) def time_op(self, op, dtype, axis, use_bottleneck): @@ -47,16 +41,10 @@ def time_op(self, level, op): class SeriesOps: params = [ops, ["float", "int"], [True, False]] - param_names = ["op", "dtype", "use_bottleneck"] + param_names = ["op", "dtype"] def setup(self, op, dtype, use_bottleneck): s = pd.Series(np.random.randn(100000)).astype(dtype) - try: - pd.options.compute.use_bottleneck = use_bottleneck - except TypeError: - from pandas.core import nanops - - nanops._USE_BOTTLENECK = use_bottleneck self.s_func = getattr(s, op) def time_op(self, op, dtype, use_bottleneck): @@ -105,12 +93,6 @@ class Correlation: param_names = ["method"] def setup(self, method, use_bottleneck): - try: - pd.options.compute.use_bottleneck = use_bottleneck - except TypeError: - from pandas.core import nanops - - nanops._USE_BOTTLENECK = use_bottleneck self.df = pd.DataFrame(np.random.randn(500, 15)) self.df2 = pd.DataFrame(np.random.randn(500, 15)) self.df_wide = pd.DataFrame(np.random.randn(500, 100)) @@ -143,15 +125,9 @@ def time_corrwith_rows(self, method, use_bottleneck): class Covariance: params = [[True, False]] - param_names = ["use_bottleneck"] + param_names = [] def setup(self, use_bottleneck): - try: - pd.options.compute.use_bottleneck = use_bottleneck - except TypeError: - from pandas.core import nanops - - nanops._USE_BOTTLENECK = use_bottleneck self.s = pd.Series(np.random.randn(100000)) self.s2 = pd.Series(np.random.randn(100000)) From b717f46567e89bf0f3b03f693d8197e793e55539 Mon Sep 17 00:00:00 2001 From: Deepan Das Date: Thu, 7 Nov 2019 22:17:03 +0100 Subject: [PATCH 5/6] removing bottleneck calls from function statements --- asv_bench/benchmarks/stat_ops.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index d1d86f64e1773..3f263d3e5f366 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -8,13 +8,13 @@ class FrameOps: params = [ops, ["float", "int"], [0, 1], [True, False]] - param_names = ["op", "dtype", "axis", "use_bottleneck"] + param_names = ["op", "dtype", "axis"] - def setup(self, op, dtype, axis, use_bottleneck): + def setup(self, op, dtype, axis): df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype) self.df_func = getattr(df, op) - def time_op(self, op, dtype, axis, use_bottleneck): + def time_op(self, op, dtype, axis): self.df_func(axis=axis) @@ -43,11 +43,11 @@ class SeriesOps: params = [ops, ["float", "int"], [True, False]] param_names = ["op", "dtype"] - def setup(self, op, dtype, use_bottleneck): + def setup(self, op, dtype): s = pd.Series(np.random.randn(100000)).astype(dtype) self.s_func = getattr(s, op) - def time_op(self, op, dtype, use_bottleneck): + def time_op(self, op, dtype): self.s_func() @@ -92,7 +92,7 @@ class Correlation: params = [["spearman", "kendall", "pearson"], [True, False]] param_names = ["method"] - def setup(self, method, use_bottleneck): + def setup(self, method): self.df = pd.DataFrame(np.random.randn(500, 15)) self.df2 = pd.DataFrame(np.random.randn(500, 15)) self.df_wide = pd.DataFrame(np.random.randn(500, 100)) @@ -100,25 +100,25 @@ def setup(self, method, use_bottleneck): self.s = pd.Series(np.random.randn(500)) self.s2 = pd.Series(np.random.randn(500)) - def time_corr(self, method, use_bottleneck): + def time_corr(self, method): self.df.corr(method=method) - def time_corr_wide(self, method, use_bottleneck): + def time_corr_wide(self, method): self.df_wide.corr(method=method) - def time_corr_wide_nans(self, method, use_bottleneck): + def time_corr_wide_nans(self, method): self.df_wide_nans.corr(method=method) - def peakmem_corr_wide(self, method, use_bottleneck): + def peakmem_corr_wide(self, method): self.df_wide.corr(method=method) - def time_corr_series(self, method, use_bottleneck): + def time_corr_series(self, method): self.s.corr(self.s2, method=method) - def time_corrwith_cols(self, method, use_bottleneck): + def time_corrwith_cols(self, method): self.df.corrwith(self.df2, method=method) - def time_corrwith_rows(self, method, use_bottleneck): + def time_corrwith_rows(self, method): self.df.corrwith(self.df2, axis=1, method=method) @@ -127,11 +127,11 @@ class Covariance: params = [[True, False]] param_names = [] - def setup(self, use_bottleneck): + def setup(self): self.s = pd.Series(np.random.randn(100000)) self.s2 = pd.Series(np.random.randn(100000)) - def time_cov_series(self, use_bottleneck): + def time_cov_series(self): self.s.cov(self.s2) From d5a0007d9f6b5cb86378109bcc5869af1be089e1 Mon Sep 17 00:00:00 2001 From: Deepan Das Date: Wed, 4 Dec 2019 20:48:46 +0100 Subject: [PATCH 6/6] Fixing CI fail issues --- asv_bench/benchmarks/stat_ops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 3f263d3e5f366..ec67394e55a1e 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -7,7 +7,7 @@ class FrameOps: - params = [ops, ["float", "int"], [0, 1], [True, False]] + params = [ops, ["float", "int"], [0, 1]] param_names = ["op", "dtype", "axis"] def setup(self, op, dtype, axis): @@ -40,7 +40,7 @@ def time_op(self, level, op): class SeriesOps: - params = [ops, ["float", "int"], [True, False]] + params = [ops, ["float", "int"]] param_names = ["op", "dtype"] def setup(self, op, dtype): @@ -89,7 +89,7 @@ def time_average_old(self, constructor, pct): class Correlation: - params = [["spearman", "kendall", "pearson"], [True, False]] + params = [["spearman", "kendall", "pearson"]] param_names = ["method"] def setup(self, method): @@ -124,7 +124,7 @@ def time_corrwith_rows(self, method): class Covariance: - params = [[True, False]] + params = [] param_names = [] def setup(self):