From a8ed36ee20ae1b406cda78d8baf306be05ea99c6 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 22 Nov 2017 19:31:57 -0800 Subject: [PATCH 1/3] CLN: ASV binary ops benchmark --- asv_bench/benchmarks/binary_ops.py | 90 ++++++++++++++---------------- 1 file changed, 43 insertions(+), 47 deletions(-) diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index 0ca21b929ea17..8e1ff8f5e6944 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -1,4 +1,5 @@ -from .pandas_vb_common import * +import numpy as np +from pandas import DataFrame, Series, date_range try: import pandas.core.computation.expressions as expr except ImportError: @@ -6,12 +7,14 @@ class Ops(object): + goal_time = 0.2 params = [[True, False], ['default', 1]] param_names = ['use_numexpr', 'threads'] def setup(self, use_numexpr, threads): + np.random.seed(1234) self.df = DataFrame(np.random.randn(20000, 100)) self.df2 = DataFrame(np.random.randn(20000, 100)) @@ -20,18 +23,17 @@ def setup(self, use_numexpr, threads): if not use_numexpr: expr.set_use_numexpr(False) - def time_frame_add(self, use_numexpr, threads): - (self.df + self.df2) + self.df + self.df2 def time_frame_mult(self, use_numexpr, threads): - (self.df * self.df2) + self.df * self.df2 def time_frame_multi_and(self, use_numexpr, threads): - self.df[((self.df > 0) & (self.df2 > 0))] + self.df[(self.df > 0) & (self.df2 > 0)] def time_frame_comparison(self, use_numexpr, threads): - (self.df > self.df2) + self.df > self.df2 def teardown(self, use_numexpr, threads): expr.set_use_numexpr(True) @@ -39,75 +41,69 @@ def teardown(self, use_numexpr, threads): class Ops2(object): + goal_time = 0.2 def setup(self): - self.df = DataFrame(np.random.randn(1000, 1000)) - self.df2 = DataFrame(np.random.randn(1000, 1000)) + N = 10**3 + np.random.seed(1234) + self.df = DataFrame(np.random.randn(N, N)) + self.df2 = DataFrame(np.random.randn(N, N)) - self.df_int = DataFrame( - np.random.random_integers(np.iinfo(np.int16).min, - np.iinfo(np.int16).max, - size=(1000, 1000))) - self.df2_int = DataFrame( - np.random.random_integers(np.iinfo(np.int16).min, - np.iinfo(np.int16).max, - size=(1000, 1000))) + self.df_int = DataFrame(np.random.randint(np.iinfo(np.int16).min, + np.iinfo(np.int16).max, + size=(N, N))) + self.df2_int = DataFrame(np.random.randint(np.iinfo(np.int16).min, + np.iinfo(np.int16).max, + size=(N, N))) - ## Division + # Division def time_frame_float_div(self): - (self.df // self.df2) + self.df // self.df2 def time_frame_float_div_by_zero(self): - (self.df / 0) + self.df / 0 def time_frame_float_floor_by_zero(self): - (self.df // 0) + self.df // 0 def time_frame_int_div_by_zero(self): - (self.df_int / 0) + self.df_int / 0 - ## Modulo + # Modulo def time_frame_int_mod(self): - (self.df / self.df2) + self.df_int % self.df2_int def time_frame_float_mod(self): - (self.df / self.df2) + self.df % self.df2 class Timeseries(object): + goal_time = 0.2 - def setup(self): - self.N = 1000000 + params = [None, 'US/Eastern'] + + def setup(self, tz): + self.N = 10**6 self.halfway = ((self.N // 2) - 1) - self.s = Series(date_range('20010101', periods=self.N, freq='T')) + self.s = Series(date_range('20010101', periods=self.N, freq='T', + tz=tz)) self.ts = self.s[self.halfway] - self.s2 = Series(date_range('20010101', periods=self.N, freq='s')) + self.s2 = Series(date_range('20010101', periods=self.N, freq='s', + tz=tz)) - def time_series_timestamp_compare(self): - (self.s <= self.ts) + def time_series_timestamp_compare(self, tz): + self.s <= self.ts - def time_timestamp_series_compare(self): - (self.ts >= self.s) + def time_timestamp_series_compare(self, tz): + self.ts >= self.s - def time_timestamp_ops_diff1(self): + def time_timestamp_ops_diff(self, tz): self.s2.diff() - def time_timestamp_ops_diff2(self): - (self.s - self.s.shift()) - - - -class TimeseriesTZ(Timeseries): - - def setup(self): - self.N = 1000000 - self.halfway = ((self.N // 2) - 1) - self.s = Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern')) - self.ts = self.s[self.halfway] - - self.s2 = Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern')) + def time_timestamp_ops_diff_with_shift(self, tz): + self.s - self.s.shift() From f9c1c71e1139c82bd662ebeee5278d5e4341a312 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 23 Nov 2017 17:09:58 -0800 Subject: [PATCH 2/3] Add param_names --- asv_bench/benchmarks/binary_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index 8e1ff8f5e6944..fbe2227a84674 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -85,6 +85,7 @@ class Timeseries(object): goal_time = 0.2 params = [None, 'US/Eastern'] + param_names = ['timezone'] def setup(self, tz): self.N = 10**6 From 371138ad4e3596a4d763463bd3c9bcaf7c185bab Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 24 Nov 2017 16:34:59 -0800 Subject: [PATCH 3/3] timezone -> tz --- asv_bench/benchmarks/binary_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index fbe2227a84674..429965c06cb48 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -85,7 +85,7 @@ class Timeseries(object): goal_time = 0.2 params = [None, 'US/Eastern'] - param_names = ['timezone'] + param_names = ['tz'] def setup(self, tz): self.N = 10**6