diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 760db2086b125..239f9aa19f769 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -18,7 +18,7 @@ // If missing or the empty string, the tool will be automatically // determined by looking for tools on the PATH environment // variable. - "environment_type": "conda", + "environment_type": "", // the base URL to show a commit for the project. "show_commit_url": "https://github.com/pydata/pandas/commit/", @@ -26,7 +26,7 @@ // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. // "pythons": ["2.7", "3.4"], - "pythons": ["2.7"], + "pythons": ["2.7", "3.4"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty @@ -41,7 +41,10 @@ "sqlalchemy": [], "scipy": [], "numexpr": [], - "pytables": [], + "tables": [], + "openpyxl": [], + "xlrd": [], + "xlwt": [] }, // The directory (relative to the current directory) that benchmarks are diff --git a/asv_bench/benchmarks/attrs_caching.py b/asv_bench/benchmarks/attrs_caching.py index ecb91923dc663..2b10cb88a3134 100644 --- a/asv_bench/benchmarks/attrs_caching.py +++ b/asv_bench/benchmarks/attrs_caching.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * class getattr_dataframe_index(object): diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index 13976014ec6f1..187101b1f392b 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * import pandas.computation.expressions as expr diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 80b277336df7a..a449639f1560e 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -1,4 +1,4 @@ -from 
pandas_vb_common import * +from .pandas_vb_common import * class concat_categorical(object): diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py index b48211b3db83e..265ffbc7261ca 100644 --- a/asv_bench/benchmarks/ctors.py +++ b/asv_bench/benchmarks/ctors.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * class frame_constructor_ndarray(object): diff --git a/asv_bench/benchmarks/eval.py b/asv_bench/benchmarks/eval.py index 397312355aa47..719d92567a7be 100644 --- a/asv_bench/benchmarks/eval.py +++ b/asv_bench/benchmarks/eval.py @@ -1,6 +1,6 @@ -from pandas_vb_common import * -import pandas.computation.expressions as expr +from .pandas_vb_common import * import pandas as pd +import pandas.computation.expressions as expr class eval_frame_add_all_threads(object): diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index 2cb337e0e6b9d..85f3c1628bd8b 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * try: from pandas.tseries.offsets import * except: @@ -9,1611 +9,1611 @@ class frame_ctor_dtindex_BDayx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BDay(1, **{})) + self.idx = self.get_index_for_offset(BDay(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BDayx1(self): 
DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BDayx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BDay(2, **{})) + self.idx = self.get_index_for_offset(BDay(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BDayx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BMonthBeginx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if 
(self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BMonthBegin(1, **{})) + self.idx = self.get_index_for_offset(BMonthBegin(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BMonthBeginx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BMonthBeginx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BMonthBegin(2, **{})) + self.idx = self.get_index_for_offset(BMonthBegin(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BMonthBeginx2(self): DataFrame(self.d) + def get_period_count(self, 
start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BMonthEndx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BMonthEnd(1, **{})) + self.idx = self.get_index_for_offset(BMonthEnd(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BMonthEndx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BMonthEndx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 
- else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BMonthEnd(2, **{})) + self.idx = self.get_index_for_offset(BMonthEnd(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BMonthEndx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BQuarterBeginx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BQuarterBegin(1, **{})) + self.idx = self.get_index_for_offset(BQuarterBegin(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BQuarterBeginx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = 
((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BQuarterBeginx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BQuarterBegin(2, **{})) + self.idx = self.get_index_for_offset(BQuarterBegin(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BQuarterBeginx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BQuarterEndx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * 
((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BQuarterEnd(1, **{})) + self.idx = self.get_index_for_offset(BQuarterEnd(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BQuarterEndx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BQuarterEndx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BQuarterEnd(2, **{})) + self.idx = self.get_index_for_offset(BQuarterEnd(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BQuarterEndx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - 
start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BYearBeginx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BYearBegin(1, **{})) + self.idx = self.get_index_for_offset(BYearBegin(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BYearBeginx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BYearBeginx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // 
self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BYearBegin(2, **{})) + self.idx = self.get_index_for_offset(BYearBegin(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BYearBeginx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BYearEndx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BYearEnd(1, **{})) + self.idx = self.get_index_for_offset(BYearEnd(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BYearEndx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 
0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BYearEndx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BYearEnd(2, **{})) + self.idx = self.get_index_for_offset(BYearEnd(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BYearEndx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BusinessDayx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def 
get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BusinessDay(1, **{})) + self.idx = self.get_index_for_offset(BusinessDay(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BusinessDayx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BusinessDayx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BusinessDay(2, **{})) + self.idx = self.get_index_for_offset(BusinessDay(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BusinessDayx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + 
return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BusinessHourx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BusinessHour(1, **{})) + self.idx = self.get_index_for_offset(BusinessHour(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BusinessHourx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_BusinessHourx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - 
self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(BusinessHour(2, **{})) + self.idx = self.get_index_for_offset(BusinessHour(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_BusinessHourx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_CBMonthBeginx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(CBMonthBegin(1, **{})) + self.idx = self.get_index_for_offset(CBMonthBegin(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_CBMonthBeginx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * 
((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_CBMonthBeginx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(CBMonthBegin(2, **{})) + self.idx = self.get_index_for_offset(CBMonthBegin(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_CBMonthBeginx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_CBMonthEndx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = 
Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(CBMonthEnd(1, **{})) + self.idx = self.get_index_for_offset(CBMonthEnd(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_CBMonthEndx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_CBMonthEndx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(CBMonthEnd(2, **{})) + self.idx = self.get_index_for_offset(CBMonthEnd(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_CBMonthEndx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // 
self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_CDayx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(CDay(1, **{})) + self.idx = self.get_index_for_offset(CDay(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_CDayx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_CDayx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, 
get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(CDay(2, **{})) + self.idx = self.get_index_for_offset(CDay(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_CDayx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_CustomBusinessDayx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(CustomBusinessDay(1, **{})) + self.idx = self.get_index_for_offset(CustomBusinessDay(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_CustomBusinessDayx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def 
get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_CustomBusinessDayx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(CustomBusinessDay(2, **{})) + self.idx = self.get_index_for_offset(CustomBusinessDay(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_CustomBusinessDayx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_DateOffsetx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, 
periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(DateOffset(1, **{})) + self.idx = self.get_index_for_offset(DateOffset(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_DateOffsetx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_DateOffsetx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(DateOffset(2, **{})) + self.idx = self.get_index_for_offset(DateOffset(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_DateOffsetx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def 
get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Dayx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Day(1, **{})) + self.idx = self.get_index_for_offset(Day(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Dayx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Dayx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) 
- self.idx = get_index_for_offset(Day(2, **{})) + self.idx = self.get_index_for_offset(Day(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Dayx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Easterx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Easter(1, **{})) + self.idx = self.get_index_for_offset(Easter(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Easterx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, 
periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Easterx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Easter(2, **{})) + self.idx = self.get_index_for_offset(Easter(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Easterx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_FY5253Quarterx1__variation_last(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(FY5253Quarter(1, **{'startingMonth': 1, 
'qtr_with_extra_week': 1, 'weekday': 1, 'variation': 'last', })) + self.idx = self.get_index_for_offset(FY5253Quarter(1, **{'startingMonth': 1, 'qtr_with_extra_week': 1, 'weekday': 1, 'variation': 'last', })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_FY5253Quarterx1__variation_last(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_FY5253Quarterx1__variation_nearest(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(FY5253Quarter(1, **{'startingMonth': 1, 'qtr_with_extra_week': 1, 'weekday': 1, 'variation': 'nearest', })) + self.idx = self.get_index_for_offset(FY5253Quarter(1, **{'startingMonth': 1, 'qtr_with_extra_week': 1, 'weekday': 1, 'variation': 'nearest', })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_FY5253Quarterx1__variation_nearest(self): DataFrame(self.d) + def get_period_count(self, 
start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_FY5253Quarterx2__variation_last(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(FY5253Quarter(2, **{'startingMonth': 1, 'qtr_with_extra_week': 1, 'weekday': 1, 'variation': 'last', })) + self.idx = self.get_index_for_offset(FY5253Quarter(2, **{'startingMonth': 1, 'qtr_with_extra_week': 1, 'weekday': 1, 'variation': 'last', })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_FY5253Quarterx2__variation_last(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class 
frame_ctor_dtindex_FY5253Quarterx2__variation_nearest(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(FY5253Quarter(2, **{'startingMonth': 1, 'qtr_with_extra_week': 1, 'weekday': 1, 'variation': 'nearest', })) + self.idx = self.get_index_for_offset(FY5253Quarter(2, **{'startingMonth': 1, 'qtr_with_extra_week': 1, 'weekday': 1, 'variation': 'nearest', })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_FY5253Quarterx2__variation_nearest(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_FY5253x1__variation_last(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return 
date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(FY5253(1, **{'startingMonth': 1, 'weekday': 1, 'variation': 'last', })) + self.idx = self.get_index_for_offset(FY5253(1, **{'startingMonth': 1, 'weekday': 1, 'variation': 'last', })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_FY5253x1__variation_last(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_FY5253x1__variation_nearest(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(FY5253(1, **{'startingMonth': 1, 'weekday': 1, 'variation': 'nearest', })) + self.idx = self.get_index_for_offset(FY5253(1, **{'startingMonth': 1, 'weekday': 1, 'variation': 'nearest', })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_FY5253x1__variation_nearest(self): DataFrame(self.d) + def 
get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_FY5253x2__variation_last(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(FY5253(2, **{'startingMonth': 1, 'weekday': 1, 'variation': 'last', })) + self.idx = self.get_index_for_offset(FY5253(2, **{'startingMonth': 1, 'weekday': 1, 'variation': 'last', })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_FY5253x2__variation_last(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_FY5253x2__variation_nearest(object): goal_time = 0.2 def setup(self): 
- - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(FY5253(2, **{'startingMonth': 1, 'weekday': 1, 'variation': 'nearest', })) + self.idx = self.get_index_for_offset(FY5253(2, **{'startingMonth': 1, 'weekday': 1, 'variation': 'nearest', })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_FY5253x2__variation_nearest(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Hourx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Hour(1, **{})) + self.idx = self.get_index_for_offset(Hour(1, **{})) self.df = 
DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Hourx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Hourx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Hour(2, **{})) + self.idx = self.get_index_for_offset(Hour(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Hourx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_LastWeekOfMonthx1(object): 
goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(LastWeekOfMonth(1, **{'week': 1, 'weekday': 1, })) + self.idx = self.get_index_for_offset(LastWeekOfMonth(1, **{'week': 1, 'weekday': 1, })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_LastWeekOfMonthx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_LastWeekOfMonthx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(LastWeekOfMonth(2, **{'week': 1, 'weekday': 1, })) + self.idx = 
self.get_index_for_offset(LastWeekOfMonth(2, **{'week': 1, 'weekday': 1, })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_LastWeekOfMonthx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Microx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Micro(1, **{})) + self.idx = self.get_index_for_offset(Micro(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Microx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, 
self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Microx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Micro(2, **{})) + self.idx = self.get_index_for_offset(Micro(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Microx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Millix1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Milli(1, **{})) + self.idx = self.get_index_for_offset(Milli(1, **{})) self.df = 
DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Millix1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Millix2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Milli(2, **{})) + self.idx = self.get_index_for_offset(Milli(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Millix2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Minutex1(object): 
goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Minute(1, **{})) + self.idx = self.get_index_for_offset(Minute(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Minutex1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Minutex2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Minute(2, **{})) + self.idx = self.get_index_for_offset(Minute(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) 
for col in self.df.columns]) def time_frame_ctor_dtindex_Minutex2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_MonthBeginx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(MonthBegin(1, **{})) + self.idx = self.get_index_for_offset(MonthBegin(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_MonthBeginx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_MonthBeginx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - 
self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(MonthBegin(2, **{})) + self.idx = self.get_index_for_offset(MonthBegin(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_MonthBeginx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_MonthEndx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(MonthEnd(1, **{})) + self.idx = self.get_index_for_offset(MonthEnd(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def 
time_frame_ctor_dtindex_MonthEndx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_MonthEndx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(MonthEnd(2, **{})) + self.idx = self.get_index_for_offset(MonthEnd(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_MonthEndx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Nanox1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + 
(off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Nano(1, **{})) + self.idx = self.get_index_for_offset(Nano(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Nanox1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Nanox2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Nano(2, **{})) + self.idx = self.get_index_for_offset(Nano(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Nanox2(self): DataFrame(self.d) + def get_period_count(self, start_date, 
off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_QuarterBeginx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(QuarterBegin(1, **{})) + self.idx = self.get_index_for_offset(QuarterBegin(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_QuarterBeginx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_QuarterBeginx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 
1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(QuarterBegin(2, **{})) + self.idx = self.get_index_for_offset(QuarterBegin(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_QuarterBeginx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_QuarterEndx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(QuarterEnd(1, **{})) + self.idx = self.get_index_for_offset(QuarterEnd(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_QuarterEndx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days 
= ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_QuarterEndx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(QuarterEnd(2, **{})) + self.idx = self.get_index_for_offset(QuarterEnd(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_QuarterEndx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Secondx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - 
start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Second(1, **{})) + self.idx = self.get_index_for_offset(Second(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Secondx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Secondx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Second(2, **{})) + self.idx = self.get_index_for_offset(Second(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Secondx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 
0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_WeekOfMonthx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(WeekOfMonth(1, **{'week': 1, 'weekday': 1, })) + self.idx = self.get_index_for_offset(WeekOfMonth(1, **{'week': 1, 'weekday': 1, })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_WeekOfMonthx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_WeekOfMonthx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // 
self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(WeekOfMonth(2, **{'week': 1, 'weekday': 1, })) + self.idx = self.get_index_for_offset(WeekOfMonth(2, **{'week': 1, 'weekday': 1, })) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_WeekOfMonthx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Weekx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Week(1, **{})) + self.idx = self.get_index_for_offset(Week(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Weekx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - 
start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_Weekx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(Week(2, **{})) + self.idx = self.get_index_for_offset(Week(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_Weekx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_YearBeginx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 
1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(YearBegin(1, **{})) + self.idx = self.get_index_for_offset(YearBegin(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_YearBeginx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_YearBeginx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(YearBegin(2, **{})) + self.idx = self.get_index_for_offset(YearBegin(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_YearBeginx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + 
return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_YearEndx1(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(YearEnd(1, **{})) + self.idx = self.get_index_for_offset(YearEnd(1, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_YearEndx1(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_dtindex_YearEndx2(object): goal_time = 0.2 def setup(self): - - def get_period_count(start_date, off): - self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (self.ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) - - def get_index_for_offset(off): - self.start_date = 
Timestamp('1/1/1900') - return date_range(self.start_date, periods=min(1000, get_period_count(self.start_date, off)), freq=off) - self.idx = get_index_for_offset(YearEnd(2, **{})) + self.idx = self.get_index_for_offset(YearEnd(2, **{})) self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) self.d = dict([(col, self.df[col]) for col in self.df.columns]) def time_frame_ctor_dtindex_YearEndx2(self): DataFrame(self.d) + def get_period_count(self, start_date, off): + self.ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days + if (self.ten_offsets_in_days == 0): + return 1000 + else: + return min((9 * ((Timestamp.max - start_date).days // self.ten_offsets_in_days)), 1000) + + def get_index_for_offset(self, off): + self.start_date = Timestamp('1/1/1900') + return date_range(self.start_date, periods=min(1000, self.get_period_count(self.start_date, off)), freq=off) + class frame_ctor_list_of_dict(object): goal_time = 0.2 @@ -1657,7 +1657,7 @@ class frame_ctor_nested_dict_int64(object): goal_time = 0.2 def setup(self): - self.data = dict(((i, dict(((j, float(j)) for j in xrange(100)))) for i in xrange(2000))) + self.data = dict(((i, dict(((j, float(j)) for j in range(100)))) for i in range(2000))) def time_frame_ctor_nested_dict_int64(self): DataFrame(self.data) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 2bd51201b45ca..98b0ec73fb23c 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * class frame_apply_axis_1(object): @@ -69,12 +69,12 @@ def setup(self): self.idx = date_range('1/1/2000', periods=100000, freq='D') self.df = DataFrame(randn(100000, 1), columns=['A'], index=self.idx) - def f(x): - self.x = self.x.copy() - self.x['date'] = self.x.index - def time_frame_assign_timeseries_index(self): - f(self.df) + self.f(self.df) + + def f(self, df): + self.x = 
self.df.copy() + self.x['date'] = self.x.index class frame_boolean_row_select(object): @@ -339,80 +339,76 @@ class frame_float_equal(object): goal_time = 0.2 def setup(self): - - def make_pair(frame): - self.df = frame - self.df2 = self.df.copy() - self.df2.ix[((-1), (-1))] = np.nan - return (self.df, self.df2) - - def test_equal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df) - - def test_unequal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df2) self.float_df = DataFrame(np.random.randn(1000, 1000)) self.object_df = DataFrame(([(['foo'] * 1000)] * 1000)) self.nonunique_cols = self.object_df.copy() self.nonunique_cols.columns = (['A'] * len(self.nonunique_cols.columns)) - self.pairs = dict([(name, make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', self.nonunique_cols))]) + self.pairs = dict([(name, self.make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', self.nonunique_cols))]) def time_frame_float_equal(self): - test_equal('float_df') + self.test_equal('float_df') + def make_pair(self, frame): + self.df = frame + self.df2 = self.df.copy() + self.df2.ix[((-1), (-1))] = np.nan + return (self.df, self.df2) -class frame_float_unequal(object): - goal_time = 0.2 + def test_equal(self, name): + (self.df, self.df2) = self.pairs[name] + return self.df.equals(self.df) - def setup(self): + def test_unequal(self, name): + (self.df, self.df2) = self.pairs[name] + return self.df.equals(self.df2) - def make_pair(frame): - self.df = frame - self.df2 = self.df.copy() - self.df2.ix[((-1), (-1))] = np.nan - return (self.df, self.df2) - def test_equal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df) +class frame_float_unequal(object): + goal_time = 0.2 - def test_unequal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df2) + def setup(self): 
self.float_df = DataFrame(np.random.randn(1000, 1000)) self.object_df = DataFrame(([(['foo'] * 1000)] * 1000)) self.nonunique_cols = self.object_df.copy() self.nonunique_cols.columns = (['A'] * len(self.nonunique_cols.columns)) - self.pairs = dict([(name, make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', self.nonunique_cols))]) + self.pairs = dict([(name, self.make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', self.nonunique_cols))]) def time_frame_float_unequal(self): - test_unequal('float_df') + self.test_unequal('float_df') + def make_pair(self, frame): + self.df = frame + self.df2 = self.df.copy() + self.df2.ix[((-1), (-1))] = np.nan + return (self.df, self.df2) -class frame_from_records_generator(object): - goal_time = 0.2 + def test_equal(self, name): + (self.df, self.df2) = self.pairs[name] + return self.df.equals(self.df) - def setup(self): + def test_unequal(self, name): + (self.df, self.df2) = self.pairs[name] + return self.df.equals(self.df2) - def get_data(n=100000): - return ((x, (x * 20), (x * 100)) for x in xrange(n)) + +class frame_from_records_generator(object): + goal_time = 0.2 def time_frame_from_records_generator(self): - self.df = DataFrame.from_records(get_data()) + self.df = DataFrame.from_records(self.get_data()) + + def get_data(self, n=100000): + return ((x, (x * 20), (x * 100)) for x in range(n)) class frame_from_records_generator_nrows(object): goal_time = 0.2 - def setup(self): - - def get_data(n=100000): - return ((x, (x * 20), (x * 100)) for x in xrange(n)) - def time_frame_from_records_generator_nrows(self): - self.df = DataFrame.from_records(get_data(), nrows=1000) + self.df = DataFrame.from_records(self.get_data(), nrows=1000) + + def get_data(self, n=100000): + return ((x, (x * 20), (x * 100)) for x in range(n)) class frame_get_dtype_counts(object): @@ -433,26 +429,26 @@ def setup(self): 
self.df2 = DataFrame(randn(3000, 1), columns=['A']) self.df3 = DataFrame(randn(3000, 1)) - def f(): - if hasattr(self.df, '_item_cache'): - self.df._item_cache.clear() - for (name, col) in self.df.iteritems(): - pass + def time_frame_getitem_single_column(self): + self.h() - def g(): - for (name, col) in self.df.iteritems(): - pass + def f(self): + if hasattr(self.df, '_item_cache'): + self.df._item_cache.clear() + for (name, col) in self.df.iteritems(): + pass - def h(): - for i in xrange(10000): - self.df2['A'] + def g(self): + for (name, col) in self.df.iteritems(): + pass - def j(): - for i in xrange(10000): - self.df3[0] + def h(self): + for i in range(10000): + self.df2['A'] - def time_frame_getitem_single_column(self): - h() + def j(self): + for i in range(10000): + self.df3[0] class frame_getitem_single_column2(object): @@ -463,26 +459,26 @@ def setup(self): self.df2 = DataFrame(randn(3000, 1), columns=['A']) self.df3 = DataFrame(randn(3000, 1)) - def f(): - if hasattr(self.df, '_item_cache'): - self.df._item_cache.clear() - for (name, col) in self.df.iteritems(): - pass + def time_frame_getitem_single_column2(self): + self.j() - def g(): - for (name, col) in self.df.iteritems(): - pass + def f(self): + if hasattr(self.df, '_item_cache'): + self.df._item_cache.clear() + for (name, col) in self.df.iteritems(): + pass - def h(): - for i in xrange(10000): - self.df2['A'] + def g(self): + for (name, col) in self.df.iteritems(): + pass - def j(): - for i in xrange(10000): - self.df3[0] + def h(self): + for i in range(10000): + self.df2['A'] - def time_frame_getitem_single_column2(self): - j() + def j(self): + for i in range(10000): + self.df3[0] class frame_html_repr_trunc_mi(object): @@ -517,14 +513,14 @@ class frame_insert_100_columns_begin(object): def setup(self): self.N = 1000 - def f(K=100): - self.df = DataFrame(index=range(self.N)) - self.new_col = np.random.randn(self.N) - for i in range(K): - self.df.insert(0, i, self.new_col) - def 
time_frame_insert_100_columns_begin(self): - f() + self.f() + + def f(self, K=100): + self.df = DataFrame(index=range(self.N)) + self.new_col = np.random.randn(self.N) + for i in range(K): + self.df.insert(0, i, self.new_col) class frame_insert_500_columns_end(object): @@ -533,14 +529,14 @@ class frame_insert_500_columns_end(object): def setup(self): self.N = 1000 - def f(K=500): - self.df = DataFrame(index=range(self.N)) - self.new_col = np.random.randn(self.N) - for i in range(K): - self.df[i] = self.new_col - def time_frame_insert_500_columns_end(self): - f() + self.f() + + def f(self, K=500): + self.df = DataFrame(index=range(self.N)) + self.new_col = np.random.randn(self.N) + for i in range(K): + self.df[i] = self.new_col class frame_interpolate(object): @@ -597,26 +593,26 @@ def setup(self): self.df2 = DataFrame(randn(3000, 1), columns=['A']) self.df3 = DataFrame(randn(3000, 1)) - def f(): - if hasattr(self.df, '_item_cache'): - self.df._item_cache.clear() - for (name, col) in self.df.iteritems(): - pass + def time_frame_iteritems(self): + self.f() - def g(): - for (name, col) in self.df.iteritems(): - pass + def f(self): + if hasattr(self.df, '_item_cache'): + self.df._item_cache.clear() + for (name, col) in self.df.iteritems(): + pass - def h(): - for i in xrange(10000): - self.df2['A'] + def g(self): + for (name, col) in self.df.iteritems(): + pass - def j(): - for i in xrange(10000): - self.df3[0] + def h(self): + for i in range(10000): + self.df2['A'] - def time_frame_iteritems(self): - f() + def j(self): + for i in range(10000): + self.df3[0] class frame_iteritems_cached(object): @@ -627,26 +623,26 @@ def setup(self): self.df2 = DataFrame(randn(3000, 1), columns=['A']) self.df3 = DataFrame(randn(3000, 1)) - def f(): - if hasattr(self.df, '_item_cache'): - self.df._item_cache.clear() - for (name, col) in self.df.iteritems(): - pass + def time_frame_iteritems_cached(self): + self.g() - def g(): - for (name, col) in self.df.iteritems(): - pass + def 
f(self): + if hasattr(self.df, '_item_cache'): + self.df._item_cache.clear() + for (name, col) in self.df.iteritems(): + pass - def h(): - for i in xrange(10000): - self.df2['A'] + def g(self): + for (name, col) in self.df.iteritems(): + pass - def j(): - for i in xrange(10000): - self.df3[0] + def h(self): + for i in range(10000): + self.df2['A'] - def time_frame_iteritems_cached(self): - g() + def j(self): + for i in range(10000): + self.df3[0] class frame_mask_bools(object): @@ -681,112 +677,112 @@ class frame_nonunique_equal(object): goal_time = 0.2 def setup(self): - - def make_pair(frame): - self.df = frame - self.df2 = self.df.copy() - self.df2.ix[((-1), (-1))] = np.nan - return (self.df, self.df2) - - def test_equal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df) - - def test_unequal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df2) self.float_df = DataFrame(np.random.randn(1000, 1000)) self.object_df = DataFrame(([(['foo'] * 1000)] * 1000)) self.nonunique_cols = self.object_df.copy() self.nonunique_cols.columns = (['A'] * len(self.nonunique_cols.columns)) - self.pairs = dict([(name, make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', self.nonunique_cols))]) + self.pairs = dict([(name, self.make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', self.nonunique_cols))]) def time_frame_nonunique_equal(self): - test_equal('nonunique_cols') + self.test_equal('nonunique_cols') + def make_pair(self, frame): + self.df = frame + self.df2 = self.df.copy() + self.df2.ix[((-1), (-1))] = np.nan + return (self.df, self.df2) -class frame_nonunique_unequal(object): - goal_time = 0.2 + def test_equal(self, name): + (self.df, self.df2) = self.pairs[name] + return self.df.equals(self.df) - def setup(self): + def test_unequal(self, name): + (self.df, self.df2) = self.pairs[name] + return 
self.df.equals(self.df2) - def make_pair(frame): - self.df = frame - self.df2 = self.df.copy() - self.df2.ix[((-1), (-1))] = np.nan - return (self.df, self.df2) - def test_equal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df) +class frame_nonunique_unequal(object): + goal_time = 0.2 - def test_unequal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df2) + def setup(self): self.float_df = DataFrame(np.random.randn(1000, 1000)) self.object_df = DataFrame(([(['foo'] * 1000)] * 1000)) self.nonunique_cols = self.object_df.copy() self.nonunique_cols.columns = (['A'] * len(self.nonunique_cols.columns)) - self.pairs = dict([(name, make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', self.nonunique_cols))]) + self.pairs = dict([(name, self.make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', self.nonunique_cols))]) def time_frame_nonunique_unequal(self): - test_unequal('nonunique_cols') + self.test_unequal('nonunique_cols') + def make_pair(self, frame): + self.df = frame + self.df2 = self.df.copy() + self.df2.ix[((-1), (-1))] = np.nan + return (self.df, self.df2) -class frame_object_equal(object): - goal_time = 0.2 + def test_equal(self, name): + (self.df, self.df2) = self.pairs[name] + return self.df.equals(self.df) - def setup(self): + def test_unequal(self, name): + (self.df, self.df2) = self.pairs[name] + return self.df.equals(self.df2) - def make_pair(frame): - self.df = frame - self.df2 = self.df.copy() - self.df2.ix[((-1), (-1))] = np.nan - return (self.df, self.df2) - def test_equal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df) +class frame_object_equal(object): + goal_time = 0.2 - def test_unequal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df2) + def setup(self): self.float_df = DataFrame(np.random.randn(1000, 1000)) 
self.object_df = DataFrame(([(['foo'] * 1000)] * 1000)) self.nonunique_cols = self.object_df.copy() self.nonunique_cols.columns = (['A'] * len(self.nonunique_cols.columns)) - self.pairs = dict([(name, make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', self.nonunique_cols))]) + self.pairs = dict([(name, self.make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', self.nonunique_cols))]) def time_frame_object_equal(self): - test_equal('object_df') + self.test_equal('object_df') + def make_pair(self, frame): + self.df = frame + self.df2 = self.df.copy() + self.df2.ix[((-1), (-1))] = np.nan + return (self.df, self.df2) -class frame_object_unequal(object): - goal_time = 0.2 + def test_equal(self, name): + (self.df, self.df2) = self.pairs[name] + return self.df.equals(self.df) - def setup(self): + def test_unequal(self, name): + (self.df, self.df2) = self.pairs[name] + return self.df.equals(self.df2) - def make_pair(frame): - self.df = frame - self.df2 = self.df.copy() - self.df2.ix[((-1), (-1))] = np.nan - return (self.df, self.df2) - def test_equal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df) +class frame_object_unequal(object): + goal_time = 0.2 - def test_unequal(name): - (self.df, self.df2) = pairs[name] - return self.df.equals(self.df2) + def setup(self): self.float_df = DataFrame(np.random.randn(1000, 1000)) self.object_df = DataFrame(([(['foo'] * 1000)] * 1000)) self.nonunique_cols = self.object_df.copy() self.nonunique_cols.columns = (['A'] * len(self.nonunique_cols.columns)) - self.pairs = dict([(name, make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', self.nonunique_cols))]) + self.pairs = dict([(name, self.make_pair(frame)) for (name, frame) in (('float_df', self.float_df), ('object_df', self.object_df), ('nonunique_cols', 
self.nonunique_cols))]) def time_frame_object_unequal(self): - test_unequal('object_df') + self.test_unequal('object_df') + + def make_pair(self, frame): + self.df = frame + self.df2 = self.df.copy() + self.df2.ix[((-1), (-1))] = np.nan + return (self.df, self.df2) + + def test_equal(self, name): + (self.df, self.df2) = self.pairs[name] + return self.df.equals(self.df) + + def test_unequal(self, name): + (self.df, self.df2) = self.pairs[name] + return self.df.equals(self.df2) class frame_reindex_axis0(object): diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index b0486617a52af..556dd2c364cdf 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -1,6 +1,16 @@ -from pandas_vb_common import * +from .pandas_vb_common import * from pandas.core import common as com -from pandas.util.testing import test_parallel +try: + from pandas.util.testing import test_parallel + have_real_test_parallel = True +except ImportError: + have_real_test_parallel = False + + def test_parallel(num_threads=1): + + def wrapper(fname): + return fname + return wrapper class nogil_groupby_count_2(object): @@ -11,13 +21,15 @@ def setup(self): self.ngroups = 1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) - - @test_parallel(num_threads=2) - def pg2(): - self.df.groupby('key')['data'].count() + if (not have_real_test_parallel): + raise NotImplementedError def time_nogil_groupby_count_2(self): - pg2() + self.pg2() + + @test_parallel(num_threads=2) + def pg2(self): + self.df.groupby('key')['data'].count() class nogil_groupby_last_2(object): @@ -28,13 +40,15 @@ def setup(self): self.ngroups = 1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) - - @test_parallel(num_threads=2) - def pg2(): - self.df.groupby('key')['data'].last() + if (not have_real_test_parallel): + raise 
NotImplementedError def time_nogil_groupby_last_2(self): - pg2() + self.pg2() + + @test_parallel(num_threads=2) + def pg2(self): + self.df.groupby('key')['data'].last() class nogil_groupby_max_2(object): @@ -45,13 +59,15 @@ def setup(self): self.ngroups = 1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) - - @test_parallel(num_threads=2) - def pg2(): - self.df.groupby('key')['data'].max() + if (not have_real_test_parallel): + raise NotImplementedError def time_nogil_groupby_max_2(self): - pg2() + self.pg2() + + @test_parallel(num_threads=2) + def pg2(self): + self.df.groupby('key')['data'].max() class nogil_groupby_mean_2(object): @@ -62,13 +78,15 @@ def setup(self): self.ngroups = 1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) - - @test_parallel(num_threads=2) - def pg2(): - self.df.groupby('key')['data'].mean() + if (not have_real_test_parallel): + raise NotImplementedError def time_nogil_groupby_mean_2(self): - pg2() + self.pg2() + + @test_parallel(num_threads=2) + def pg2(self): + self.df.groupby('key')['data'].mean() class nogil_groupby_min_2(object): @@ -79,13 +97,15 @@ def setup(self): self.ngroups = 1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) - - @test_parallel(num_threads=2) - def pg2(): - self.df.groupby('key')['data'].min() + if (not have_real_test_parallel): + raise NotImplementedError def time_nogil_groupby_min_2(self): - pg2() + self.pg2() + + @test_parallel(num_threads=2) + def pg2(self): + self.df.groupby('key')['data'].min() class nogil_groupby_prod_2(object): @@ -96,13 +116,15 @@ def setup(self): self.ngroups = 1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) - - 
@test_parallel(num_threads=2) - def pg2(): - self.df.groupby('key')['data'].prod() + if (not have_real_test_parallel): + raise NotImplementedError def time_nogil_groupby_prod_2(self): - pg2() + self.pg2() + + @test_parallel(num_threads=2) + def pg2(self): + self.df.groupby('key')['data'].prod() class nogil_groupby_sum_2(object): @@ -113,13 +135,15 @@ def setup(self): self.ngroups = 1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) - - @test_parallel(num_threads=2) - def pg2(): - self.df.groupby('key')['data'].sum() + if (not have_real_test_parallel): + raise NotImplementedError def time_nogil_groupby_sum_2(self): - pg2() + self.pg2() + + @test_parallel(num_threads=2) + def pg2(self): + self.df.groupby('key')['data'].sum() class nogil_groupby_sum_4(object): @@ -130,36 +154,38 @@ def setup(self): self.ngroups = 1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) + if (not have_real_test_parallel): + raise NotImplementedError - def f(): - self.df.groupby('key')['data'].sum() + def time_nogil_groupby_sum_4(self): + self.pg4() - def g2(): - for i in range(2): - f() + def f(self): + self.df.groupby('key')['data'].sum() - def g4(): - for i in range(4): - f() + def g2(self): + for i in range(2): + self.f() - def g8(): - for i in range(8): - f() + def g4(self): + for i in range(4): + self.f() - @test_parallel(num_threads=2) - def pg2(): - f() + def g8(self): + for i in range(8): + self.f() - @test_parallel(num_threads=4) - def pg4(): - f() + @test_parallel(num_threads=2) + def pg2(self): + self.f() - @test_parallel(num_threads=8) - def pg8(): - f() + @test_parallel(num_threads=4) + def pg4(self): + self.f() - def time_nogil_groupby_sum_4(self): - pg4() + @test_parallel(num_threads=8) + def pg8(self): + self.f() class nogil_groupby_sum_8(object): @@ -170,36 +196,38 @@ def setup(self): self.ngroups = 
1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) + if (not have_real_test_parallel): + raise NotImplementedError - def f(): - self.df.groupby('key')['data'].sum() + def time_nogil_groupby_sum_8(self): + self.pg8() - def g2(): - for i in range(2): - f() + def f(self): + self.df.groupby('key')['data'].sum() - def g4(): - for i in range(4): - f() + def g2(self): + for i in range(2): + self.f() - def g8(): - for i in range(8): - f() + def g4(self): + for i in range(4): + self.f() - @test_parallel(num_threads=2) - def pg2(): - f() + def g8(self): + for i in range(8): + self.f() - @test_parallel(num_threads=4) - def pg4(): - f() + @test_parallel(num_threads=2) + def pg2(self): + self.f() - @test_parallel(num_threads=8) - def pg8(): - f() + @test_parallel(num_threads=4) + def pg4(self): + self.f() - def time_nogil_groupby_sum_8(self): - pg8() + @test_parallel(num_threads=8) + def pg8(self): + self.f() class nogil_groupby_var_2(object): @@ -210,13 +238,15 @@ def setup(self): self.ngroups = 1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) - - @test_parallel(num_threads=2) - def pg2(): - self.df.groupby('key')['data'].var() + if (not have_real_test_parallel): + raise NotImplementedError def time_nogil_groupby_var_2(self): - pg2() + self.pg2() + + @test_parallel(num_threads=2) + def pg2(self): + self.df.groupby('key')['data'].var() class nogil_take1d_float64(object): @@ -227,20 +257,22 @@ def setup(self): self.ngroups = 1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) + if (not have_real_test_parallel): + raise NotImplementedError self.N = 10000000.0 self.df = DataFrame({'int64': np.arange(self.N, dtype='int64'), 'float64': np.arange(self.N, dtype='float64'), }) self.indexer = np.arange(100, (len(self.df) - 
100)) - @test_parallel(num_threads=2) - def take_1d_pg2_int64(): - com.take_1d(self.df.int64.values, self.indexer) + def time_nogil_take1d_float64(self): + self.take_1d_pg2_int64() - @test_parallel(num_threads=2) - def take_1d_pg2_float64(): - com.take_1d(self.df.float64.values, self.indexer) + @test_parallel(num_threads=2) + def take_1d_pg2_int64(self): + com.take_1d(self.df.int64.values, self.indexer) - def time_nogil_take1d_float64(self): - take_1d_pg2_int64() + @test_parallel(num_threads=2) + def take_1d_pg2_float64(self): + com.take_1d(self.df.float64.values, self.indexer) class nogil_take1d_int64(object): @@ -251,17 +283,19 @@ def setup(self): self.ngroups = 1000 np.random.seed(1234) self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) + if (not have_real_test_parallel): + raise NotImplementedError self.N = 10000000.0 self.df = DataFrame({'int64': np.arange(self.N, dtype='int64'), 'float64': np.arange(self.N, dtype='float64'), }) self.indexer = np.arange(100, (len(self.df) - 100)) - @test_parallel(num_threads=2) - def take_1d_pg2_int64(): - com.take_1d(self.df.int64.values, self.indexer) + def time_nogil_take1d_int64(self): + self.take_1d_pg2_float64() - @test_parallel(num_threads=2) - def take_1d_pg2_float64(): - com.take_1d(self.df.float64.values, self.indexer) + @test_parallel(num_threads=2) + def take_1d_pg2_int64(self): + com.take_1d(self.df.int64.values, self.indexer) - def time_nogil_take1d_int64(self): - take_1d_pg2_float64() \ No newline at end of file + @test_parallel(num_threads=2) + def take_1d_pg2_float64(self): + com.take_1d(self.df.float64.values, self.indexer) \ No newline at end of file diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 4f1f4e46b4a31..a84a5373651bb 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -1,6 +1,6 @@ -from pandas_vb_common import * -from itertools import product +from .pandas_vb_common 
import * from string import ascii_letters, digits +from itertools import product class groupby_agg_builtins1(object): @@ -128,11 +128,11 @@ def setup(self): self.labels2 = np.random.randint(0, 3, size=self.N) self.df = DataFrame({'key': self.labels, 'key2': self.labels2, 'value1': randn(self.N), 'value2': (['foo', 'bar', 'baz', 'qux'] * (self.N / 4)), }) - def f(g): - return 1 - def time_groupby_frame_apply(self): - self.df.groupby(['key', 'key2']).apply(f) + self.df.groupby(['key', 'key2']).apply(self.f) + + def f(self, g): + return 1 class groupby_frame_apply_overhead(object): @@ -144,11 +144,11 @@ def setup(self): self.labels2 = np.random.randint(0, 3, size=self.N) self.df = DataFrame({'key': self.labels, 'key2': self.labels2, 'value1': randn(self.N), 'value2': (['foo', 'bar', 'baz', 'qux'] * (self.N / 4)), }) - def f(g): - return 1 - def time_groupby_frame_apply_overhead(self): - self.df.groupby('key').apply(f) + self.df.groupby('key').apply(self.f) + + def f(self, g): + return 1 class groupby_frame_cython_many_columns(object): @@ -330,24 +330,24 @@ class groupby_multi_cython(object): def setup(self): self.N = 100000 self.ngroups = 100 - - def get_test_data(ngroups=100, n=self.N): - self.unique_groups = range(self.ngroups) - self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object) - if (len(self.arr) < n): - self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object) - random.shuffle(self.arr) - return self.arr - self.df = DataFrame({'key1': get_test_data(ngroups=self.ngroups), 'key2': get_test_data(ngroups=self.ngroups), 'data1': np.random.randn(self.N), 'data2': np.random.randn(self.N), }) - - def f(): - self.df.groupby(['key1', 'key2']).agg((lambda x: x.values.sum())) + self.df = DataFrame({'key1': self.get_test_data(ngroups=self.ngroups), 'key2': self.get_test_data(ngroups=self.ngroups), 'data1': np.random.randn(self.N), 'data2': np.random.randn(self.N), }) self.simple_series = 
Series(np.random.randn(self.N)) self.key1 = self.df['key1'] def time_groupby_multi_cython(self): self.df.groupby(['key1', 'key2']).sum() + def get_test_data(self, ngroups=100, n=100000): + self.unique_groups = range(self.ngroups) + self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object) + if (len(self.arr) < n): + self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object) + random.shuffle(self.arr) + return self.arr + + def f(self): + self.df.groupby(['key1', 'key2']).agg((lambda x: x.values.sum())) + class groupby_multi_different_functions(object): goal_time = 0.2 @@ -395,24 +395,24 @@ class groupby_multi_python(object): def setup(self): self.N = 100000 self.ngroups = 100 - - def get_test_data(ngroups=100, n=self.N): - self.unique_groups = range(self.ngroups) - self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object) - if (len(self.arr) < n): - self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object) - random.shuffle(self.arr) - return self.arr - self.df = DataFrame({'key1': get_test_data(ngroups=self.ngroups), 'key2': get_test_data(ngroups=self.ngroups), 'data1': np.random.randn(self.N), 'data2': np.random.randn(self.N), }) - - def f(): - self.df.groupby(['key1', 'key2']).agg((lambda x: x.values.sum())) + self.df = DataFrame({'key1': self.get_test_data(ngroups=self.ngroups), 'key2': self.get_test_data(ngroups=self.ngroups), 'data1': np.random.randn(self.N), 'data2': np.random.randn(self.N), }) self.simple_series = Series(np.random.randn(self.N)) self.key1 = self.df['key1'] def time_groupby_multi_python(self): self.df.groupby(['key1', 'key2'])['data1'].agg((lambda x: x.values.sum())) + def get_test_data(self, ngroups=100, n=100000): + self.unique_groups = range(self.ngroups) + self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object) + if (len(self.arr) < n): + self.arr = np.asarray((list(self.arr) + 
self.unique_groups[:(n - len(self.arr))]), dtype=object) + random.shuffle(self.arr) + return self.arr + + def f(self): + self.df.groupby(['key1', 'key2']).agg((lambda x: x.values.sum())) + class groupby_multi_series_op(object): goal_time = 0.2 @@ -420,24 +420,24 @@ class groupby_multi_series_op(object): def setup(self): self.N = 100000 self.ngroups = 100 - - def get_test_data(ngroups=100, n=self.N): - self.unique_groups = range(self.ngroups) - self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object) - if (len(self.arr) < n): - self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object) - random.shuffle(self.arr) - return self.arr - self.df = DataFrame({'key1': get_test_data(ngroups=self.ngroups), 'key2': get_test_data(ngroups=self.ngroups), 'data1': np.random.randn(self.N), 'data2': np.random.randn(self.N), }) - - def f(): - self.df.groupby(['key1', 'key2']).agg((lambda x: x.values.sum())) + self.df = DataFrame({'key1': self.get_test_data(ngroups=self.ngroups), 'key2': self.get_test_data(ngroups=self.ngroups), 'data1': np.random.randn(self.N), 'data2': np.random.randn(self.N), }) self.simple_series = Series(np.random.randn(self.N)) self.key1 = self.df['key1'] def time_groupby_multi_series_op(self): self.df.groupby(['key1', 'key2'])['data1'].agg(np.std) + def get_test_data(self, ngroups=100, n=100000): + self.unique_groups = range(self.ngroups) + self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object) + if (len(self.arr) < n): + self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object) + random.shuffle(self.arr) + return self.arr + + def f(self): + self.df.groupby(['key1', 'key2']).agg((lambda x: x.values.sum())) + class groupby_multi_size(object): goal_time = 0.2 @@ -1468,24 +1468,24 @@ class groupby_series_simple_cython(object): def setup(self): self.N = 100000 self.ngroups = 100 - - def get_test_data(ngroups=100, n=self.N): - 
self.unique_groups = range(self.ngroups) - self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object) - if (len(self.arr) < n): - self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object) - random.shuffle(self.arr) - return self.arr - self.df = DataFrame({'key1': get_test_data(ngroups=self.ngroups), 'key2': get_test_data(ngroups=self.ngroups), 'data1': np.random.randn(self.N), 'data2': np.random.randn(self.N), }) - - def f(): - self.df.groupby(['key1', 'key2']).agg((lambda x: x.values.sum())) + self.df = DataFrame({'key1': self.get_test_data(ngroups=self.ngroups), 'key2': self.get_test_data(ngroups=self.ngroups), 'data1': np.random.randn(self.N), 'data2': np.random.randn(self.N), }) self.simple_series = Series(np.random.randn(self.N)) self.key1 = self.df['key1'] def time_groupby_series_simple_cython(self): self.df.groupby('key1').rank(pct=True) + def get_test_data(self, ngroups=100, n=100000): + self.unique_groups = range(self.ngroups) + self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object) + if (len(self.arr) < n): + self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object) + random.shuffle(self.arr) + return self.arr + + def f(self): + self.df.groupby(['key1', 'key2']).agg((lambda x: x.values.sum())) + class groupby_simple_compress_timing(object): goal_time = 0.2 @@ -1535,12 +1535,12 @@ def setup(self): self.secid_max = int('F0000000', 16) self.step = ((self.secid_max - self.secid_min) // (self.n_securities - 1)) self.security_ids = map((lambda x: hex(x)[2:10].upper()), range(self.secid_min, (self.secid_max + 1), self.step)) - self.data_index = MultiIndex(levels=[self.dates.values, self.security_ids], labels=[[i for i in xrange(self.n_dates) for _ in xrange(self.n_securities)], (range(self.n_securities) * self.n_dates)], names=['date', 'security_id']) + self.data_index = MultiIndex(levels=[self.dates.values, self.security_ids], 
labels=[[i for i in range(self.n_dates) for _ in range(self.n_securities)], (range(self.n_securities) * self.n_dates)], names=['date', 'security_id']) self.n_data = len(self.data_index) - self.columns = Index(['factor{}'.format(i) for i in xrange(1, (self.n_columns + 1))]) + self.columns = Index(['factor{}'.format(i) for i in range(1, (self.n_columns + 1))]) self.data = DataFrame(np.random.randn(self.n_data, self.n_columns), index=self.data_index, columns=self.columns) self.step = int((self.n_data * self.share_na)) - for column_index in xrange(self.n_columns): + for column_index in range(self.n_columns): self.index = column_index while (self.index < self.n_data): self.data.set_value(self.data_index[self.index], self.columns[column_index], np.nan) @@ -1644,12 +1644,12 @@ def setup(self): self.secid_max = int('F0000000', 16) self.step = ((self.secid_max - self.secid_min) // (self.n_securities - 1)) self.security_ids = map((lambda x: hex(x)[2:10].upper()), range(self.secid_min, (self.secid_max + 1), self.step)) - self.data_index = MultiIndex(levels=[self.dates.values, self.security_ids], labels=[[i for i in xrange(self.n_dates) for _ in xrange(self.n_securities)], (range(self.n_securities) * self.n_dates)], names=['date', 'security_id']) + self.data_index = MultiIndex(levels=[self.dates.values, self.security_ids], labels=[[i for i in range(self.n_dates) for _ in range(self.n_securities)], (range(self.n_securities) * self.n_dates)], names=['date', 'security_id']) self.n_data = len(self.data_index) - self.columns = Index(['factor{}'.format(i) for i in xrange(1, (self.n_columns + 1))]) + self.columns = Index(['factor{}'.format(i) for i in range(1, (self.n_columns + 1))]) self.data = DataFrame(np.random.randn(self.n_data, self.n_columns), index=self.data_index, columns=self.columns) self.step = int((self.n_data * self.share_na)) - for column_index in xrange(self.n_columns): + for column_index in range(self.n_columns): self.index = column_index while (self.index < 
self.n_data): self.data.set_value(self.data_index[self.index], self.columns[column_index], np.nan) @@ -1660,6 +1660,16 @@ def time_groupby_transform_ufunc(self): self.data.groupby(level='date').transform(np.max) +class series_value_counts_float64(object): + goal_time = 0.2 + + def setup(self): + self.s = Series(np.random.randint(0, 1000, size=100000)).astype(float) + + def time_series_value_counts_float64(self): + self.s.value_counts() + + class series_value_counts_int64(object): goal_time = 0.2 diff --git a/asv_bench/benchmarks/hdfstore_bench.py b/asv_bench/benchmarks/hdfstore_bench.py index 9e36f735f8608..7638cc2a0f8df 100644 --- a/asv_bench/benchmarks/hdfstore_bench.py +++ b/asv_bench/benchmarks/hdfstore_bench.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * import os @@ -7,15 +7,9 @@ class query_store_table(object): def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.index = date_range('1/1/2000', periods=25000) self.df = DataFrame({'float1': randn(25000), 'float2': randn(25000), }, index=self.index) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) self.store.append('df12', self.df) @@ -25,21 +19,21 @@ def time_query_store_table(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class query_store_table_wide(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.index = date_range('1/1/2000', periods=25000) self.df = DataFrame(np.random.randn(25000, 100), index=self.index) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) self.store.append('df11', self.df) @@ -49,21 +43,21 @@ def time_query_store_table_wide(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class read_store(object): goal_time = 0.2 def setup(self): self.f = 
'__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.index = tm.makeStringIndex(25000) self.df = DataFrame({'float1': randn(25000), 'float2': randn(25000), }, index=self.index) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) self.store.put('df1', self.df) @@ -73,21 +67,21 @@ def time_read_store(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class read_store_mixed(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.index = tm.makeStringIndex(25000) self.df = DataFrame({'float1': randn(25000), 'float2': randn(25000), 'string1': (['foo'] * 25000), 'bool1': ([True] * 25000), 'int1': np.random.randint(0, 250000, size=25000), }, index=self.index) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) self.store.put('df3', self.df) @@ -97,21 +91,21 @@ def time_read_store_mixed(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class read_store_table(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.index = tm.makeStringIndex(25000) self.df = DataFrame({'float1': randn(25000), 'float2': randn(25000), }, index=self.index) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) self.store.append('df7', self.df) @@ -121,22 +115,22 @@ def time_read_store_table(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class read_store_table_mixed(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 10000 self.index = tm.makeStringIndex(self.N) self.df = DataFrame({'float1': randn(self.N), 'float2': randn(self.N), 'string1': (['foo'] * self.N), 'bool1': ([True] * self.N), 
'int1': np.random.randint(0, self.N, size=self.N), }, index=self.index) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) self.store.append('df5', self.df) @@ -146,20 +140,20 @@ def time_read_store_table_mixed(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class read_store_table_panel(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass - self.p = Panel(randn(20, 1000, 25), items=[('Item%03d' % i) for i in xrange(20)], major_axis=date_range('1/1/2000', periods=1000), minor_axis=[('E%03d' % i) for i in xrange(25)]) - remove(self.f) + self.p = Panel(randn(20, 1000, 25), items=[('Item%03d' % i) for i in range(20)], major_axis=date_range('1/1/2000', periods=1000), minor_axis=[('E%03d' % i) for i in range(25)]) + self.remove(self.f) self.store = HDFStore(self.f) self.store.append('p1', self.p) @@ -169,20 +163,20 @@ def time_read_store_table_panel(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class read_store_table_wide(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.df = DataFrame(np.random.randn(25000, 100)) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) self.store.append('df9', self.df) @@ -192,21 +186,21 @@ def time_read_store_table_wide(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class write_store(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.index = tm.makeStringIndex(25000) self.df = DataFrame({'float1': randn(25000), 'float2': randn(25000), }, index=self.index) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) def time_write_store(self): @@ -215,21 +209,21 
@@ def time_write_store(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class write_store_mixed(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.index = tm.makeStringIndex(25000) self.df = DataFrame({'float1': randn(25000), 'float2': randn(25000), 'string1': (['foo'] * 25000), 'bool1': ([True] * 25000), 'int1': np.random.randint(0, 250000, size=25000), }, index=self.index) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) def time_write_store_mixed(self): @@ -238,21 +232,21 @@ def time_write_store_mixed(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class write_store_table(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.index = tm.makeStringIndex(25000) self.df = DataFrame({'float1': randn(25000), 'float2': randn(25000), }, index=self.index) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) def time_write_store_table(self): @@ -261,20 +255,20 @@ def time_write_store_table(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class write_store_table_dc(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass - self.df = DataFrame(np.random.randn(10000, 10), columns=[('C%03d' % i) for i in xrange(10)]) - remove(self.f) + self.df = DataFrame(np.random.randn(10000, 10), columns=[('C%03d' % i) for i in range(10)]) + self.remove(self.f) self.store = HDFStore(self.f) def time_write_store_table_dc(self): @@ -283,21 +277,21 @@ def time_write_store_table_dc(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class write_store_table_mixed(object): 
goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.index = tm.makeStringIndex(25000) self.df = DataFrame({'float1': randn(25000), 'float2': randn(25000), 'string1': (['foo'] * 25000), 'bool1': ([True] * 25000), 'int1': np.random.randint(0, 25000, size=25000), }, index=self.index) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) def time_write_store_table_mixed(self): @@ -306,20 +300,20 @@ def time_write_store_table_mixed(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class write_store_table_panel(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass - self.p = Panel(randn(20, 1000, 25), items=[('Item%03d' % i) for i in xrange(20)], major_axis=date_range('1/1/2000', periods=1000), minor_axis=[('E%03d' % i) for i in xrange(25)]) - remove(self.f) + self.p = Panel(randn(20, 1000, 25), items=[('Item%03d' % i) for i in range(20)], major_axis=date_range('1/1/2000', periods=1000), minor_axis=[('E%03d' % i) for i in range(25)]) + self.remove(self.f) self.store = HDFStore(self.f) def time_write_store_table_panel(self): @@ -328,24 +322,30 @@ def time_write_store_table_panel(self): def teardown(self): self.store.close() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class write_store_table_wide(object): goal_time = 0.2 def setup(self): self.f = '__test__.h5' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.df = DataFrame(np.random.randn(25000, 100)) - remove(self.f) + self.remove(self.f) self.store = HDFStore(self.f) def time_write_store_table_wide(self): self.store.append('df10', self.df) def teardown(self): - self.store.close() \ No newline at end of file + self.store.close() + + def remove(self, f): + try: + os.remove(self.f) + except: + pass \ No newline at end of file diff --git 
a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 9c181c92195ea..8c65f09937df4 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * class datetime_index_intersection(object): @@ -248,7 +248,7 @@ class multiindex_from_product(object): goal_time = 0.2 def setup(self): - self.iterables = [tm.makeStringIndex(10000), xrange(20)] + self.iterables = [tm.makeStringIndex(10000), range(20)] def time_multiindex_from_product(self): MultiIndex.from_product(self.iterables) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index e76a87ab881c9..32d80a7913234 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -1,5 +1,8 @@ -from pandas_vb_common import * -import pandas.computation.expressions as expr +from .pandas_vb_common import * +try: + import pandas.computation.expressions as expr +except: + expr = None class dataframe_getitem_scalar(object): @@ -121,6 +124,8 @@ class indexing_dataframe_boolean_no_ne(object): goal_time = 0.2 def setup(self): + if (expr is None): + raise NotImplementedError self.df = DataFrame(np.random.randn(50000, 100)) self.df2 = DataFrame(np.random.randn(50000, 100)) expr.set_use_numexpr(False) @@ -160,6 +165,8 @@ class indexing_dataframe_boolean_st(object): goal_time = 0.2 def setup(self): + if (expr is None): + raise NotImplementedError self.df = DataFrame(np.random.randn(50000, 100)) self.df2 = DataFrame(np.random.randn(50000, 100)) expr.set_numexpr_threads(1) @@ -421,6 +428,30 @@ def time_series_loc_slice(self): self.s.loc[:800000] +class series_take_dtindex(object): + goal_time = 0.2 + + def setup(self): + self.s = Series(np.random.rand(100000)) + self.ts = Series(np.random.rand(100000), index=date_range('2011-01-01', freq='S', periods=100000)) + self.indexer = ([True, False, True, True, False] * 20000) + + def 
time_series_take_dtindex(self): + self.ts.take(self.indexer) + + +class series_take_intindex(object): + goal_time = 0.2 + + def setup(self): + self.s = Series(np.random.rand(100000)) + self.ts = Series(np.random.rand(100000), index=date_range('2011-01-01', freq='S', periods=100000)) + self.indexer = ([True, False, True, True, False] * 20000) + + def time_series_take_intindex(self): + self.s.take(self.indexer) + + class series_xs_mi_ix(object): goal_time = 0.2 diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 2addc810a218f..3fceed087facb 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * import pandas as pd diff --git a/asv_bench/benchmarks/io_bench.py b/asv_bench/benchmarks/io_bench.py index 9eee932de8b7c..a171641502d3c 100644 --- a/asv_bench/benchmarks/io_bench.py +++ b/asv_bench/benchmarks/io_bench.py @@ -1,6 +1,9 @@ -from pandas_vb_common import * +from .pandas_vb_common import * from pandas import concat, Timestamp -from StringIO import StringIO +try: + from StringIO import StringIO +except ImportError: + from io import StringIO class frame_to_csv(object): @@ -41,20 +44,20 @@ class frame_to_csv_mixed(object): goal_time = 0.2 def setup(self): - - def create_cols(name): - return [('%s%03d' % (name, i)) for i in xrange(5)] - self.df_float = DataFrame(np.random.randn(5000, 5), dtype='float64', columns=create_cols('float')) - self.df_int = DataFrame(np.random.randn(5000, 5), dtype='int64', columns=create_cols('int')) - self.df_bool = DataFrame(True, index=self.df_float.index, columns=create_cols('bool')) - self.df_object = DataFrame('foo', index=self.df_float.index, columns=create_cols('object')) - self.df_dt = DataFrame(Timestamp('20010101'), index=self.df_float.index, columns=create_cols('date')) + self.df_float = DataFrame(np.random.randn(5000, 5), dtype='float64', columns=self.create_cols('float')) + 
self.df_int = DataFrame(np.random.randn(5000, 5), dtype='int64', columns=self.create_cols('int')) + self.df_bool = DataFrame(True, index=self.df_float.index, columns=self.create_cols('bool')) + self.df_object = DataFrame('foo', index=self.df_float.index, columns=self.create_cols('object')) + self.df_dt = DataFrame(Timestamp('20010101'), index=self.df_float.index, columns=self.create_cols('date')) self.df_float.ix[30:500, 1:3] = np.nan self.df = concat([self.df_float, self.df_int, self.df_bool, self.df_object, self.df_dt], axis=1) def time_frame_to_csv_mixed(self): self.df.to_csv('__test__.csv') + def create_cols(self, name): + return [('%s%03d' % (name, i)) for i in range(5)] + class read_csv_infer_datetime_format_custom(object): goal_time = 0.2 diff --git a/asv_bench/benchmarks/io_sql.py b/asv_bench/benchmarks/io_sql.py index e75e691b61c96..9a6b21f9e067a 100644 --- a/asv_bench/benchmarks/io_sql.py +++ b/asv_bench/benchmarks/io_sql.py @@ -1,7 +1,7 @@ -from pandas_vb_common import * -from sqlalchemy import create_engine -import sqlite3 import sqlalchemy +from .pandas_vb_common import * +import sqlite3 +from sqlalchemy import create_engine class sql_datetime_read_and_parse_sqlalchemy(object): diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 08ae439e8fd5d..1da0d37d4a8dd 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * class append_frame_single_homogenous(object): @@ -322,38 +322,38 @@ class series_align_int64_index(object): def setup(self): self.n = 1000000 - - def sample(values, k): - self.sampler = np.random.permutation(len(values)) - return values.take(self.sampler[:k]) self.sz = 500000 self.rng = np.arange(0, 10000000000000, 10000000) self.stamps = (np.datetime64(datetime.now()).view('i8') + self.rng) - self.idx1 = np.sort(sample(self.stamps, self.sz)) - self.idx2 = np.sort(sample(self.stamps, self.sz)) 
+ self.idx1 = np.sort(self.sample(self.stamps, self.sz)) + self.idx2 = np.sort(self.sample(self.stamps, self.sz)) self.ts1 = Series(np.random.randn(self.sz), self.idx1) self.ts2 = Series(np.random.randn(self.sz), self.idx2) def time_series_align_int64_index(self): (self.ts1 + self.ts2) + def sample(self, values, k): + self.sampler = np.random.permutation(len(values)) + return values.take(self.sampler[:k]) + class series_align_left_monotonic(object): goal_time = 0.2 def setup(self): self.n = 1000000 - - def sample(values, k): - self.sampler = np.random.permutation(len(values)) - return values.take(self.sampler[:k]) self.sz = 500000 self.rng = np.arange(0, 10000000000000, 10000000) self.stamps = (np.datetime64(datetime.now()).view('i8') + self.rng) - self.idx1 = np.sort(sample(self.stamps, self.sz)) - self.idx2 = np.sort(sample(self.stamps, self.sz)) + self.idx1 = np.sort(self.sample(self.stamps, self.sz)) + self.idx2 = np.sort(self.sample(self.stamps, self.sz)) self.ts1 = Series(np.random.randn(self.sz), self.idx1) self.ts2 = Series(np.random.randn(self.sz), self.idx2) def time_series_align_left_monotonic(self): - self.ts1.align(self.ts2, join='left') \ No newline at end of file + self.ts1.align(self.ts2, join='left') + + def sample(self, values, k): + self.sampler = np.random.permutation(len(values)) + return values.take(self.sampler[:k]) \ No newline at end of file diff --git a/asv_bench/benchmarks/miscellaneous.py b/asv_bench/benchmarks/miscellaneous.py index b9c02c85fb096..fe610ef4cb376 100644 --- a/asv_bench/benchmarks/miscellaneous.py +++ b/asv_bench/benchmarks/miscellaneous.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * from pandas.util.decorators import cache_readonly diff --git a/asv_bench/benchmarks/packers.py b/asv_bench/benchmarks/packers.py index 81fa7c2238d16..12e48295d8d05 100644 --- a/asv_bench/benchmarks/packers.py +++ b/asv_bench/benchmarks/packers.py @@ -1,9 +1,9 @@ +from .pandas_vb_common import * from 
numpy.random import randint import pandas as pd from collections import OrderedDict from pandas.compat import BytesIO import sqlite3 -from pandas_vb_common import * import os from sqlalchemy import create_engine import numpy as np @@ -16,12 +16,6 @@ class packers_read_csv(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -31,24 +25,24 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df.to_csv(self.f) def time_packers_read_csv(self): pd.read_csv(self.f) + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_read_excel(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -58,7 +52,7 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.bio = BytesIO() self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlsxwriter') self.df[:2000].to_excel(self.writer) @@ -68,18 +62,18 @@ def time_packers_read_excel(self): self.bio.seek(0) pd.read_excel(self.bio) + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_read_hdf_store(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = 
date_range('20000101', periods=self.N, freq='H') @@ -89,24 +83,24 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df2.to_hdf(self.f, 'df') def time_packers_read_hdf_store(self): pd.read_hdf(self.f, 'df') + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_read_hdf_table(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -116,24 +110,24 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df2.to_hdf(self.f, 'df', format='table') def time_packers_read_hdf_table(self): pd.read_hdf(self.f, 'df') + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_read_json(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -143,25 +137,25 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df.to_json(self.f, orient='split') self.df.index = np.arange(self.N) def time_packers_read_json(self): pd.read_json(self.f, orient='split') 
+ def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_read_json_date_index(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -171,24 +165,24 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df.to_json(self.f, orient='split') def time_packers_read_json_date_index(self): pd.read_json(self.f, orient='split') + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_read_pack(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -198,24 +192,24 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df2.to_msgpack(self.f) def time_packers_read_pack(self): pd.read_msgpack(self.f) + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_read_pickle(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -225,24 +219,24 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), 
index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df2.to_pickle(self.f) def time_packers_read_pickle(self): pd.read_pickle(self.f) + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_read_sql(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -252,25 +246,25 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.engine = create_engine('sqlite:///:memory:') self.df2.to_sql('table', self.engine, if_exists='replace') def time_packers_read_sql(self): pd.read_sql_table('table', self.engine) + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_read_stata(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -280,24 +274,24 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df.to_stata(self.f, {'index': 'tc', }) def time_packers_read_stata(self): pd.read_stata(self.f) + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_read_stata_with_validation(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - 
except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -307,7 +301,7 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df['int8_'] = [randint(np.iinfo(np.int8).min, (np.iinfo(np.int8).max - 27)) for _ in range(self.N)] self.df['int16_'] = [randint(np.iinfo(np.int16).min, (np.iinfo(np.int16).max - 27)) for _ in range(self.N)] self.df['int32_'] = [randint(np.iinfo(np.int32).min, (np.iinfo(np.int32).max - 27)) for _ in range(self.N)] @@ -317,18 +311,18 @@ def remove(f): def time_packers_read_stata_with_validation(self): pd.read_stata(self.f) + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_write_csv(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -338,13 +332,19 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) def time_packers_write_csv(self): self.df.to_csv(self.f) def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_excel_openpyxl(object): @@ -352,12 +352,6 @@ class packers_write_excel_openpyxl(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -367,7 +361,7 
@@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.bio = BytesIO() def time_packers_write_excel_openpyxl(self): @@ -376,18 +370,18 @@ def time_packers_write_excel_openpyxl(self): self.df[:2000].to_excel(self.writer) self.writer.save() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_write_excel_xlsxwriter(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -397,7 +391,7 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.bio = BytesIO() def time_packers_write_excel_xlsxwriter(self): @@ -406,18 +400,18 @@ def time_packers_write_excel_xlsxwriter(self): self.df[:2000].to_excel(self.writer) self.writer.save() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_write_excel_xlwt(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -427,7 +421,7 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.bio = BytesIO() 
def time_packers_write_excel_xlwt(self): @@ -436,18 +430,18 @@ def time_packers_write_excel_xlwt(self): self.df[:2000].to_excel(self.writer) self.writer.save() + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_write_hdf_store(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -457,13 +451,19 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) def time_packers_write_hdf_store(self): self.df2.to_hdf(self.f, 'df') def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_hdf_table(object): @@ -471,12 +471,6 @@ class packers_write_hdf_table(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -486,13 +480,19 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) def time_packers_write_hdf_table(self): self.df2.to_hdf(self.f, 'df', table=True) def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_json(object): @@ -500,12 +500,6 @@ class packers_write_json(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - 
os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -515,14 +509,20 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df.index = np.arange(self.N) def time_packers_write_json(self): self.df.to_json(self.f, orient='split') def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_json_T(object): @@ -530,12 +530,6 @@ class packers_write_json_T(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -545,14 +539,20 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df.index = np.arange(self.N) def time_packers_write_json_T(self): self.df.to_json(self.f, orient='columns') def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_json_date_index(object): @@ -560,12 +560,6 @@ class packers_write_json_date_index(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -575,13 +569,19 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), 
randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) def time_packers_write_json_date_index(self): self.df.to_json(self.f, orient='split') def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_json_mixed_delta_int_tstamp(object): @@ -589,12 +589,6 @@ class packers_write_json_mixed_delta_int_tstamp(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -604,7 +598,7 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.cols = [(lambda i: ('{0}_timedelta'.format(i), [pd.Timedelta(('%d seconds' % randrange(1000000.0))) for _ in range(self.N)])), (lambda i: ('{0}_int'.format(i), randint(100000000.0, size=self.N))), (lambda i: ('{0}_timestamp'.format(i), [pd.Timestamp((1418842918083256000 + randrange(1000000000.0, 1e+18, 200))) for _ in range(self.N)]))] self.df_mixed = DataFrame(OrderedDict([self.cols[(i % len(self.cols))](i) for i in range(self.C)]), index=self.index) @@ -612,7 +606,13 @@ def time_packers_write_json_mixed_delta_int_tstamp(self): self.df_mixed.to_json(self.f, orient='split') def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_json_mixed_float_int(object): @@ -620,12 +620,6 @@ class packers_write_json_mixed_float_int(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 
self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -635,7 +629,7 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.cols = [(lambda i: ('{0}_float'.format(i), randn(self.N))), (lambda i: ('{0}_int'.format(i), randint(100000000.0, size=self.N)))] self.df_mixed = DataFrame(OrderedDict([self.cols[(i % len(self.cols))](i) for i in range(self.C)]), index=self.index) @@ -643,7 +637,13 @@ def time_packers_write_json_mixed_float_int(self): self.df_mixed.to_json(self.f, orient='index') def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_json_mixed_float_int_T(object): @@ -651,12 +651,6 @@ class packers_write_json_mixed_float_int_T(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -666,7 +660,7 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.cols = [(lambda i: ('{0}_float'.format(i), randn(self.N))), (lambda i: ('{0}_int'.format(i), randint(100000000.0, size=self.N)))] self.df_mixed = DataFrame(OrderedDict([self.cols[(i % len(self.cols))](i) for i in range(self.C)]), index=self.index) @@ -674,7 +668,13 @@ def time_packers_write_json_mixed_float_int_T(self): self.df_mixed.to_json(self.f, orient='columns') def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + 
try: + os.remove(self.f) + except: + pass class packers_write_json_mixed_float_int_str(object): @@ -682,12 +682,6 @@ class packers_write_json_mixed_float_int_str(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -697,7 +691,7 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.cols = [(lambda i: ('{0}_float'.format(i), randn(self.N))), (lambda i: ('{0}_int'.format(i), randint(100000000.0, size=self.N))), (lambda i: ('{0}_str'.format(i), [('%08x' % randrange((16 ** 8))) for _ in range(self.N)]))] self.df_mixed = DataFrame(OrderedDict([self.cols[(i % len(self.cols))](i) for i in range(self.C)]), index=self.index) @@ -705,7 +699,13 @@ def time_packers_write_json_mixed_float_int_str(self): self.df_mixed.to_json(self.f, orient='split') def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_pack(object): @@ -713,12 +713,6 @@ class packers_write_pack(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -728,13 +722,19 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) def time_packers_write_pack(self): self.df2.to_msgpack(self.f) def teardown(self): - remove(self.f) + 
self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_pickle(object): @@ -742,12 +742,6 @@ class packers_write_pickle(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -757,13 +751,19 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) def time_packers_write_pickle(self): self.df2.to_pickle(self.f) def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_sql(object): @@ -771,12 +771,6 @@ class packers_write_sql(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -786,24 +780,24 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.engine = create_engine('sqlite:///:memory:') def time_packers_write_sql(self): self.df2.to_sql('table', self.engine, if_exists='replace') + def remove(self, f): + try: + os.remove(self.f) + except: + pass + class packers_write_stata(object): goal_time = 0.2 def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -813,14 +807,20 @@ def remove(f): 
self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df.to_stata(self.f, {'index': 'tc', }) def time_packers_write_stata(self): self.df.to_stata(self.f, {'index': 'tc', }) def teardown(self): - remove(self.f) + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass class packers_write_stata_with_validation(object): @@ -828,12 +828,6 @@ class packers_write_stata_with_validation(object): def setup(self): self.f = '__test__.msg' - - def remove(f): - try: - os.remove(self.f) - except: - pass self.N = 100000 self.C = 5 self.index = date_range('20000101', periods=self.N, freq='H') @@ -843,7 +837,7 @@ def remove(f): self.index = date_range('20000101', periods=self.N, freq='H') self.df2 = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - remove(self.f) + self.remove(self.f) self.df['int8_'] = [randint(np.iinfo(np.int8).min, (np.iinfo(np.int8).max - 27)) for _ in range(self.N)] self.df['int16_'] = [randint(np.iinfo(np.int16).min, (np.iinfo(np.int16).max - 27)) for _ in range(self.N)] self.df['int32_'] = [randint(np.iinfo(np.int32).min, (np.iinfo(np.int32).max - 27)) for _ in range(self.N)] @@ -854,4 +848,10 @@ def time_packers_write_stata_with_validation(self): self.df.to_stata(self.f, {'index': 'tc', }) def teardown(self): - remove(self.f) \ No newline at end of file + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass \ No newline at end of file diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py deleted file mode 120000 index 6e2e449a4c00a..0000000000000 --- 
a/asv_bench/benchmarks/pandas_vb_common.py +++ /dev/null @@ -1 +0,0 @@ -../../vb_suite/pandas_vb_common.py \ No newline at end of file diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py new file mode 100644 index 0000000000000..a1326d63a112a --- /dev/null +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -0,0 +1,30 @@ +from pandas import * +import pandas as pd +from datetime import timedelta +from numpy.random import randn +from numpy.random import randint +from numpy.random import permutation +import pandas.util.testing as tm +import random +import numpy as np +try: + from pandas.compat import range +except ImportError: + pass + +np.random.seed(1234) +try: + import pandas._tseries as lib +except: + import pandas.lib as lib + +try: + Panel = WidePanel +except Exception: + pass + +# didn't add to namespace until later +try: + from pandas.core.index import MultiIndex +except ImportError: + pass diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py index c755cb122a0bf..0b0e73847aa96 100644 --- a/asv_bench/benchmarks/panel_ctor.py +++ b/asv_bench/benchmarks/panel_ctor.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * class panel_from_dict_all_different_indexes(object): @@ -8,7 +8,7 @@ def setup(self): self.data_frames = {} self.start = datetime(1990, 1, 1) self.end = datetime(2012, 1, 1) - for x in xrange(100): + for x in range(100): self.end += timedelta(days=1) self.dr = np.asarray(date_range(self.start, self.end)) self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr) @@ -23,7 +23,7 @@ class panel_from_dict_equiv_indexes(object): def setup(self): self.data_frames = {} - for x in xrange(100): + for x in range(100): self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1))) self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * 
len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr) self.data_frames[x] = self.df @@ -38,7 +38,7 @@ class panel_from_dict_same_index(object): def setup(self): self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1))) self.data_frames = {} - for x in xrange(100): + for x in range(100): self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr) self.data_frames[x] = self.df @@ -53,7 +53,7 @@ def setup(self): self.data_frames = {} self.start = datetime(1990, 1, 1) self.end = datetime(2012, 1, 1) - for x in xrange(100): + for x in range(100): if (x == 50): self.end += timedelta(days=1) self.dr = np.asarray(date_range(self.start, self.end)) diff --git a/asv_bench/benchmarks/panel_methods.py b/asv_bench/benchmarks/panel_methods.py index 4145b68dca997..90118eaf6e407 100644 --- a/asv_bench/benchmarks/panel_methods.py +++ b/asv_bench/benchmarks/panel_methods.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * class panel_pct_change_items(object): diff --git a/asv_bench/benchmarks/parser_vb.py b/asv_bench/benchmarks/parser_vb.py index 46167dc2bb33c..18cd4de6cc9c5 100644 --- a/asv_bench/benchmarks/parser_vb.py +++ b/asv_bench/benchmarks/parser_vb.py @@ -1,7 +1,10 @@ -from cStringIO import StringIO -from pandas_vb_common import * +from .pandas_vb_common import * import os from pandas import read_csv, read_table +try: + from cStringIO import StringIO +except ImportError: + from io import StringIO class read_csv_comment2(object): diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py index d1df1b429c656..f46082ac6f288 100644 --- a/asv_bench/benchmarks/plotting.py +++ b/asv_bench/benchmarks/plotting.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * try: from pandas import date_range except ImportError: diff --git a/asv_bench/benchmarks/reindex.py 
b/asv_bench/benchmarks/reindex.py index 03e654b4886cc..b1c039058ff8f 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * from random import shuffle @@ -168,20 +168,20 @@ def setup(self): self.ts3 = self.ts2.reindex(self.ts.index) self.ts4 = self.ts3.astype('float32') - def pad(source_series, target_index): - try: - source_series.reindex(target_index, method='pad') - except: - source_series.reindex(target_index, fillMethod='pad') + def time_reindex_daterange_backfill(self): + self.backfill(self.ts2, self.ts.index) - def backfill(source_series, target_index): - try: - source_series.reindex(target_index, method='backfill') - except: - source_series.reindex(target_index, fillMethod='backfill') + def pad(self, source_series, target_index): + try: + source_series.reindex(target_index, method='pad') + except: + source_series.reindex(target_index, fillMethod='pad') - def time_reindex_daterange_backfill(self): - backfill(self.ts2, self.ts.index) + def backfill(self, source_series, target_index): + try: + source_series.reindex(target_index, method='backfill') + except: + source_series.reindex(target_index, fillMethod='backfill') class reindex_daterange_pad(object): @@ -194,20 +194,20 @@ def setup(self): self.ts3 = self.ts2.reindex(self.ts.index) self.ts4 = self.ts3.astype('float32') - def pad(source_series, target_index): - try: - source_series.reindex(target_index, method='pad') - except: - source_series.reindex(target_index, fillMethod='pad') + def time_reindex_daterange_pad(self): + self.pad(self.ts2, self.ts.index) - def backfill(source_series, target_index): - try: - source_series.reindex(target_index, method='backfill') - except: - source_series.reindex(target_index, fillMethod='backfill') + def pad(self, source_series, target_index): + try: + source_series.reindex(target_index, method='pad') + except: + source_series.reindex(target_index, fillMethod='pad') - 
def time_reindex_daterange_pad(self): - pad(self.ts2, self.ts.index) + def backfill(self, source_series, target_index): + try: + source_series.reindex(target_index, method='backfill') + except: + source_series.reindex(target_index, fillMethod='backfill') class reindex_fillna_backfill(object): @@ -220,21 +220,21 @@ def setup(self): self.ts3 = self.ts2.reindex(self.ts.index) self.ts4 = self.ts3.astype('float32') - def pad(source_series, target_index): - try: - source_series.reindex(target_index, method='pad') - except: - source_series.reindex(target_index, fillMethod='pad') - - def backfill(source_series, target_index): - try: - source_series.reindex(target_index, method='backfill') - except: - source_series.reindex(target_index, fillMethod='backfill') - def time_reindex_fillna_backfill(self): self.ts3.fillna(method='backfill') + def pad(self, source_series, target_index): + try: + source_series.reindex(target_index, method='pad') + except: + source_series.reindex(target_index, fillMethod='pad') + + def backfill(self, source_series, target_index): + try: + source_series.reindex(target_index, method='backfill') + except: + source_series.reindex(target_index, fillMethod='backfill') + class reindex_fillna_backfill_float32(object): goal_time = 0.2 @@ -246,21 +246,21 @@ def setup(self): self.ts3 = self.ts2.reindex(self.ts.index) self.ts4 = self.ts3.astype('float32') - def pad(source_series, target_index): - try: - source_series.reindex(target_index, method='pad') - except: - source_series.reindex(target_index, fillMethod='pad') - - def backfill(source_series, target_index): - try: - source_series.reindex(target_index, method='backfill') - except: - source_series.reindex(target_index, fillMethod='backfill') - def time_reindex_fillna_backfill_float32(self): self.ts4.fillna(method='backfill') + def pad(self, source_series, target_index): + try: + source_series.reindex(target_index, method='pad') + except: + source_series.reindex(target_index, fillMethod='pad') + + def 
backfill(self, source_series, target_index): + try: + source_series.reindex(target_index, method='backfill') + except: + source_series.reindex(target_index, fillMethod='backfill') + class reindex_fillna_pad(object): goal_time = 0.2 @@ -272,21 +272,21 @@ def setup(self): self.ts3 = self.ts2.reindex(self.ts.index) self.ts4 = self.ts3.astype('float32') - def pad(source_series, target_index): - try: - source_series.reindex(target_index, method='pad') - except: - source_series.reindex(target_index, fillMethod='pad') - - def backfill(source_series, target_index): - try: - source_series.reindex(target_index, method='backfill') - except: - source_series.reindex(target_index, fillMethod='backfill') - def time_reindex_fillna_pad(self): self.ts3.fillna(method='pad') + def pad(self, source_series, target_index): + try: + source_series.reindex(target_index, method='pad') + except: + source_series.reindex(target_index, fillMethod='pad') + + def backfill(self, source_series, target_index): + try: + source_series.reindex(target_index, method='backfill') + except: + source_series.reindex(target_index, fillMethod='backfill') + class reindex_fillna_pad_float32(object): goal_time = 0.2 @@ -298,21 +298,21 @@ def setup(self): self.ts3 = self.ts2.reindex(self.ts.index) self.ts4 = self.ts3.astype('float32') - def pad(source_series, target_index): - try: - source_series.reindex(target_index, method='pad') - except: - source_series.reindex(target_index, fillMethod='pad') - - def backfill(source_series, target_index): - try: - source_series.reindex(target_index, method='backfill') - except: - source_series.reindex(target_index, fillMethod='backfill') - def time_reindex_fillna_pad_float32(self): self.ts4.fillna(method='pad') + def pad(self, source_series, target_index): + try: + source_series.reindex(target_index, method='pad') + except: + source_series.reindex(target_index, fillMethod='pad') + + def backfill(self, source_series, target_index): + try: + source_series.reindex(target_index, 
method='backfill') + except: + source_series.reindex(target_index, fillMethod='backfill') + class reindex_frame_level_align(object): goal_time = 0.2 @@ -362,18 +362,18 @@ class series_align_irregular_string(object): def setup(self): self.n = 50000 self.indices = tm.makeStringIndex(self.n) - - def sample(values, k): - self.sampler = np.arange(len(values)) - shuffle(self.sampler) - return values.take(self.sampler[:k]) self.subsample_size = 40000 self.x = Series(np.random.randn(50000), self.indices) - self.y = Series(np.random.randn(self.subsample_size), index=sample(self.indices, self.subsample_size)) + self.y = Series(np.random.randn(self.subsample_size), index=self.sample(self.indices, self.subsample_size)) def time_series_align_irregular_string(self): (self.x + self.y) + def sample(self, values, k): + self.sampler = np.arange(len(values)) + shuffle(self.sampler) + return values.take(self.sampler[:k]) + class series_drop_duplicates_int(object): goal_time = 0.2 diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 9b78c287c5ad4..e9f33ebfce0bd 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * from pandas.compat import range from datetime import timedelta diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index b4081957af97b..604fa5092a231 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * from pandas.core.reshape import melt @@ -22,19 +22,19 @@ class reshape_pivot_time_series(object): def setup(self): self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)]) self.df = DataFrame(np.random.randn(10000, 4), index=self.index) - - def unpivot(frame): - (N, K) = frame.shape - self.data = {'value': frame.values.ravel('F'), 'variable': 
np.asarray(frame.columns).repeat(N), 'date': np.tile(np.asarray(frame.index), K), } - return DataFrame(self.data, columns=['date', 'variable', 'value']) self.index = date_range('1/1/2000', periods=10000, freq='h') self.df = DataFrame(randn(10000, 50), index=self.index, columns=range(50)) - self.pdf = unpivot(self.df) + self.pdf = self.unpivot(self.df) self.f = (lambda : self.pdf.pivot('date', 'variable', 'value')) def time_reshape_pivot_time_series(self): self.f() + def unpivot(self, frame): + (N, K) = frame.shape + self.data = {'value': frame.values.ravel('F'), 'variable': np.asarray(frame.columns).repeat(N), 'date': np.tile(np.asarray(frame.index), K), } + return DataFrame(self.data, columns=['date', 'variable', 'value']) + class reshape_stack_simple(object): goal_time = 0.2 diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 9cd61c741dae1..d2167a8b6e9e1 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * class series_isin_int64(object): diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index dbf35f5e40f55..d7ee58fc978ea 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -1,6 +1,6 @@ -from pandas_vb_common import * -import scipy.sparse +from .pandas_vb_common import * import pandas.sparse.series +import scipy.sparse from pandas.core.sparse import SparseSeries, SparseDataFrame from pandas.core.sparse import SparseDataFrame diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 98e2bbfce1a44..4125357455d2e 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * class stat_ops_frame_mean_float_axis_0(object): diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py index 
5adfbf4c2557d..e4f91b1b9c0c6 100644 --- a/asv_bench/benchmarks/strings.py +++ b/asv_bench/benchmarks/strings.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * import string import itertools as IT import pandas.util.testing as testing @@ -8,99 +8,99 @@ class strings_cat(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_cat(self): self.many.str.cat(sep=',') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_center(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_center(self): self.many.str.center(100) + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_contains_few(object): goal_time = 0.2 def setup(self): - - 
def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_contains_few(self): self.few.str.contains('matchthis') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_contains_few_noregex(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_contains_few_noregex(self): self.few.str.contains('matchthis', regex=False) + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_contains_many(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + 
(string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_contains_many(self): self.many.str.contains('matchthis') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_contains_many_noregex(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_contains_many_noregex(self): self.many.str.contains('matchthis', regex=False) + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_count(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_count(self): 
self.many.str.count('matchthis') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_encode_decode(object): goal_time = 0.2 @@ -116,278 +116,278 @@ class strings_endswith(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_endswith(self): self.many.str.endswith('matchthis') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_extract(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_extract(self): self.many.str.extract('(\\w*)matchthis(\\w*)') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class 
strings_findall(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_findall(self): self.many.str.findall('[A-Z]+') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_get(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_get(self): self.many.str.get(0) + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_get_dummies(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase 
* 42)), strlen=19, size=10000) - self.s = make_series(string.uppercase, strlen=10, size=10000).str.join('|') + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) + self.s = self.make_series(string.ascii_uppercase, strlen=10, size=10000).str.join('|') def time_strings_get_dummies(self): self.s.str.get_dummies('|') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_join_split(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_join_split(self): self.many.str.join('--').str.split('--') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_join_split_expand(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = 
self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_join_split_expand(self): self.many.str.join('--').str.split('--', expand=True) + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_len(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_len(self): self.many.str.len() + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_lower(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_lower(self): self.many.str.lower() + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class 
strings_lstrip(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_lstrip(self): self.many.str.lstrip('matchthis') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_match(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_match(self): self.many.str.match('mat..this') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_pad(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + 
(string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_pad(self): self.many.str.pad(100, side='both') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_repeat(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_repeat(self): self.many.str.repeat(list(IT.islice(IT.cycle(range(1, 4)), len(self.many)))) + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_replace(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_replace(self): 
self.many.str.replace('(matchthis)', '\x01\x01') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_rstrip(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_rstrip(self): self.many.str.rstrip('matchthis') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_slice(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_slice(self): self.many.str.slice(5, 15, 2) + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_startswith(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - 
return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_startswith(self): self.many.str.startswith('matchthis') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_strip(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_strip(self): self.many.str.strip('matchthis') + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_title(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + 
string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_title(self): self.many.str.title() + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + class strings_upper(object): goal_time = 0.2 def setup(self): - - def make_series(letters, strlen, size): - return Series(np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))) - self.many = make_series(('matchthis' + string.uppercase), strlen=19, size=10000) - self.few = make_series(('matchthis' + (string.uppercase * 42)), strlen=19, size=10000) + self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) def time_strings_upper(self): - self.many.str.upper() \ No newline at end of file + self.many.str.upper() + + def make_series(self, letters, strlen, size): + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) \ No newline at end of file diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 36a0f98e3f5ef..2f252a4d3e1dc 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -1,4 +1,4 @@ -from pandas_vb_common import * +from .pandas_vb_common import * from pandas import to_timedelta diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 266c198de1455..db0c526f25c7b 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -1,10 +1,13 @@ from pandas.tseries.converter import DatetimeConverter +from .pandas_vb_common import * import pandas as pd from datetime import timedelta import datetime as dt -from 
pandas_vb_common import * +try: + import pandas.tseries.holiday +except ImportError: + pass from pandas.tseries.frequencies import infer_freq -import pandas.tseries.holiday import numpy as np @@ -631,6 +634,63 @@ def time_timeseries_custom_bmonthend_incr_n(self): (self.date + (10 * self.cme)) +class timeseries_datetimeindex_offset_delta(object): + goal_time = 0.2 + + def setup(self): + self.N = 100000 + self.rng = date_range(start='1/1/2000', periods=self.N, freq='T') + if hasattr(Series, 'convert'): + Series.resample = Series.convert + self.ts = Series(np.random.randn(self.N), index=self.rng) + self.N = 100000 + self.idx1 = date_range(start='20140101', freq='T', periods=self.N) + self.delta_offset = pd.offsets.Day() + self.fast_offset = pd.offsets.DateOffset(months=2, days=2) + self.slow_offset = pd.offsets.BusinessDay() + + def time_timeseries_datetimeindex_offset_delta(self): + (self.idx1 + self.delta_offset) + + +class timeseries_datetimeindex_offset_fast(object): + goal_time = 0.2 + + def setup(self): + self.N = 100000 + self.rng = date_range(start='1/1/2000', periods=self.N, freq='T') + if hasattr(Series, 'convert'): + Series.resample = Series.convert + self.ts = Series(np.random.randn(self.N), index=self.rng) + self.N = 100000 + self.idx1 = date_range(start='20140101', freq='T', periods=self.N) + self.delta_offset = pd.offsets.Day() + self.fast_offset = pd.offsets.DateOffset(months=2, days=2) + self.slow_offset = pd.offsets.BusinessDay() + + def time_timeseries_datetimeindex_offset_fast(self): + (self.idx1 + self.fast_offset) + + +class timeseries_datetimeindex_offset_slow(object): + goal_time = 0.2 + + def setup(self): + self.N = 100000 + self.rng = date_range(start='1/1/2000', periods=self.N, freq='T') + if hasattr(Series, 'convert'): + Series.resample = Series.convert + self.ts = Series(np.random.randn(self.N), index=self.rng) + self.N = 100000 + self.idx1 = date_range(start='20140101', freq='T', periods=self.N) + self.delta_offset = pd.offsets.Day() + 
self.fast_offset = pd.offsets.DateOffset(months=2, days=2) + self.slow_offset = pd.offsets.BusinessDay() + + def time_timeseries_datetimeindex_offset_slow(self): + (self.idx1 + self.slow_offset) + + class timeseries_day_apply(object): goal_time = 0.2 @@ -723,15 +783,15 @@ def setup(self): self.idx1 = date_range(start='20140101', freq='T', periods=self.N) self.idx2 = period_range(start='20140101', freq='T', periods=self.N) - def iter_n(iterable, n=None): - self.i = 0 - for _ in iterable: - self.i += 1 - if ((n is not None) and (self.i > n)): - break - def time_timeseries_iter_datetimeindex(self): - iter_n(self.idx1) + self.iter_n(self.idx1) + + def iter_n(self, iterable, n=None): + self.i = 0 + for _ in iterable: + self.i += 1 + if ((n is not None) and (self.i > n)): + break class timeseries_iter_datetimeindex_preexit(object): @@ -748,15 +808,15 @@ def setup(self): self.idx1 = date_range(start='20140101', freq='T', periods=self.N) self.idx2 = period_range(start='20140101', freq='T', periods=self.N) - def iter_n(iterable, n=None): - self.i = 0 - for _ in iterable: - self.i += 1 - if ((n is not None) and (self.i > n)): - break - def time_timeseries_iter_datetimeindex_preexit(self): - iter_n(self.idx1, self.M) + self.iter_n(self.idx1, self.M) + + def iter_n(self, iterable, n=None): + self.i = 0 + for _ in iterable: + self.i += 1 + if ((n is not None) and (self.i > n)): + break class timeseries_iter_periodindex(object): @@ -773,15 +833,15 @@ def setup(self): self.idx1 = date_range(start='20140101', freq='T', periods=self.N) self.idx2 = period_range(start='20140101', freq='T', periods=self.N) - def iter_n(iterable, n=None): - self.i = 0 - for _ in iterable: - self.i += 1 - if ((n is not None) and (self.i > n)): - break - def time_timeseries_iter_periodindex(self): - iter_n(self.idx2) + self.iter_n(self.idx2) + + def iter_n(self, iterable, n=None): + self.i = 0 + for _ in iterable: + self.i += 1 + if ((n is not None) and (self.i > n)): + break class 
timeseries_iter_periodindex_preexit(object): @@ -798,15 +858,15 @@ def setup(self): self.idx1 = date_range(start='20140101', freq='T', periods=self.N) self.idx2 = period_range(start='20140101', freq='T', periods=self.N) - def iter_n(iterable, n=None): - self.i = 0 - for _ in iterable: - self.i += 1 - if ((n is not None) and (self.i > n)): - break - def time_timeseries_iter_periodindex_preexit(self): - iter_n(self.idx2, self.M) + self.iter_n(self.idx2, self.M) + + def iter_n(self, iterable, n=None): + self.i = 0 + for _ in iterable: + self.i += 1 + if ((n is not None) and (self.i > n)): + break class timeseries_large_lookup_value(object): @@ -859,6 +919,63 @@ def time_timeseries_resample_datetime64(self): self.ts.resample('1S', how='last') +class timeseries_series_offset_delta(object): + goal_time = 0.2 + + def setup(self): + self.N = 100000 + self.rng = date_range(start='1/1/2000', periods=self.N, freq='T') + if hasattr(Series, 'convert'): + Series.resample = Series.convert + self.ts = Series(np.random.randn(self.N), index=self.rng) + self.N = 100000 + self.s = Series(date_range(start='20140101', freq='T', periods=self.N)) + self.delta_offset = pd.offsets.Day() + self.fast_offset = pd.offsets.DateOffset(months=2, days=2) + self.slow_offset = pd.offsets.BusinessDay() + + def time_timeseries_series_offset_delta(self): + (self.s + self.delta_offset) + + +class timeseries_series_offset_fast(object): + goal_time = 0.2 + + def setup(self): + self.N = 100000 + self.rng = date_range(start='1/1/2000', periods=self.N, freq='T') + if hasattr(Series, 'convert'): + Series.resample = Series.convert + self.ts = Series(np.random.randn(self.N), index=self.rng) + self.N = 100000 + self.s = Series(date_range(start='20140101', freq='T', periods=self.N)) + self.delta_offset = pd.offsets.Day() + self.fast_offset = pd.offsets.DateOffset(months=2, days=2) + self.slow_offset = pd.offsets.BusinessDay() + + def time_timeseries_series_offset_fast(self): + (self.s + self.fast_offset) + + 
+class timeseries_series_offset_slow(object): + goal_time = 0.2 + + def setup(self): + self.N = 100000 + self.rng = date_range(start='1/1/2000', periods=self.N, freq='T') + if hasattr(Series, 'convert'): + Series.resample = Series.convert + self.ts = Series(np.random.randn(self.N), index=self.rng) + self.N = 100000 + self.s = Series(date_range(start='20140101', freq='T', periods=self.N)) + self.delta_offset = pd.offsets.Day() + self.fast_offset = pd.offsets.DateOffset(months=2, days=2) + self.slow_offset = pd.offsets.BusinessDay() + + def time_timeseries_series_offset_slow(self): + (self.s + self.slow_offset) + + class timeseries_slice_minutely(object): goal_time = 0.2 diff --git a/asv_bench/vbench_to_asv.py b/asv_bench/vbench_to_asv.py index b3980ffed1a57..c3041ec2b1ba1 100644 --- a/asv_bench/vbench_to_asv.py +++ b/asv_bench/vbench_to_asv.py @@ -43,7 +43,29 @@ def __init__(self): def visit_ClassDef(self, node): self.transforms = {} self.in_class_define = True + + functions_to_promote = [] + setup_func = None + + for class_func in ast.iter_child_nodes(node): + if isinstance(class_func, ast.FunctionDef): + if class_func.name == 'setup': + setup_func = class_func + for anon_func in ast.iter_child_nodes(class_func): + if isinstance(anon_func, ast.FunctionDef): + functions_to_promote.append(anon_func) + + if setup_func: + for func in functions_to_promote: + setup_func.body.remove(func) + func.args.args.insert(0, ast.Name(id='self', ctx=ast.Load())) + node.body.append(func) + self.transforms[func.name] = 'self.' 
+ func.name + + ast.fix_missing_locations(node) + self.generic_visit(node) + return node def visit_TryExcept(self, node): @@ -81,18 +103,8 @@ def visit_FunctionDef(self, node): """Delete functions that are empty due to imports being moved""" self.in_class_define = False - if self.in_setup: - node.col_offset -= 4 - ast.increment_lineno(node, -1) - - if node.name == 'setup': - self.in_setup = True - self.generic_visit(node) - if node.name == 'setup': - self.in_setup = False - if node.body: return node diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 4ec2258df56f2..2c9b6a0a889f4 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -512,9 +512,49 @@ entire suite. This is done using one of the following constructs: Running the performance test suite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Performance matters and it is worth considering that your code has not introduced +performance regressions. *pandas* is in the process of migrating to the +`asv library `__ +to enable easy monitoring of the performance of critical *pandas* operations. +These benchmarks are all found in the ``pandas/asv_bench`` directory. *asv* +supports both python2 and python3. + +.. note:: + + The *asv* benchmark suite was translated from the previous framework, vbench, + so many stylistic issues are likely a result of automated transformation of the + code. + +To install asv:: + + pip install git+https://github.com/spacetelescope/asv + +If you need to run a benchmark, change your directory to asv_bench/ and run +the following if you have been developing on master:: + + asv continuous master + +Otherwise, if you are working on another branch, either of the following can be used:: + + asv continuous master HEAD + asv continuous master your_branch + +This will checkout the master revision and run the suite on both master and +your commit. Running the full test suite can take up to one hour and use up +to 3GB of RAM. 
Usually it is sufficient to paste a subset of the results in +to the Pull Request to show that the committed changes do not cause unexpected +performance regressions. + +You can run specific benchmarks using the *-b* flag which takes a regular expression. + +Information on how to write a benchmark can be found in +`asv's documentation <http://asv.readthedocs.org/en/latest/writing_benchmarks.html>`__. + +Running the vbench performance test suite (phasing out) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Performance matters and it is worth considering that your code has not introduced -performance regressions. Currently *pandas* uses the `vbench library `__ +performance regressions. Historically, *pandas* used `vbench library `__ to enable easy monitoring of the performance of critical *pandas* operations. These benchmarks are all found in the ``pandas/vb_suite`` directory. vbench currently only works on python2. @@ -530,7 +570,7 @@ using pip. If you need to run a benchmark, change your directory to the *pandas This will checkout the master revision and run the suite on both master and your commit. Running the full test suite can take up to one hour and use up -to 3GB of RAM. Usually it is sufficient to past a subset of the results in +to 3GB of RAM. Usually it is sufficient to paste a subset of the results in to the Pull Request to show that the committed changes do not cause unexpected performance regressions. 
diff --git a/vb_suite/attrs_caching.py b/vb_suite/attrs_caching.py index e196546e632fe..a7e3ed7094ed6 100644 --- a/vb_suite/attrs_caching.py +++ b/vb_suite/attrs_caching.py @@ -1,6 +1,6 @@ from vbench.benchmark import Benchmark -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- diff --git a/vb_suite/binary_ops.py b/vb_suite/binary_ops.py index cd8d1ad93b6e1..4c74688ce660e 100644 --- a/vb_suite/binary_ops.py +++ b/vb_suite/binary_ops.py @@ -1,7 +1,7 @@ from vbench.benchmark import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ SECTION = 'Binary ops' diff --git a/vb_suite/categoricals.py b/vb_suite/categoricals.py index cb33f1bb6c0b1..a08d479df20cb 100644 --- a/vb_suite/categoricals.py +++ b/vb_suite/categoricals.py @@ -1,7 +1,7 @@ from vbench.benchmark import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- diff --git a/vb_suite/ctors.py b/vb_suite/ctors.py index 6af8e65b8f57d..8123322383f0a 100644 --- a/vb_suite/ctors.py +++ b/vb_suite/ctors.py @@ -1,7 +1,7 @@ from vbench.benchmark import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- diff --git a/vb_suite/eval.py b/vb_suite/eval.py index a350cdc54cd17..bf80aad956184 100644 --- a/vb_suite/eval.py +++ b/vb_suite/eval.py @@ -1,7 +1,7 @@ from vbench.benchmark import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * import pandas as pd df = DataFrame(np.random.randn(20000, 100)) 
df2 = DataFrame(np.random.randn(20000, 100)) @@ -112,7 +112,7 @@ start_date=datetime(2013, 7, 26)) -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ setup = common_setup + """ diff --git a/vb_suite/frame_ctor.py b/vb_suite/frame_ctor.py index 8ad63fc556c2e..0d57da7b88d3b 100644 --- a/vb_suite/frame_ctor.py +++ b/vb_suite/frame_ctor.py @@ -5,7 +5,7 @@ except: import pandas.core.datetools as offsets -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * try: from pandas.tseries.offsets import * except: @@ -40,7 +40,7 @@ # nested dict, integer indexes, regression described in #621 setup = common_setup + """ -data = dict((i,dict((j,float(j)) for j in xrange(100))) for i in xrange(2000)) +data = dict((i,dict((j,float(j)) for j in range(100))) for i in range(2000)) """ frame_ctor_nested_dict_int64 = Benchmark("DataFrame(data)", setup) diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py index ce5109efe8f6d..46343e9c607fd 100644 --- a/vb_suite/frame_methods.py +++ b/vb_suite/frame_methods.py @@ -1,7 +1,7 @@ from vbench.api import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- @@ -98,11 +98,11 @@ def g(): pass def h(): - for i in xrange(10000): + for i in range(10000): df2['A'] def j(): - for i in xrange(10000): + for i in range(10000): df3[0] """ @@ -126,8 +126,8 @@ def j(): setup = common_setup + """ idx = date_range('1/1/2000', periods=100000, freq='D') df = DataFrame(randn(100000, 1),columns=['A'],index=idx) -def f(x): - x = x.copy() +def f(df): + x = df.copy() x['date'] = x.index """ @@ -494,7 +494,7 @@ def test_unequal(name): setup = common_setup + """ def get_data(n=100000): - return ((x, x*20, x*100) for x in xrange(n)) + return ((x, x*20, x*100) for x in range(n)) """ 
frame_from_records_generator = Benchmark('df = DataFrame.from_records(get_data())', diff --git a/vb_suite/gil.py b/vb_suite/gil.py index d5aec7c3e2917..df2bd2dcd8db4 100644 --- a/vb_suite/gil.py +++ b/vb_suite/gil.py @@ -1,11 +1,20 @@ from vbench.api import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ basic = common_setup + """ -from pandas.util.testing import test_parallel +try: + from pandas.util.testing import test_parallel + have_real_test_parallel = True +except ImportError: + have_real_test_parallel = False + def test_parallel(num_threads=1): + def wrapper(fname): + return fname + + return wrapper N = 1000000 ngroups = 1000 @@ -13,6 +22,9 @@ df = DataFrame({'key' : np.random.randint(0,ngroups,size=N), 'data' : np.random.randn(N) }) + +if not have_real_test_parallel: + raise NotImplementedError """ setup = basic + """ diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py index 3e3b0241545e5..bc21372225322 100644 --- a/vb_suite/groupby.py +++ b/vb_suite/groupby.py @@ -1,14 +1,14 @@ from vbench.api import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ setup = common_setup + """ N = 100000 ngroups = 100 -def get_test_data(ngroups=100, n=N): +def get_test_data(ngroups=100, n=100000): unique_groups = range(ngroups) arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) @@ -429,16 +429,16 @@ def f(g): security_ids = map(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step)) data_index = MultiIndex(levels=[dates.values, security_ids], - labels=[[i for i in xrange(n_dates) for _ in xrange(n_securities)], range(n_securities) * n_dates], + labels=[[i for i in range(n_dates) for _ in range(n_securities)], range(n_securities) * n_dates], names=['date', 'security_id']) n_data = len(data_index) -columns = Index(['factor{}'.format(i) for i in 
xrange(1, n_columns + 1)]) +columns = Index(['factor{}'.format(i) for i in range(1, n_columns + 1)]) data = DataFrame(np.random.randn(n_data, n_columns), index=data_index, columns=columns) step = int(n_data * share_na) -for column_index in xrange(n_columns): +for column_index in range(n_columns): index = column_index while index < n_data: data.set_value(data_index[index], columns[column_index], np.nan) diff --git a/vb_suite/hdfstore_bench.py b/vb_suite/hdfstore_bench.py index a822ad1c614be..393fd4cc77e66 100644 --- a/vb_suite/hdfstore_bench.py +++ b/vb_suite/hdfstore_bench.py @@ -3,7 +3,7 @@ start_date = datetime(2012, 7, 1) -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * import os f = '__test__.h5' @@ -234,8 +234,8 @@ def remove(f): # select from a panel table setup13 = common_setup + """ -p = Panel(randn(20, 1000, 25), items= [ 'Item%03d' % i for i in xrange(20) ], - major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in xrange(25) ]) +p = Panel(randn(20, 1000, 25), items= [ 'Item%03d' % i for i in range(20) ], + major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in range(25) ]) remove(f) store = HDFStore(f) @@ -251,8 +251,8 @@ def remove(f): # write to a panel table setup14 = common_setup + """ -p = Panel(randn(20, 1000, 25), items= [ 'Item%03d' % i for i in xrange(20) ], - major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in xrange(25) ]) +p = Panel(randn(20, 1000, 25), items= [ 'Item%03d' % i for i in range(20) ], + major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in range(25) ]) remove(f) store = HDFStore(f) @@ -266,7 +266,7 @@ def remove(f): # write to a table (data_columns) setup15 = common_setup + """ -df = DataFrame(np.random.randn(10000,10),columns = [ 'C%03d' % i for i in xrange(10) ]) +df = DataFrame(np.random.randn(10000,10),columns = [ 'C%03d' % i for i in range(10) ]) 
remove(f) store = HDFStore(f) diff --git a/vb_suite/index_object.py b/vb_suite/index_object.py index 768eb2658af8f..2ab2bc15f3853 100644 --- a/vb_suite/index_object.py +++ b/vb_suite/index_object.py @@ -4,7 +4,7 @@ SECTION = "Index / MultiIndex objects" -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- @@ -111,7 +111,7 @@ # setup = common_setup + """ -iterables = [tm.makeStringIndex(10000), xrange(20)] +iterables = [tm.makeStringIndex(10000), range(20)] """ multiindex_from_product = Benchmark('MultiIndex.from_product(iterables)', diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py index f2236c48fb002..3d95d52dccd71 100644 --- a/vb_suite/indexing.py +++ b/vb_suite/indexing.py @@ -3,7 +3,7 @@ SECTION = 'Indexing and scalar value access' -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- @@ -140,7 +140,13 @@ start_date=datetime(2012, 1, 1)) setup = common_setup + """ -import pandas.computation.expressions as expr +try: + import pandas.computation.expressions as expr +except: + expr = None + +if expr is None: + raise NotImplementedError df = DataFrame(np.random.randn(50000, 100)) df2 = DataFrame(np.random.randn(50000, 100)) expr.set_numexpr_threads(1) @@ -152,7 +158,13 @@ setup = common_setup + """ -import pandas.computation.expressions as expr +try: + import pandas.computation.expressions as expr +except: + expr = None + +if expr is None: + raise NotImplementedError df = DataFrame(np.random.randn(50000, 100)) df2 = DataFrame(np.random.randn(50000, 100)) expr.set_use_numexpr(False) diff --git a/vb_suite/inference.py b/vb_suite/inference.py index 8855f7e654bb1..aaa51aa5163ce 100644 --- a/vb_suite/inference.py +++ b/vb_suite/inference.py @@ -4,7 +4,7 @@ # from GH 7332 -setup = """from pandas_vb_common import * +setup 
= """from .pandas_vb_common import * import pandas as pd N = 500000 df_int64 = DataFrame(dict(A = np.arange(N,dtype='int64'), B = np.arange(N,dtype='int64'))) diff --git a/vb_suite/io_bench.py b/vb_suite/io_bench.py index 483d61387898d..af5f6076515cc 100644 --- a/vb_suite/io_bench.py +++ b/vb_suite/io_bench.py @@ -1,8 +1,8 @@ from vbench.api import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * -from StringIO import StringIO +common_setup = """from .pandas_vb_common import * +from io import StringIO """ #---------------------------------------------------------------------- @@ -77,7 +77,7 @@ from pandas import concat, Timestamp def create_cols(name): - return [ "%s%03d" % (name,i) for i in xrange(5) ] + return [ "%s%03d" % (name,i) for i in range(5) ] df_float = DataFrame(np.random.randn(5000, 5),dtype='float64',columns=create_cols('float')) df_int = DataFrame(np.random.randn(5000, 5),dtype='int64',columns=create_cols('int')) df_bool = DataFrame(True,index=df_float.index,columns=create_cols('bool')) diff --git a/vb_suite/io_sql.py b/vb_suite/io_sql.py index 7f580165939bb..ba8367e7e356b 100644 --- a/vb_suite/io_sql.py +++ b/vb_suite/io_sql.py @@ -1,7 +1,7 @@ from vbench.api import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * import sqlite3 import sqlalchemy from sqlalchemy import create_engine diff --git a/vb_suite/join_merge.py b/vb_suite/join_merge.py index 244c6abe71b05..238a129552e90 100644 --- a/vb_suite/join_merge.py +++ b/vb_suite/join_merge.py @@ -1,7 +1,7 @@ from vbench.benchmark import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ setup = common_setup + """ diff --git a/vb_suite/miscellaneous.py b/vb_suite/miscellaneous.py index 27efadc7acfe0..da2c736e79ea7 100644 --- a/vb_suite/miscellaneous.py +++ 
b/vb_suite/miscellaneous.py @@ -1,7 +1,7 @@ from vbench.benchmark import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- diff --git a/vb_suite/packers.py b/vb_suite/packers.py index 60738a62bd287..69ec10822b392 100644 --- a/vb_suite/packers.py +++ b/vb_suite/packers.py @@ -3,7 +3,7 @@ start_date = datetime(2013, 5, 1) -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * import os import pandas as pd from pandas.core import common as com diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py index 128e262d45d66..a1326d63a112a 100644 --- a/vb_suite/pandas_vb_common.py +++ b/vb_suite/pandas_vb_common.py @@ -7,6 +7,10 @@ import pandas.util.testing as tm import random import numpy as np +try: + from pandas.compat import range +except ImportError: + pass np.random.seed(1234) try: diff --git a/vb_suite/panel_ctor.py b/vb_suite/panel_ctor.py index b6637bb1e61ec..9f497e7357a61 100644 --- a/vb_suite/panel_ctor.py +++ b/vb_suite/panel_ctor.py @@ -1,7 +1,7 @@ from vbench.benchmark import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- @@ -14,7 +14,7 @@ dr = np.asarray(DatetimeIndex(start=datetime(1990,1,1), end=datetime(2012,1,1), freq=datetools.Day(1))) data_frames = {} -for x in xrange(100): +for x in range(100): df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr), "c": [2]*len(dr)}, index=dr) data_frames[x] = df @@ -27,7 +27,7 @@ setup_equiv_indexes = common_setup + """ data_frames = {} -for x in xrange(100): +for x in range(100): dr = np.asarray(DatetimeIndex(start=datetime(1990,1,1), end=datetime(2012,1,1), freq=datetools.Day(1))) df = DataFrame({"a": [0]*len(dr), "b": 
[1]*len(dr), @@ -44,7 +44,7 @@ data_frames = {} start = datetime(1990,1,1) end = datetime(2012,1,1) -for x in xrange(100): +for x in range(100): end += timedelta(days=1) dr = np.asarray(date_range(start, end)) df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr), @@ -61,7 +61,7 @@ data_frames = {} start = datetime(1990,1,1) end = datetime(2012,1,1) -for x in xrange(100): +for x in range(100): if x == 50: end += timedelta(days=1) dr = np.asarray(date_range(start, end)) diff --git a/vb_suite/panel_methods.py b/vb_suite/panel_methods.py index 5e88671a23707..28586422a66e3 100644 --- a/vb_suite/panel_methods.py +++ b/vb_suite/panel_methods.py @@ -1,7 +1,7 @@ from vbench.api import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- diff --git a/vb_suite/parser_vb.py b/vb_suite/parser_vb.py index 96da3fac2de5e..bb9ccbdb5e854 100644 --- a/vb_suite/parser_vb.py +++ b/vb_suite/parser_vb.py @@ -1,7 +1,7 @@ from vbench.api import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * from pandas import read_csv, read_table """ @@ -44,7 +44,11 @@ start_date=datetime(2011, 11, 1)) setup = common_setup + """ -from cStringIO import StringIO +try: + from cStringIO import StringIO +except ImportError: + from io import StringIO + import os N = 10000 K = 8 @@ -63,7 +67,11 @@ read_table_multiple_date = Benchmark(cmd, setup, start_date=sdate) setup = common_setup + """ -from cStringIO import StringIO +try: + from cStringIO import StringIO +except ImportError: + from io import StringIO + import os N = 10000 K = 8 @@ -81,7 +89,11 @@ read_table_multiple_date_baseline = Benchmark(cmd, setup, start_date=sdate) setup = common_setup + """ -from cStringIO import StringIO +try: + from cStringIO import StringIO +except ImportError: + from io import 
StringIO + data = '''\ 0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336 0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285 diff --git a/vb_suite/plotting.py b/vb_suite/plotting.py index 88d272e7be4b3..79e81e9eea8f4 100644 --- a/vb_suite/plotting.py +++ b/vb_suite/plotting.py @@ -1,7 +1,7 @@ from vbench.benchmark import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * try: from pandas import date_range diff --git a/vb_suite/reindex.py b/vb_suite/reindex.py index 07f0e0f7e1bff..443eb43835745 100644 --- a/vb_suite/reindex.py +++ b/vb_suite/reindex.py @@ -1,7 +1,7 @@ from vbench.benchmark import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- diff --git a/vb_suite/replace.py b/vb_suite/replace.py index 23d41e7c8e632..9326aa5becca9 100644 --- a/vb_suite/replace.py +++ b/vb_suite/replace.py @@ -1,7 +1,7 @@ from vbench.api import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * from datetime import timedelta N = 1000000 @@ -15,7 +15,7 @@ ts = Series(np.random.randn(N), index=rng) """ -large_dict_setup = """from pandas_vb_common import * +large_dict_setup = """from .pandas_vb_common import * from pandas.compat import range n = 10 ** 6 start_value = 10 ** 5 diff --git a/vb_suite/reshape.py b/vb_suite/reshape.py index f6eaeb353acb5..daab96103f2c5 100644 --- a/vb_suite/reshape.py +++ b/vb_suite/reshape.py @@ -1,7 +1,7 @@ from vbench.api import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * index = MultiIndex.from_arrays([np.arange(100).repeat(100), 
np.roll(np.tile(np.arange(100), 100), 25)]) df = DataFrame(np.random.randn(10000, 4), index=index) diff --git a/vb_suite/series_methods.py b/vb_suite/series_methods.py index d0c31cb04ca6a..cd8688495fa09 100644 --- a/vb_suite/series_methods.py +++ b/vb_suite/series_methods.py @@ -1,7 +1,7 @@ from vbench.api import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ setup = common_setup + """ diff --git a/vb_suite/sparse.py b/vb_suite/sparse.py index 5da06451fe2d1..53e2778ee0865 100644 --- a/vb_suite/sparse.py +++ b/vb_suite/sparse.py @@ -1,7 +1,7 @@ from vbench.benchmark import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- diff --git a/vb_suite/stat_ops.py b/vb_suite/stat_ops.py index 544ad6d00ed37..8d7c30dc9fdcf 100644 --- a/vb_suite/stat_ops.py +++ b/vb_suite/stat_ops.py @@ -1,7 +1,7 @@ from vbench.benchmark import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ #---------------------------------------------------------------------- diff --git a/vb_suite/strings.py b/vb_suite/strings.py index f229e0ddedbae..0948df5673a0d 100644 --- a/vb_suite/strings.py +++ b/vb_suite/strings.py @@ -1,6 +1,6 @@ from vbench.api import Benchmark -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * """ setup = common_setup + """ @@ -9,11 +9,11 @@ def make_series(letters, strlen, size): return Series( - np.fromiter(IT.cycle(letters), count=size*strlen, dtype='|S1') - .view('|S{}'.format(strlen))) + [str(x) for x in np.fromiter(IT.cycle(letters), count=size*strlen, dtype='|S1') + .view('|S{}'.format(strlen))]) -many = make_series('matchthis'+string.uppercase, strlen=19, size=10000) # 
31% matches -few = make_series('matchthis'+string.uppercase*42, strlen=19, size=10000) # 1% matches +many = make_series('matchthis'+string.ascii_uppercase, strlen=19, size=10000) # 31% matches +few = make_series('matchthis'+string.ascii_uppercase*42, strlen=19, size=10000) # 1% matches """ strings_cat = Benchmark("many.str.cat(sep=',')", setup) @@ -47,7 +47,7 @@ def make_series(letters, strlen, size): strings_get = Benchmark("many.str.get(0)", setup) setup = setup + """ -s = make_series(string.uppercase, strlen=10, size=10000).str.join('|') +s = make_series(string.ascii_uppercase, strlen=10, size=10000).str.join('|') """ strings_get_dummies = Benchmark("s.str.get_dummies('|')", setup) diff --git a/vb_suite/suite.py b/vb_suite/suite.py index ca7a4a9b70836..70a6278c0852d 100644 --- a/vb_suite/suite.py +++ b/vb_suite/suite.py @@ -136,7 +136,7 @@ def generate_rst_files(benchmarks): These historical benchmark graphs were produced with `vbench `__. -The ``pandas_vb_common`` setup script can be found here_ +The ``.pandas_vb_common`` setup script can be found here_ .. 
_here: https://github.com/pydata/pandas/tree/master/vb_suite diff --git a/vb_suite/timedelta.py b/vb_suite/timedelta.py index febd70739b2c9..378968ea1379a 100644 --- a/vb_suite/timedelta.py +++ b/vb_suite/timedelta.py @@ -1,7 +1,7 @@ from vbench.api import Benchmark from datetime import datetime -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * from pandas import to_timedelta """ diff --git a/vb_suite/timeseries.py b/vb_suite/timeseries.py index 6a99bd0dfdc65..7e10b333d5c56 100644 --- a/vb_suite/timeseries.py +++ b/vb_suite/timeseries.py @@ -11,7 +11,7 @@ def date_range(start=None, end=None, periods=None, freq=None): return DatetimeIndex(start=start, end=end, periods=periods, offset=freq) -common_setup = """from pandas_vb_common import * +common_setup = """from .pandas_vb_common import * from datetime import timedelta N = 100000 @@ -312,7 +312,10 @@ def date_range(start=None, end=None, periods=None, freq=None): setup = common_setup + """ import datetime as dt import pandas as pd -import pandas.tseries.holiday +try: + import pandas.tseries.holiday +except ImportError: + pass import numpy as np date = dt.datetime(2011,1,1) @@ -417,9 +420,9 @@ def iter_n(iterable, n=None): setup = common_setup + """ N = 100000 idx1 = date_range(start='20140101', freq='T', periods=N) -delta_offset = Day() -fast_offset = DateOffset(months=2, days=2) -slow_offset = offsets.BusinessDay() +delta_offset = pd.offsets.Day() +fast_offset = pd.offsets.DateOffset(months=2, days=2) +slow_offset = pd.offsets.BusinessDay() """ @@ -431,9 +434,9 @@ def iter_n(iterable, n=None): setup = common_setup + """ N = 100000 s = Series(date_range(start='20140101', freq='T', periods=N)) -delta_offset = Day() -fast_offset = DateOffset(months=2, days=2) -slow_offset = offsets.BusinessDay() +delta_offset = pd.offsets.Day() +fast_offset = pd.offsets.DateOffset(months=2, days=2) +slow_offset = pd.offsets.BusinessDay() """