From 010e80dd5646d6820d947bdede9c7f9ab599e9ac Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 13 Nov 2017 18:04:55 -0800 Subject: [PATCH 1/5] Cleanup/parametrize asv --- asv_bench/benchmarks/period.py | 146 ++++++++++++++---------------- asv_bench/benchmarks/timestamp.py | 35 ++++--- 2 files changed, 88 insertions(+), 93 deletions(-) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index df3c2bf3e4b46..c089c1a625677 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -3,84 +3,11 @@ class PeriodProperties(object): - def setup(self): - self.per = Period('2012-06-01', freq='M') - - def time_year(self): - self.per.year - - def time_month(self): - self.per.month - - def time_quarter(self): - self.per.quarter + params = ['M', 'min'] + param_names = ['freq'] - def time_day(self): - self.per.day - - def time_hour(self): - self.per.hour - - def time_minute(self): - self.per.second - - def time_second(self): - self.per.second - - def time_leap_year(self): - self.per.is_leapyear - - -class Constructor(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range('1985', periods=1000) - self.rng2 = date_range('1985', periods=1000).to_pydatetime() - - def time_from_date_range(self): - PeriodIndex(self.rng, freq='D') - - def time_from_pydatetime(self): - PeriodIndex(self.rng2, freq='D') - - -class DataFrame(object): - goal_time = 0.2 - - def setup(self): - self.rng = pd.period_range(start='1/1/1990', freq='S', periods=20000) - self.df = pd.DataFrame(index=range(len(self.rng))) - - def time_setitem_period_column(self): - self.df['col'] = self.rng - - -class Algorithms(object): - goal_time = 0.2 - - def setup(self): - data = [Period('2011-01', freq='M'), Period('2011-02', freq='M'), - Period('2011-03', freq='M'), Period('2011-04', freq='M')] - self.s = Series(data * 1000) - self.i = PeriodIndex(data, freq='M') - - def time_drop_duplicates_pseries(self): - self.s.drop_duplicates() - - def time_drop_duplicates_pindex(self): - self.i.drop_duplicates() - - def time_value_counts_pseries(self): - self.s.value_counts() - - def time_value_counts_pindex(self): - self.i.value_counts() - - -class Properties(object): - def setup(self): - self.per = Period('2017-09-06 08:28', freq='min') + def setup(self, freq): + self.per = Period('2012-06-01', freq=freq) def time_year(self): self.per.year @@ -101,7 +28,7 @@ def time_second(self): self.per.second def time_is_leap_year(self): - self.per.is_leap_year + self.per.is_leapyear def time_quarter(self): self.per.quarter @@ -137,7 +64,68 @@ def time_asfreq(): self.per.asfreq('A') -class period_standard_indexing(object): +class PeriodIndexConstructor(object): + goal_time = 0.2 + + params = ['D'] + param_names = ['freq'] + + def setup(self, freq): + self.freq = freq + self.rng = date_range('1985', periods=1000) + self.rng2 = date_range('1985', periods=1000).to_pydatetime() + + def time_from_date_range(self): + PeriodIndex(self.rng, freq=self.freq) + + def time_from_pydatetime(self): + PeriodIndex(self.rng2, freq=self.freq) + + +class DataFramePeriodColumn(object): + goal_time = 0.2 + + def setup_cache(self): + rng = pd.period_range(start='1/1/1990', freq='S', periods=20000) + df = pd.DataFrame(index=range(len(rng))) + return rng, df + + def time_setitem_period_column(self, tup): + rng, df = tup + df['col'] = rng + + +class PeriodIndexAlgorithms(object): + goal_time = 0.2 + + def setup(self): + data = [Period('2011-01', freq='M'), Period('2011-02', freq='M'), + Period('2011-03', freq='M'), Period('2011-04', freq='M')] + self.index = PeriodIndex(data, freq='M') + + def time_drop_duplicates(self): + self.index.drop_duplicates() + + def time_value_counts(self): + self.index.value_counts() + + +class PeriodSeriesAlgorithms(object): + goal_time = 0.2 + + def setup(self): + data = [Period('2011-01', freq='M'), Period('2011-02', freq='M'), + Period('2011-03', freq='M'), Period('2011-04', freq='M')] + self.series = Series(data * 1000) + + def time_drop_duplicates(self): + self.series.drop_duplicates() + + def time_value_counts(self): + self.series.value_counts() + + +class PeriodStandardIndexing(object): goal_time = 0.2 def setup(self): diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py index e8cb4c9d1c75b..b8ef309e6a464 100644 --- a/asv_bench/benchmarks/timestamp.py +++ b/asv_bench/benchmarks/timestamp.py @@ -7,8 +7,11 @@ class TimestampProperties(object): goal_time = 0.2 - def setup(self): - self.ts = Timestamp('2017-08-25 08:16:14') + params = [None, pytz.timezone('Europe/Amsterdam')] + param_names = ['tz'] + + def setup(self, tz): + self.ts = Timestamp('2017-08-25 08:16:14', tzinfo=tz) def time_tz(self): self.ts.tz @@ -65,25 +68,29 @@ def time_microsecond(self): class TimestampOps(object): goal_time = 0.2 - def setup(self): - self.ts = Timestamp('2017-08-25 08:16:14') - self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern') + params = [None, 'US/Eastern'] + param_names = ['tz'] - dt = datetime.datetime(2016, 3, 27, 1) - self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo - self.ts2 = Timestamp(dt) + def setup(self, tz): + self.ts = Timestamp('2017-08-25 08:16:14', tz=tz) def time_replace_tz(self): self.ts.replace(tzinfo=pytz.timezone('US/Eastern')) - def time_replace_across_dst(self): - self.ts2.replace(tzinfo=self.tzinfo) - def time_replace_None(self): - self.ts_tz.replace(tzinfo=None) + self.ts.replace(tzinfo=None) def time_to_pydatetime(self): self.ts.to_pydatetime() - def time_to_pydatetime_tz(self): - self.ts_tz.to_pydatetime() + +class TimestampAcrossDst(object): + goal_time = 0.2 + + def setup(self): + dt = datetime.datetime(2016, 3, 27, 1) + self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo + self.ts2 = Timestamp(dt) + + def time_replace_across_dst(self): + self.ts2.replace(tzinfo=self.tzinfo) From b6551f7c1d9f54b127d1f518e0e3dc7c966284df Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 13 Nov 2017 18:24:47 -0800 Subject: [PATCH 2/5] Fix typo --- asv_bench/benchmarks/timedelta.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 0f8c8458628b1..2d1ff3a24f787 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -31,14 +31,14 @@ def time_convert_ignore(self): to_timedelta(self.arr4, errors='ignore') -class Ops(object): +class TimedeltaOps(object): goal_time = 0.2 def setup(self): self.td = to_timedelta(np.arange(1000000)) self.ts = Timestamp('2000') - def test_add_td_ts(self): + def time_add_td_ts(self): self.td + self.ts From 41ec6e05ebb55e44dd1e52079cec862084a65167 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 14 Nov 2017 08:05:13 -0800 Subject: [PATCH 3/5] reviewer comments, separate UnaryMethods, flake8 fixup --- asv_bench/benchmarks/period.py | 46 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index c089c1a625677..962038b22bd1d 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -28,7 +28,7 @@ def time_second(self): self.per.second def time_is_leap_year(self): - self.per.is_leapyear + self.per.is_leap_year def time_quarter(self): self.per.quarter @@ -54,13 +54,21 @@ def time_start_time(self): def time_end_time(self): self.per.end_time - def time_to_timestamp(): + +class PeriodUnaryMethods(object): + params = ['M', 'min'] + param_names = ['freq'] + + def setup(self, freq): + self.per = Period('2012-06-01', freq=freq) + + def time_to_timestamp(self): self.per.to_timestamp() - def time_now(): + def time_now(self): self.per.now() - def time_asfreq(): + def time_asfreq(self): self.per.asfreq('A') @@ -95,34 +103,26 @@ def time_setitem_period_column(self, tup): df['col'] = rng -class PeriodIndexAlgorithms(object): +class Algorithms(object): goal_time = 0.2 - def setup(self): - data = [Period('2011-01', freq='M'), Period('2011-02', freq='M'), - Period('2011-03', freq='M'), Period('2011-04', freq='M')] - self.index = PeriodIndex(data, freq='M') + params = [PeriodIndex, Series] + param_names = ['box_cls'] - def time_drop_duplicates(self): - self.index.drop_duplicates() - - def time_value_counts(self): - self.index.value_counts() - - -class PeriodSeriesAlgorithms(object): - goal_time = 0.2 - - def setup(self): + def setup(self, box_cls): data = [Period('2011-01', freq='M'), Period('2011-02', freq='M'), Period('2011-03', freq='M'), Period('2011-04', freq='M')] - self.series = Series(data * 1000) + + if box_cls is PeriodIndex: + self.vector = PeriodIndex(data, freq='M') + else: + self.vector = Series(data * 1000) def time_drop_duplicates(self): - self.series.drop_duplicates() + self.vector.drop_duplicates() def time_value_counts(self): - self.series.value_counts() + self.vector.value_counts() class PeriodStandardIndexing(object): From e19d39771003574048f229b94076ead386c8ea09 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 22 Nov 2017 07:26:06 -0800 Subject: [PATCH 4/5] pass params to all methods --- asv_bench/benchmarks/period.py | 61 +++++++++++++++++----------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index 962038b22bd1d..5237380d8f33b 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -9,49 +9,49 @@ class PeriodProperties(object): def setup(self, freq): self.per = Period('2012-06-01', freq=freq) - def time_year(self): + def time_year(self, freq): self.per.year - def time_month(self): + def time_month(self, freq): self.per.month - def time_day(self): + def time_day(self, freq): self.per.day - def time_hour(self): + def time_hour(self, freq): self.per.hour - def time_minute(self): + def time_minute(self, freq): self.per.minute - def time_second(self): + def time_second(self, freq): self.per.second - def time_is_leap_year(self): + def time_is_leap_year(self, freq): self.per.is_leap_year - def time_quarter(self): + def time_quarter(self, freq): self.per.quarter - def time_qyear(self): + def time_qyear(self, freq): self.per.qyear - def time_week(self): + def time_week(self, freq): self.per.week - def time_daysinmonth(self): + def time_daysinmonth(self, freq): self.per.daysinmonth - def time_dayofweek(self): + def time_dayofweek(self, freq): self.per.dayofweek - def time_dayofyear(self): + def time_dayofyear(self, freq): self.per.dayofyear - def time_start_time(self): + def time_start_time(self, freq): self.per.start_time - def time_end_time(self): + def time_end_time(self, freq): self.per.end_time @@ -62,13 +62,13 @@ class PeriodUnaryMethods(object): def setup(self, freq): self.per = Period('2012-06-01', freq=freq) - def time_to_timestamp(self): + def time_to_timestamp(self, freq): self.per.to_timestamp() - def time_now(self): + def time_now(self, freq): self.per.now() - def time_asfreq(self): + def time_asfreq(self, freq): self.per.asfreq('A') @@ -79,15 +79,14 @@ class PeriodIndexConstructor(object): param_names = ['freq'] def setup(self, freq): - self.freq = freq self.rng = date_range('1985', periods=1000) self.rng2 = date_range('1985', periods=1000).to_pydatetime() - def time_from_date_range(self): - PeriodIndex(self.rng, freq=self.freq) + def time_from_date_range(self, freq): + PeriodIndex(self.rng, freq=freq) - def time_from_pydatetime(self): - PeriodIndex(self.rng2, freq=self.freq) + def time_from_pydatetime(self, freq): + PeriodIndex(self.rng2, freq=freq) class DataFramePeriodColumn(object): @@ -106,26 +105,26 @@ def time_setitem_period_column(self, tup): class Algorithms(object): goal_time = 0.2 - params = [PeriodIndex, Series] - param_names = ['box_cls'] + params = ['index', 'series'] + param_names = ['typ'] - def setup(self, box_cls): + def setup(self, typ): data = [Period('2011-01', freq='M'), Period('2011-02', freq='M'), Period('2011-03', freq='M'), Period('2011-04', freq='M')] - if box_cls is PeriodIndex: + if typ == 'index': self.vector = PeriodIndex(data, freq='M') - else: + elif typ == 'series': self.vector = Series(data * 1000) - def time_drop_duplicates(self): + def time_drop_duplicates(self, typ): self.vector.drop_duplicates() - def time_value_counts(self): + def time_value_counts(self, typ): self.vector.value_counts() -class PeriodStandardIndexing(object): +class Indexing(object): goal_time = 0.2 def setup(self): From 27d75c3b90fad51f3f6388e01c9629a56628073a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 22 Nov 2017 17:02:55 -0800 Subject: [PATCH 5/5] fix missing freq arg --- asv_bench/benchmarks/period.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index 5237380d8f33b..15d7655293ea3 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -66,7 +66,7 @@ def time_to_timestamp(self, freq): self.per.to_timestamp() def time_now(self, freq): - self.per.now() + self.per.now(freq) def time_asfreq(self, freq): self.per.asfreq('A') @@ -113,7 +113,7 @@ def setup(self, typ): Period('2011-03', freq='M'), Period('2011-04', freq='M')] if typ == 'index': - self.vector = PeriodIndex(data, freq='M') + self.vector = PeriodIndex(data * 1000, freq='M') elif typ == 'series': self.vector = Series(data * 1000)