Skip to content

ASV/CLN: cleanup period benchmarks #18275

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 23, 2017
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 67 additions & 79 deletions asv_bench/benchmarks/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,84 +3,11 @@


class PeriodProperties(object):
def setup(self):
self.per = Period('2012-06-01', freq='M')

def time_year(self):
self.per.year

def time_month(self):
self.per.month

def time_quarter(self):
self.per.quarter
params = ['M', 'min']
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this have to be double brackets?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not in my admittedly limited experience. However, the only examples that use both `params` and `param_names` have 2+ `param_names`, and so they do use double brackets for `params`.

param_names = ['freq']

def time_day(self):
self.per.day

def time_hour(self):
self.per.hour

def time_minute(self):
    """Benchmark reading the ``minute`` field of the Period under test.

    Reads ``self.per``, which the enclosing benchmark's ``setup`` assigns.
    """
    # The previous body read ``second`` (copy-paste slip), which made this
    # benchmark an exact duplicate of ``time_second``.
    self.per.minute

def time_second(self):
self.per.second

def time_leap_year(self):
    """Benchmark reading the ``is_leap_year`` flag of the Period under test.

    Reads ``self.per``, which the enclosing benchmark's ``setup`` assigns.
    """
    # ``is_leapyear`` is not a Period attribute; the property is spelled
    # ``is_leap_year``, so the old spelling raised AttributeError.
    self.per.is_leap_year


class Constructor(object):
    """Benchmarks for building a ``PeriodIndex`` from datetime-like input."""

    goal_time = 0.2

    def setup(self):
        """Prepare 1000 dates starting at '1985' in two representations."""
        dates = date_range(start='1985', periods=1000)
        # Same dates twice: once as the pandas index, once converted to
        # plain ``datetime.datetime`` objects via ``to_pydatetime``.
        self.rng = dates
        self.rng2 = dates.to_pydatetime()

    def time_from_date_range(self):
        """Time ``PeriodIndex`` construction from the ``date_range`` result."""
        PeriodIndex(self.rng, freq='D')

    def time_from_pydatetime(self):
        """Time ``PeriodIndex`` construction from the pydatetime array."""
        PeriodIndex(self.rng2, freq='D')


class DataFrame(object):
    """Benchmark assigning a period range as a DataFrame column."""

    goal_time = 0.2

    def setup(self):
        """Create a 20000-element period range and an equally long frame."""
        periods = pd.period_range(start='1/1/1990', freq='S', periods=20000)
        self.rng = periods
        # The frame has no columns yet; only its index length matches ``rng``.
        self.df = pd.DataFrame(index=range(len(periods)))

    def time_setitem_period_column(self):
        """Time storing the period range under the column name 'col'."""
        self.df['col'] = self.rng


class Algorithms(object):
    """Benchmarks for ``drop_duplicates``/``value_counts`` on Period data."""

    goal_time = 0.2

    def setup(self):
        """Build a Series (4000 Periods) and a PeriodIndex (4 Periods)."""
        months = ('2011-01', '2011-02', '2011-03', '2011-04')
        data = [Period(month, freq='M') for month in months]
        # The Series repeats the four Periods 1000 times; the index holds
        # each Period exactly once.
        self.s = Series(data * 1000)
        self.i = PeriodIndex(data, freq='M')

    def time_drop_duplicates_pseries(self):
        """Time ``drop_duplicates`` on the Series of Periods."""
        self.s.drop_duplicates()

    def time_drop_duplicates_pindex(self):
        """Time ``drop_duplicates`` on the PeriodIndex."""
        self.i.drop_duplicates()

    def time_value_counts_pseries(self):
        """Time ``value_counts`` on the Series of Periods."""
        self.s.value_counts()

    def time_value_counts_pindex(self):
        """Time ``value_counts`` on the PeriodIndex."""
        self.i.value_counts()


class Properties(object):
def setup(self):
self.per = Period('2017-09-06 08:28', freq='min')
def setup(self, freq):
self.per = Period('2012-06-01', freq=freq)

def time_year(self):
self.per.year
Expand All @@ -101,7 +28,7 @@ def time_second(self):
self.per.second

def time_is_leap_year(self):
    """Benchmark reading the ``is_leap_year`` flag of the Period under test.

    Reads ``self.per``, which the enclosing benchmark's ``setup`` assigns.
    """
    # Only ``is_leap_year`` is read: ``is_leapyear`` is not a Period
    # attribute and would raise AttributeError instead of being timed.
    self.per.is_leap_year
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think `is_leap_year` was actually correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, you're right. One of the two existing benchmarks used each and I guessed wrong.


def time_quarter(self):
self.per.quarter
Expand Down Expand Up @@ -137,7 +64,68 @@ def time_asfreq():
self.per.asfreq('A')


class period_standard_indexing(object):
class PeriodIndexConstructor(object):
goal_time = 0.2

params = ['D']
param_names = ['freq']

def setup(self, freq):
self.freq = freq
self.rng = date_range('1985', periods=1000)
self.rng2 = date_range('1985', periods=1000).to_pydatetime()

def time_from_date_range(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You also need to pass the param to the actual `time_` functions, so `time_from_date_range(self, freq)` in this case (and likewise for all other benchmarks).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this in addition to or instead of setting self.freq=freq in setup?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That doesn't matter; it is just needed to let the code actually run.
But since you have to do that, the `self.freq = freq` is a bit superfluous, as you can also do `PeriodIndex(..., freq=freq)`. So to conclude: instead.

PeriodIndex(self.rng, freq=self.freq)

def time_from_pydatetime(self, freq=None):
    """Time ``PeriodIndex`` construction from the pydatetime array.

    Parameters
    ----------
    freq : str, optional
        Frequency for the resulting index. When omitted, ``self.freq``
        (stored by ``setup``) is used instead.
    """
    # ASV passes each parameter to the benchmark method as well as to
    # ``setup``; without a ``freq`` argument the call fails with TypeError.
    # The default keeps direct zero-argument calls working.
    PeriodIndex(self.rng2, freq=self.freq if freq is None else freq)


class DataFramePeriodColumn(object):
    """Benchmark assigning a period range as a DataFrame column."""

    goal_time = 0.2

    def setup_cache(self):
        """Return ``(period_range, frame)`` for the benchmark to consume.

        The range has 20000 elements; the frame has no columns and an
        index of the same length.
        """
        periods = pd.period_range(start='1/1/1990', freq='S', periods=20000)
        frame = pd.DataFrame(index=range(len(periods)))
        return periods, frame

    def time_setitem_period_column(self, tup):
        """Time storing the period range under the column name 'col'.

        ``tup`` is the ``(period_range, frame)`` pair from ``setup_cache``.
        """
        periods, frame = tup
        frame['col'] = periods


class PeriodIndexAlgorithms(object):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could also parametrize this one over ['series', 'index'] (I would just call it Algorithms; the 'period' is already in the file name).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that but decided against it because the two cases treat the data differently, so it would need an `if cls is Series: ...` in the setup method. But heck, let's go for it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather do a data = ...; if typ == 'series': data = pd.Series(data)

goal_time = 0.2

def setup(self):
data = [Period('2011-01', freq='M'), Period('2011-02', freq='M'),
Period('2011-03', freq='M'), Period('2011-04', freq='M')]
self.index = PeriodIndex(data, freq='M')

def time_drop_duplicates(self):
self.index.drop_duplicates()

def time_value_counts(self):
self.index.value_counts()


class PeriodSeriesAlgorithms(object):
    """Benchmarks for ``drop_duplicates``/``value_counts`` on a Period Series."""

    goal_time = 0.2

    def setup(self):
        """Build a 4000-element Series that repeats four monthly Periods."""
        months = ('2011-01', '2011-02', '2011-03', '2011-04')
        unique_periods = [Period(month, freq='M') for month in months]
        self.series = Series(unique_periods * 1000)

    def time_drop_duplicates(self):
        """Time ``drop_duplicates`` on the Series of Periods."""
        self.series.drop_duplicates()

    def time_value_counts(self):
        """Time ``value_counts`` on the Series of Periods."""
        self.series.value_counts()


class PeriodStandardIndexing(object):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would just call it 'Indexing'?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Name used to be period_standard_indexing

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and the one in your PR is certainly an improvement (since it follows PEP8), but I think the 'period' and 'standard' are just redundant (unless there is another class with non-standard indexing)

goal_time = 0.2

def setup(self):
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ def time_convert_ignore(self):
to_timedelta(self.arr4, errors='ignore')


class Ops(object):
class TimedeltaOps(object):
goal_time = 0.2

def setup(self):
self.td = to_timedelta(np.arange(1000000))
self.ts = Timestamp('2000')

def test_add_td_ts(self):
def time_add_td_ts(self):
self.td + self.ts


Expand Down
35 changes: 21 additions & 14 deletions asv_bench/benchmarks/timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
class TimestampProperties(object):
goal_time = 0.2

def setup(self):
self.ts = Timestamp('2017-08-25 08:16:14')
params = [None, pytz.timezone('Europe/Amsterdam')]
param_names = ['tz']

def setup(self, tz):
self.ts = Timestamp('2017-08-25 08:16:14', tzinfo=tz)

def time_tz(self):
self.ts.tz
Expand Down Expand Up @@ -65,25 +68,29 @@ def time_microsecond(self):
class TimestampOps(object):
goal_time = 0.2

def setup(self):
self.ts = Timestamp('2017-08-25 08:16:14')
self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')
params = [None, 'US/Eastern']
param_names = ['tz']

dt = datetime.datetime(2016, 3, 27, 1)
self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
self.ts2 = Timestamp(dt)
def setup(self, tz):
self.ts = Timestamp('2017-08-25 08:16:14', tz=tz)

def time_replace_tz(self):
self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))

def time_replace_across_dst(self):
self.ts2.replace(tzinfo=self.tzinfo)

def time_replace_None(self):
self.ts_tz.replace(tzinfo=None)
self.ts.replace(tzinfo=None)

def time_to_pydatetime(self):
self.ts.to_pydatetime()

def time_to_pydatetime_tz(self):
self.ts_tz.to_pydatetime()

class TimestampAcrossDst(object):
    """Benchmark ``Timestamp.replace(tzinfo=...)`` with a CET tzinfo."""

    goal_time = 0.2

    def setup(self):
        """Prepare a naive Timestamp and a tzinfo taken from a localized date."""
        naive = datetime.datetime(2016, 3, 27, 1)
        cet = pytz.timezone('CET')
        # Keep only the tzinfo object that pytz attaches when localizing
        # this wall time with ``is_dst=False``.
        self.tzinfo = cet.localize(naive, is_dst=False).tzinfo
        self.ts2 = Timestamp(naive)

    def time_replace_across_dst(self):
        """Time attaching the prepared tzinfo to the naive Timestamp."""
        self.ts2.replace(tzinfo=self.tzinfo)