Skip to content

ASV/CLN: cleanup period benchmarks #18275

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 23, 2017
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 69 additions & 81 deletions asv_bench/benchmarks/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,84 +3,11 @@


class PeriodProperties(object):
    """Benchmarks for reading scalar attributes of a single ``Period``.

    Every ``time_*`` method reads exactly one attribute of the ``Period``
    created in ``setup`` and discards the value; only the attribute access
    itself is being timed.
    """

    def setup(self):
        """Create the monthly ``Period`` shared by all benchmarks."""
        self.per = Period('2012-06-01', freq='M')

    def time_year(self):
        self.per.year

    def time_month(self):
        self.per.month

    def time_quarter(self):
        self.per.quarter

    def time_day(self):
        self.per.day

    def time_hour(self):
        self.per.hour

    def time_minute(self):
        # Must read ``minute``; reading ``second`` here would merely
        # duplicate time_second and leave ``minute`` unmeasured.
        self.per.minute

    def time_second(self):
        self.per.second

    def time_leap_year(self):
        # The Period property is spelled ``is_leap_year``.
        self.per.is_leap_year


class Constructor(object):
    """Benchmarks for constructing a daily ``PeriodIndex`` from dates."""

    goal_time = 0.2

    def setup(self):
        """Prepare 1000 dates starting at '1985' in two representations."""
        self.rng = date_range('1985', periods=1000)
        # Same dates as ``self.rng``, converted with ``to_pydatetime()``;
        # derived from ``self.rng`` rather than building an identical
        # second date_range.
        self.rng2 = self.rng.to_pydatetime()

    def time_from_date_range(self):
        PeriodIndex(self.rng, freq='D')

    def time_from_pydatetime(self):
        PeriodIndex(self.rng2, freq='D')


# NOTE: this benchmark class shares its name with pandas' DataFrame; the
# body always refers to the pandas class explicitly as ``pd.DataFrame``.
class DataFrame(object):
    """Benchmark assigning a period range to a DataFrame column."""

    goal_time = 0.2

    def setup(self):
        """Build a 20000-element period range and an empty frame of equal length."""
        self.rng = pd.period_range(start='1/1/1990', freq='S', periods=20000)
        self.df = pd.DataFrame(index=range(len(self.rng)))

    def time_setitem_period_column(self):
        self.df['col'] = self.rng


class Algorithms(object):
goal_time = 0.2

def setup(self):
data = [Period('2011-01', freq='M'), Period('2011-02', freq='M'),
Period('2011-03', freq='M'), Period('2011-04', freq='M')]
self.s = Series(data * 1000)
self.i = PeriodIndex(data, freq='M')
params = ['M', 'min']
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this have to be double brackets ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not in my admittedly limited experience. That said, the only examples that use both params and param_names have 2+ param_names, so they do use double brackets for params.

param_names = ['freq']

def time_drop_duplicates_pseries(self):
self.s.drop_duplicates()

def time_drop_duplicates_pindex(self):
self.i.drop_duplicates()

def time_value_counts_pseries(self):
self.s.value_counts()

def time_value_counts_pindex(self):
self.i.value_counts()


class Properties(object):
def setup(self):
self.per = Period('2017-09-06 08:28', freq='min')
def setup(self, freq):
self.per = Period('2012-06-01', freq=freq)

def time_year(self):
self.per.year
Expand Down Expand Up @@ -127,17 +54,78 @@ def time_start_time(self):
def time_end_time(self):
self.per.end_time

def time_to_timestamp():

class PeriodUnaryMethods(object):
params = ['M', 'min']
param_names = ['freq']

def setup(self, freq):
self.per = Period('2012-06-01', freq=freq)

def time_to_timestamp(self):
self.per.to_timestamp()

def time_now():
def time_now(self):
self.per.now()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one is failing (needs a freq)


def time_asfreq():
def time_asfreq(self):
self.per.asfreq('A')


class period_standard_indexing(object):
class PeriodIndexConstructor(object):
goal_time = 0.2

params = ['D']
param_names = ['freq']

def setup(self, freq):
self.freq = freq
self.rng = date_range('1985', periods=1000)
self.rng2 = date_range('1985', periods=1000).to_pydatetime()

def time_from_date_range(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you also need to pass the param in the actual time_ functions as well. So time_from_date_range(self, freq) in this case (and also for all other benchmarks)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this in addition to or instead of setting self.freq=freq in setup?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that doesn't matter, it is just needed to let the code actually run.
But since you have to do that, the self.freq = freq is a bit superfluous as you can also do PeriodIndex(.. freq=freq). So to conclude: instead

PeriodIndex(self.rng, freq=self.freq)

def time_from_pydatetime(self):
PeriodIndex(self.rng2, freq=self.freq)


class DataFramePeriodColumn(object):
    """Benchmark assigning a period range to a DataFrame column."""

    goal_time = 0.2

    def setup_cache(self):
        """Return the ``(period_range, frame)`` pair used by the benchmark.

        The frame has no columns and one row per period.
        """
        rng = pd.period_range(start='1/1/1990', freq='S', periods=20000)
        df = pd.DataFrame(index=range(len(rng)))
        return rng, df

    def time_setitem_period_column(self, tup):
        """Assign the periods as column ``'col'``.

        ``tup`` is a ``(period_range, frame)`` pair shaped like the value
        returned by ``setup_cache``.
        """
        rng, df = tup
        df['col'] = rng


class Algorithms(object):
goal_time = 0.2

params = [PeriodIndex, Series]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this give in the asv run output as the name for the benchmark (just 'PeriodIndex', or the full path)? In the latter case I would just do ['index', 'series'].

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what does this give in the asv run output as name for the benchmark?

I'm not sure. Unfortunately I'm not aware of a quick-feedback way of running these.

Going to put a pin in this PR for a bit, much rather focus on offsets where current PRs need to get resolved before I can do actual bugfixes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check 'asv dev', you can select only this file and specify a quick run

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quickly checked this (with asv dev -b period) and it used the full path, so please use ['index', 'series'] as the params

param_names = ['box_cls']

def setup(self, box_cls):
data = [Period('2011-01', freq='M'), Period('2011-02', freq='M'),
Period('2011-03', freq='M'), Period('2011-04', freq='M')]

if box_cls is PeriodIndex:
self.vector = PeriodIndex(data, freq='M')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use data * 1000 here as well, like you did for the Series (to have more data)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to make that change, but the *1000 for series was there when I got here. I'm implicitly assuming someone had a reason for doing it that way.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there was a reason

else:
self.vector = Series(data * 1000)

def time_drop_duplicates(self):
self.vector.drop_duplicates()

def time_value_counts(self):
self.vector.value_counts()


class PeriodStandardIndexing(object):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would just do 'Indexing' ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Name used to be period_standard_indexing

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and the one in your PR is certainly an improvement (since it follows PEP8), but I think the 'period' and 'standard' are just redundant (unless there is another class with non-standard indexing)

goal_time = 0.2

def setup(self):
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ def time_convert_ignore(self):
to_timedelta(self.arr4, errors='ignore')


class Ops(object):
class TimedeltaOps(object):
goal_time = 0.2

def setup(self):
self.td = to_timedelta(np.arange(1000000))
self.ts = Timestamp('2000')

def test_add_td_ts(self):
def time_add_td_ts(self):
self.td + self.ts


Expand Down
35 changes: 21 additions & 14 deletions asv_bench/benchmarks/timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
class TimestampProperties(object):
goal_time = 0.2

def setup(self):
self.ts = Timestamp('2017-08-25 08:16:14')
params = [None, pytz.timezone('Europe/Amsterdam')]
param_names = ['tz']

def setup(self, tz):
self.ts = Timestamp('2017-08-25 08:16:14', tzinfo=tz)

def time_tz(self):
self.ts.tz
Expand Down Expand Up @@ -65,25 +68,29 @@ def time_microsecond(self):
class TimestampOps(object):
goal_time = 0.2

def setup(self):
self.ts = Timestamp('2017-08-25 08:16:14')
self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')
params = [None, 'US/Eastern']
param_names = ['tz']

dt = datetime.datetime(2016, 3, 27, 1)
self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
self.ts2 = Timestamp(dt)
def setup(self, tz):
self.ts = Timestamp('2017-08-25 08:16:14', tz=tz)

def time_replace_tz(self):
self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))

def time_replace_across_dst(self):
self.ts2.replace(tzinfo=self.tzinfo)

def time_replace_None(self):
self.ts_tz.replace(tzinfo=None)
self.ts.replace(tzinfo=None)

def time_to_pydatetime(self):
self.ts.to_pydatetime()

def time_to_pydatetime_tz(self):
self.ts_tz.to_pydatetime()

class TimestampAcrossDst(object):
    """Benchmark ``Timestamp.replace(tzinfo=...)`` using a CET tzinfo."""

    goal_time = 0.2

    def setup(self):
        """Create a naive ``Timestamp`` and the tzinfo to attach to it.

        The tzinfo is taken from localizing 2016-03-27 01:00 in 'CET' with
        ``is_dst=False``; the ``Timestamp`` is built from the same naive
        datetime.
        """
        dt = datetime.datetime(2016, 3, 27, 1)
        self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
        self.ts2 = Timestamp(dt)

    def time_replace_across_dst(self):
        self.ts2.replace(tzinfo=self.tzinfo)