Skip to content

CLN/PERF: clean-up of the benchmarks #14099

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits on Dec 10, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions asv_bench/benchmarks/algorithms.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,7 @@
from pandas.util import testing as tm


class algorithm(object):
class Algorithms(object):
goal_time = 0.2

def setup(self):
Expand All @@ -24,21 +24,28 @@ def setup(self):
self.arrneg = np.arange(-1000000, 0)
self.arrmixed = np.array([1, -1]).repeat(500000)

def time_int_factorize(self):
# match
self.uniques = tm.makeStringIndex(1000).values
self.all = self.uniques.repeat(10)

def time_factorize_int(self):
self.int.factorize()

def time_float_factorize(self):
def time_factorize_float(self):
self.int.factorize()

def time_int_unique_duplicated(self):
def time_duplicated_int_unique(self):
self.int_unique.duplicated()

def time_int_duplicated(self):
def time_duplicated_int(self):
self.int.duplicated()

def time_float_duplicated(self):
def time_duplicated_float(self):
self.float.duplicated()

def time_match_strings(self):
pd.match(self.all, self.uniques)

def time_add_overflow_pos_scalar(self):
self.checked_add(self.arr, 1)

Expand All @@ -58,7 +65,7 @@ def time_add_overflow_mixed_arr(self):
self.checked_add(self.arr, self.arrmixed)


class hashing(object):
class Hashing(object):
goal_time = 0.2

def setup(self):
Expand Down
23 changes: 16 additions & 7 deletions asv_bench/benchmarks/attrs_caching.py
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,32 @@
from .pandas_vb_common import *
from pandas.util.decorators import cache_readonly


class getattr_dataframe_index(object):
class DataFrameAttributes(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(10, 6))
self.cur_index = self.df.index

def time_getattr_dataframe_index(self):
def time_get_index(self):
self.foo = self.df.index

def time_set_index(self):
self.df.index = self.cur_index


class setattr_dataframe_index(object):
class CacheReadonly(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(10, 6))
self.cur_index = self.df.index

def time_setattr_dataframe_index(self):
self.df.index = self.cur_index
class Foo:

@cache_readonly
def prop(self):
return 5
self.obj = Foo()

def time_cache_readonly(self):
self.obj.prop
239 changes: 44 additions & 195 deletions asv_bench/benchmarks/binary_ops.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,193 +2,79 @@
import pandas.computation.expressions as expr


class frame_add(object):
class Ops(object):
goal_time = 0.2

def setup(self):
params = [[True, False], ['default', 1]]
param_names = ['use_numexpr', 'threads']

def setup(self, use_numexpr, threads):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))

def time_frame_add(self):
(self.df + self.df2)
if threads != 'default':
expr.set_numexpr_threads(threads)
if not use_numexpr:
expr.set_use_numexpr(False)


class frame_add_no_ne(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))
expr.set_use_numexpr(False)

def time_frame_add_no_ne(self):
def time_frame_add(self, use_numexpr, threads):
(self.df + self.df2)

def teardown(self):
expr.set_use_numexpr(True)
def time_frame_mult(self, use_numexpr, threads):
(self.df * self.df2)

def time_frame_multi_and(self, use_numexpr, threads):
self.df[((self.df > 0) & (self.df2 > 0))]

class frame_add_st(object):
goal_time = 0.2
def time_frame_comparison(self, use_numexpr, threads):
(self.df > self.df2)

def setup(self):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))
expr.set_numexpr_threads(1)

def time_frame_add_st(self):
(self.df + self.df2)

def teardown(self):
def teardown(self, use_numexpr, threads):
expr.set_use_numexpr(True)
expr.set_numexpr_threads()


class frame_float_div(object):
class Ops2(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(1000, 1000))
self.df2 = DataFrame(np.random.randn(1000, 1000))

def time_frame_float_div(self):
(self.df // self.df2)
self.df_int = DataFrame(
np.random.random_integers(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(1000, 1000)))
self.df2_int = DataFrame(
np.random.random_integers(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(1000, 1000)))

## Division

class frame_float_div_by_zero(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(1000, 1000))
def time_frame_float_div(self):
(self.df // self.df2)

def time_frame_float_div_by_zero(self):
(self.df / 0)


class frame_float_floor_by_zero(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(1000, 1000))

def time_frame_float_floor_by_zero(self):
(self.df // 0)


class frame_float_mod(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(1000, 1000))
self.df2 = DataFrame(np.random.randn(1000, 1000))

def time_frame_float_mod(self):
(self.df / self.df2)


class frame_int_div_by_zero(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))

def time_frame_int_div_by_zero(self):
(self.df / 0)

(self.df_int / 0)

class frame_int_mod(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
self.df2 = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
## Modulo

def time_frame_int_mod(self):
(self.df / self.df2)


class frame_mult(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))

def time_frame_mult(self):
(self.df * self.df2)


class frame_mult_no_ne(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))
expr.set_use_numexpr(False)

def time_frame_mult_no_ne(self):
(self.df * self.df2)

def teardown(self):
expr.set_use_numexpr(True)


class frame_mult_st(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))
expr.set_numexpr_threads(1)

def time_frame_mult_st(self):
(self.df * self.df2)

def teardown(self):
expr.set_numexpr_threads()


class frame_multi_and(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))

def time_frame_multi_and(self):
self.df[((self.df > 0) & (self.df2 > 0))]


class frame_multi_and_no_ne(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))
expr.set_use_numexpr(False)

def time_frame_multi_and_no_ne(self):
self.df[((self.df > 0) & (self.df2 > 0))]

def teardown(self):
expr.set_use_numexpr(True)


class frame_multi_and_st(object):
goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))
expr.set_numexpr_threads(1)

def time_frame_multi_and_st(self):
self.df[((self.df > 0) & (self.df2 > 0))]

def teardown(self):
expr.set_numexpr_threads()
def time_frame_float_mod(self):
(self.df / self.df2)


class series_timestamp_compare(object):
class Timeseries(object):
goal_time = 0.2

def setup(self):
Expand All @@ -197,65 +83,28 @@ def setup(self):
self.s = Series(date_range('20010101', periods=self.N, freq='T'))
self.ts = self.s[self.halfway]

self.s2 = Series(date_range('20010101', periods=self.N, freq='s'))

def time_series_timestamp_compare(self):
(self.s <= self.ts)


class timestamp_ops_diff1(object):
goal_time = 0.2
N = 1000000

def setup(self):
self.s = self.create()

def create(self):
return Series(date_range('20010101', periods=self.N, freq='s'))
def time_timestamp_series_compare(self):
(self.ts >= self.s)

def time_timestamp_ops_diff1(self):
self.s.diff()

class timestamp_tz_ops_diff1(timestamp_ops_diff1):
N = 10000

def create(self):
return Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern'))

class timestamp_ops_diff2(object):
goal_time = 0.2
N = 1000000

def setup(self):
self.s = self.create()

def create(self):
return Series(date_range('20010101', periods=self.N, freq='s'))
self.s2.diff()

def time_timestamp_ops_diff2(self):
(self.s - self.s.shift())

class timestamp_tz_ops_diff2(timestamp_ops_diff2):
N = 10000

def create(self):
return Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern'))

class timestamp_series_compare(object):
goal_time = 0.2
N = 1000000
class TimeseriesTZ(Timeseries):

def setup(self):
self.N = 1000000
self.halfway = ((self.N // 2) - 1)
self.s = self.create()
self.s = Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern'))
self.ts = self.s[self.halfway]

def create(self):
return Series(date_range('20010101', periods=self.N, freq='T'))

def time_timestamp_series_compare(self):
(self.ts >= self.s)

class timestamp_tz_series_compare(timestamp_series_compare):
N = 10000

def create(self):
return Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern'))
self.s2 = Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern'))
Loading