Skip to content

PERF: add initial asv config and vbench->asv conversion script #9715

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 19, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{
// The version of the config file format. Do not change, unless
// you know what you are doing.
"version": 1,

// The name of the project being benchmarked
"project": "pandas",

// The project's homepage
"project_url": "http://pandas.pydata.org/",

// The URL of the source code repository for the project being
// benchmarked
"repo": "..",

// The tool to use to create environments. May be "conda",
// "virtualenv" or other value depending on the plugins in use.
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "conda",

// The base URL to show a commit for the project.
"show_commit_url": "https://github.com/pydata/pandas/commit/",

// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
"pythons": ["2.7", "3.4"],

// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
// list indicates to just test against the default (latest)
// version.
"matrix": {
// To run against multiple versions, replace with
// "numpy": ["1.7", "1.9"],
"numpy": [],
"Cython": [],
"matplotlib": [],
"sqlalchemy": [],
"scipy": [],
"pytables": [],
},

// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
// "benchmark_dir": "benchmarks",

// The directory (relative to the current directory) to cache the Python
// environments in. If not provided, defaults to "env"
// "env_dir": "env",


// The directory (relative to the current directory) that raw benchmark
// results are stored in. If not provided, defaults to "results".
// "results_dir": "results",

// The directory (relative to the current directory) that the html tree
// should be written to. If not provided, defaults to "html".
// "html_dir": "html",

// The number of characters to retain in the commit hashes.
// "hash_length": 8
}
Empty file.
23 changes: 23 additions & 0 deletions asv_bench/benchmarks/attrs_caching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from pandas_vb_common import *


class getattr_dataframe_index(object):
    """Time reading the ``index`` attribute of a small (10x6) DataFrame."""

    goal_time = 0.2

    def setup(self):
        # Build the frame once; also keep a reference to its index.
        frame = DataFrame(np.random.randn(10, 6))
        self.df = frame
        self.cur_index = frame.index

    def time_getattr_dataframe_index(self):
        # The attribute lookup itself is what gets timed.
        self.foo = self.df.index


class setattr_dataframe_index(object):
    """Time assigning an index back onto a small (10x6) DataFrame."""

    goal_time = 0.2

    def setup(self):
        # Capture the frame's own index so it can be re-assigned in the
        # timed method.
        frame = DataFrame(np.random.randn(10, 6))
        self.df = frame
        self.cur_index = frame.index

    def time_setattr_dataframe_index(self):
        # The attribute assignment itself is what gets timed.
        self.df.index = self.cur_index
236 changes: 236 additions & 0 deletions asv_bench/benchmarks/binary_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
from pandas_vb_common import *
import pandas.computation.expressions as expr


class frame_add(object):
    """Time ``df + df2`` for two 20000x100 frames of random floats."""

    goal_time = 0.2

    def setup(self):
        rows, cols = 20000, 100
        self.df = DataFrame(np.random.randn(rows, cols))
        self.df2 = DataFrame(np.random.randn(rows, cols))

    def time_frame_add(self):
        self.df + self.df2


class frame_add_no_ne(object):
    """Time ``df + df2`` on 20000x100 float frames after calling
    ``expr.set_use_numexpr(False)``.
    """

    goal_time = 0.2

    def setup(self):
        rows, cols = 20000, 100
        self.df = DataFrame(np.random.randn(rows, cols))
        self.df2 = DataFrame(np.random.randn(rows, cols))
        # Switch numexpr usage off before the timed method runs.
        expr.set_use_numexpr(False)

    def time_frame_add_no_ne(self):
        self.df + self.df2

    def teardown(self):
        # Switch numexpr usage back on after the benchmark.
        expr.set_use_numexpr(True)


class frame_add_st(object):
    """Time ``df + df2`` on 20000x100 float frames after calling
    ``expr.set_numexpr_threads(1)``.
    """

    goal_time = 0.2

    def setup(self):
        rows, cols = 20000, 100
        self.df = DataFrame(np.random.randn(rows, cols))
        self.df2 = DataFrame(np.random.randn(rows, cols))
        # Request a single numexpr thread for the timed method.
        expr.set_numexpr_threads(1)

    def time_frame_add_st(self):
        self.df + self.df2

    def teardown(self):
        # Called with no argument; presumably resets the thread count to
        # its default -- confirm against expr.set_numexpr_threads.
        expr.set_numexpr_threads()


class frame_float_div(object):
    """Time ``df // df2`` for two 1000x1000 frames of random floats.

    Note that the timed expression uses ``//`` (floor division).
    """

    goal_time = 0.2

    def setup(self):
        side = 1000
        self.df = DataFrame(np.random.randn(side, side))
        self.df2 = DataFrame(np.random.randn(side, side))

    def time_frame_float_div(self):
        self.df // self.df2


class frame_float_div_by_zero(object):
    """Time ``df / 0`` for a 1000x1000 frame of random floats."""

    goal_time = 0.2

    def setup(self):
        values = np.random.randn(1000, 1000)
        self.df = DataFrame(values)

    def time_frame_float_div_by_zero(self):
        self.df / 0


class frame_float_floor_by_zero(object):
    """Time ``df // 0`` for a 1000x1000 frame of random floats."""

    goal_time = 0.2

    def setup(self):
        values = np.random.randn(1000, 1000)
        self.df = DataFrame(values)

    def time_frame_float_floor_by_zero(self):
        self.df // 0


class frame_float_mod(object):
    """Time ``df % df2`` for two 1000x1000 frames of random floats.

    The timed expression uses the modulo operator so that the benchmark
    measures what its name says (it previously timed ``/``).
    """

    goal_time = 0.2

    def setup(self):
        side = 1000
        self.df = DataFrame(np.random.randn(side, side))
        self.df2 = DataFrame(np.random.randn(side, side))

    def time_frame_float_mod(self):
        # Modulo, not true division: this is the "mod" benchmark.
        self.df % self.df2


class frame_int_div_by_zero(object):
    """Time ``df / 0`` for a 1000x1000 frame of random integers.

    The integers are drawn from the inclusive int16 value range
    ``[np.iinfo(np.int16).min, np.iinfo(np.int16).max]``.
    """

    goal_time = 0.2

    def setup(self):
        bounds = np.iinfo(np.int16)
        # np.random.random_integers is deprecated; randint is the
        # replacement. Its upper bound is exclusive, hence the +1 to keep
        # the same inclusive range.
        self.df = DataFrame(
            np.random.randint(bounds.min, bounds.max + 1, size=(1000, 1000))
        )

    def time_frame_int_div_by_zero(self):
        self.df / 0


class frame_int_mod(object):
    """Time ``df % df2`` for two 1000x1000 frames of random integers.

    The integers are drawn from the inclusive int16 value range
    ``[np.iinfo(np.int16).min, np.iinfo(np.int16).max]``. The timed
    expression uses the modulo operator so that the benchmark measures
    what its name says (it previously timed ``/``).
    """

    goal_time = 0.2

    def setup(self):
        bounds = np.iinfo(np.int16)
        shape = (1000, 1000)
        # np.random.random_integers is deprecated; randint is the
        # replacement. Its upper bound is exclusive, hence the +1 to keep
        # the same inclusive range.
        self.df = DataFrame(
            np.random.randint(bounds.min, bounds.max + 1, size=shape)
        )
        self.df2 = DataFrame(
            np.random.randint(bounds.min, bounds.max + 1, size=shape)
        )

    def time_frame_int_mod(self):
        # Modulo, not true division: this is the "mod" benchmark.
        self.df % self.df2


class frame_mult(object):
    """Time ``df * df2`` for two 20000x100 frames of random floats."""

    goal_time = 0.2

    def setup(self):
        rows, cols = 20000, 100
        self.df = DataFrame(np.random.randn(rows, cols))
        self.df2 = DataFrame(np.random.randn(rows, cols))

    def time_frame_mult(self):
        self.df * self.df2


class frame_mult_no_ne(object):
    """Time ``df * df2`` on 20000x100 float frames after calling
    ``expr.set_use_numexpr(False)``.
    """

    goal_time = 0.2

    def setup(self):
        rows, cols = 20000, 100
        self.df = DataFrame(np.random.randn(rows, cols))
        self.df2 = DataFrame(np.random.randn(rows, cols))
        # Switch numexpr usage off before the timed method runs.
        expr.set_use_numexpr(False)

    def time_frame_mult_no_ne(self):
        self.df * self.df2

    def teardown(self):
        # Switch numexpr usage back on after the benchmark.
        expr.set_use_numexpr(True)


class frame_mult_st(object):
    """Time ``df * df2`` on 20000x100 float frames after calling
    ``expr.set_numexpr_threads(1)``.
    """

    goal_time = 0.2

    def setup(self):
        rows, cols = 20000, 100
        self.df = DataFrame(np.random.randn(rows, cols))
        self.df2 = DataFrame(np.random.randn(rows, cols))
        # Request a single numexpr thread for the timed method.
        expr.set_numexpr_threads(1)

    def time_frame_mult_st(self):
        self.df * self.df2

    def teardown(self):
        # Called with no argument; presumably resets the thread count to
        # its default -- confirm against expr.set_numexpr_threads.
        expr.set_numexpr_threads()


class frame_multi_and(object):
    """Time boolean-mask selection ``df[(df > 0) & (df2 > 0)]`` on two
    20000x100 frames of random floats.
    """

    goal_time = 0.2

    def setup(self):
        rows, cols = 20000, 100
        self.df = DataFrame(np.random.randn(rows, cols))
        self.df2 = DataFrame(np.random.randn(rows, cols))

    def time_frame_multi_and(self):
        self.df[(self.df > 0) & (self.df2 > 0)]


class frame_multi_and_no_ne(object):
    """Time ``df[(df > 0) & (df2 > 0)]`` on 20000x100 float frames after
    calling ``expr.set_use_numexpr(False)``.
    """

    goal_time = 0.2

    def setup(self):
        rows, cols = 20000, 100
        self.df = DataFrame(np.random.randn(rows, cols))
        self.df2 = DataFrame(np.random.randn(rows, cols))
        # Switch numexpr usage off before the timed method runs.
        expr.set_use_numexpr(False)

    def time_frame_multi_and_no_ne(self):
        self.df[(self.df > 0) & (self.df2 > 0)]

    def teardown(self):
        # Switch numexpr usage back on after the benchmark.
        expr.set_use_numexpr(True)


class frame_multi_and_st(object):
    """Time ``df[(df > 0) & (df2 > 0)]`` on 20000x100 float frames after
    calling ``expr.set_numexpr_threads(1)``.
    """

    goal_time = 0.2

    def setup(self):
        rows, cols = 20000, 100
        self.df = DataFrame(np.random.randn(rows, cols))
        self.df2 = DataFrame(np.random.randn(rows, cols))
        # Request a single numexpr thread for the timed method.
        expr.set_numexpr_threads(1)

    def time_frame_multi_and_st(self):
        self.df[(self.df > 0) & (self.df2 > 0)]

    def teardown(self):
        # Called with no argument; presumably resets the thread count to
        # its default -- confirm against expr.set_numexpr_threads.
        expr.set_numexpr_threads()


class series_timestamp_compare(object):
    """Time ``series <= timestamp`` for a one-million-element datetime Series.

    The Series holds minutely timestamps starting at 2001-01-01; the scalar
    compared against is the element at position ``N // 2 - 1``.
    """

    goal_time = 0.2

    def setup(self):
        self.N = 1000000
        self.halfway = (self.N // 2) - 1
        # 'min' is the minutely offset alias; the single-letter 'T' alias
        # is deprecated in recent pandas releases.
        self.s = Series(date_range('20010101', periods=self.N, freq='min'))
        self.ts = self.s[self.halfway]

    def time_series_timestamp_compare(self):
        self.s <= self.ts


class timestamp_ops_diff1(object):
    """Time ``Series.diff()`` on one million secondly timestamps."""

    goal_time = 0.2

    def setup(self):
        self.N = 1000000
        stamps = date_range('20010101', periods=self.N, freq='s')
        self.s = Series(stamps)

    def time_timestamp_ops_diff1(self):
        self.s.diff()


class timestamp_ops_diff2(object):
    """Time ``s - s.shift()`` on one million secondly timestamps."""

    goal_time = 0.2

    def setup(self):
        self.N = 1000000
        stamps = date_range('20010101', periods=self.N, freq='s')
        self.s = Series(stamps)

    def time_timestamp_ops_diff2(self):
        self.s - self.s.shift()


class timestamp_series_compare(object):
    """Time ``timestamp >= series`` for a one-million-element datetime Series.

    The Series holds minutely timestamps starting at 2001-01-01; the scalar
    on the left-hand side is the element at position ``N // 2 - 1``.
    """

    goal_time = 0.2

    def setup(self):
        self.N = 1000000
        self.halfway = (self.N // 2) - 1
        # 'min' is the minutely offset alias; the single-letter 'T' alias
        # is deprecated in recent pandas releases.
        self.s = Series(date_range('20010101', periods=self.N, freq='min'))
        self.ts = self.s[self.halfway]

    def time_timestamp_series_compare(self):
        self.ts >= self.s
11 changes: 11 additions & 0 deletions asv_bench/benchmarks/categoricals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from pandas_vb_common import *


class concat_categorical(object):
    """Time concatenating a large categorical Series with itself.

    The Series is built from the characters of ``'aabbcd'`` repeated
    1000000 times and then cast to the ``'category'`` dtype.
    """

    goal_time = 0.2

    def setup(self):
        letters = list('aabbcd') * 1000000
        self.s = pd.Series(letters).astype('category')

    def time_concat_categorical(self):
        concat([self.s, self.s])
52 changes: 52 additions & 0 deletions asv_bench/benchmarks/ctors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from pandas_vb_common import *


class frame_constructor_ndarray(object):
    """Time building a DataFrame from a 100x100 float ndarray."""

    goal_time = 0.2

    def setup(self):
        side = 100
        self.arr = np.random.randn(side, side)

    def time_frame_constructor_ndarray(self):
        DataFrame(self.arr)


class ctor_index_array_string(object):
    """Time building an Index from a three-element object array of strings."""

    goal_time = 0.2

    def setup(self):
        labels = ['foo', 'bar', 'baz']
        self.data = np.array(labels, dtype=object)

    def time_ctor_index_array_string(self):
        Index(self.data)


class series_constructor_ndarray(object):
    """Time building a Series from a 100-element float ndarray and a
    pre-built integer Index.
    """

    goal_time = 0.2

    def setup(self):
        size = 100
        self.data = np.random.randn(size)
        self.index = Index(np.arange(size))

    def time_series_constructor_ndarray(self):
        Series(self.data, index=self.index)


class dtindex_from_series_ctor(object):
    """Time building a DatetimeIndex from a Series of 3000 Timestamps
    (three distinct dates repeated 1000 times).
    """

    goal_time = 0.2

    def setup(self):
        stamps = [Timestamp('20110101'), Timestamp('20120101'), Timestamp('20130101')]
        self.s = Series(stamps * 1000)

    def time_dtindex_from_series_ctor(self):
        DatetimeIndex(self.s)


class index_from_series_ctor(object):
    """Time building a generic Index from a Series of 3000 Timestamps
    (three distinct dates repeated 1000 times).
    """

    goal_time = 0.2

    def setup(self):
        stamps = [Timestamp('20110101'), Timestamp('20120101'), Timestamp('20130101')]
        self.s = Series(stamps * 1000)

    def time_index_from_series_ctor(self):
        Index(self.s)
Loading