From d6b98f306292529f83e7842bb69785e7566e7bdc Mon Sep 17 00:00:00 2001 From: immerrr Date: Sun, 23 Feb 2014 14:40:29 +0400 Subject: [PATCH 1/5] BLD: add benchmarks for all loc/iloc indexers for Series, DataFrame & Panel --- vb_suite/indexing.py | 110 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py index 34cbadc2e042b..316a420303ad7 100644 --- a/vb_suite/indexing.py +++ b/vb_suite/indexing.py @@ -211,3 +211,113 @@ frame_iloc_big = Benchmark('df.iloc[:100,0]', setup, start_date=datetime(2013, 1, 1)) + + +#---------------------------------------------------------------------- +# Thorough checks of all containers and all indexing types + +import pandas.util.testing as tm +MAX_ENTRIES = 1000000 +_indices = {} + + +def get_index(idx_type, nentries=MAX_ENTRIES): + assert nentries <= MAX_ENTRIES + global _indices + if not idx_type in _indices: + _indices[idx_type] = tm.makeCustomIndex( + nentries=MAX_ENTRIES, nlevels=1, idx_type=idx_type) + return _indices[idx_type][:nentries] + +mask = tm.np.arange(MAX_ENTRIES) % 3 == 0 +series_mask = tm.Series(mask) + +setup_template = common_setup + """ +import sys + +data = sys.modules[%(module_name)r] + +obj = %(class_name)s(%(ctor_args)s) + +pos = -1 +axis = obj._get_axis(%(axis)r) +label = axis[pos] +arr_pos = np.arange(int(len(axis) / 2)) +arr_label = axis[arr_pos].values +mask = data.mask[:len(axis)] +series_mask = data.series_mask[:len(axis)] +""" + + +def generate_index_benchmarks(klass, idx_type, shape): + if not isinstance(shape, tuple): + shape = (shape,) + ndim = len(shape) + + if not isinstance(idx_type, tuple): + idx_types = tuple([idx_type] * ndim) + else: + assert len(idx_type) == ndim + idx_types = idx_type + + axes = klass._AXIS_ORDERS + ctor_args = ',\n '.join(['%s=data.get_index(%r, nentries=%s)' % v + for v in zip(axes, idx_types, shape)]) + + def get_benchmark_name(indexer, axis): + shape_type_str = 'x'.join([str(s) + str(t) + for s, t in zip(shape, idx_types)]) + + components = ['indexing_', klass.__name__.lower(), indexer, + shape_type_str] + if axis is not None: + components.append("ax%s" % axis) + + return '_'.join(components) + + def make_suffix(attrname, indexer_str, axis): + if axis is not None: + indexers = [':'] * ndim + indexers[axis] = indexer_str + indexer_str = ','.join(indexers) + return '%s[%s]' % (attrname, indexer_str) + + benchmarked_axes = set([None, 0, ndim - 1]) + + return { + b.name: b + for b in [ + Benchmark('obj%s' % params['suffix'], + setup_template % { + 'module_name': __name__, 'class_name': klass.__name__, + 'ctor_args': ctor_args, 'axis': axis or 0}, + name=get_benchmark_name(params['indexer'], axis)) + for axis in benchmarked_axes + for params in [ + {'indexer': 'basic_pos', + 'suffix': make_suffix('.iloc', 'pos', axis)}, + {'indexer': 'basic_label', + 'suffix': make_suffix('.loc', 'label', axis)}, + + {'indexer': 'slice_pos', + 'suffix': make_suffix('.iloc', ':pos', axis)}, + {'indexer': 'slice_label', + 'suffix': make_suffix('.loc', ':label', axis)}, + + {'indexer': 'arr_pos', + 'suffix': make_suffix('.iloc', 'arr_pos', axis)}, + {'indexer': 'arr_label', + 'suffix': make_suffix('.loc', 'arr_label', axis)}, + + {'indexer': 'iloc_mask', + 'suffix': make_suffix('.iloc', 'mask', axis)}, + {'indexer': 'loc_mask', + 'suffix': make_suffix('.loc', 'mask', axis)}, ] + ] + } + +globals().update(generate_index_benchmarks(tm.Series, 's', 100000)) +globals().update(generate_index_benchmarks(tm.DataFrame, 's', (10, 100000))) +globals().update(generate_index_benchmarks(tm.DataFrame, 's', (100000, 10))) +globals().update(generate_index_benchmarks(tm.Panel, 's', (100000, 10, 10))) +globals().update(generate_index_benchmarks(tm.Panel, 's', (10, 10, 100000))) From 8d17f508323635a1442c7d32bb8fe5c68b2d7297 Mon Sep 17 00:00:00 2001 From: immerrr Date: Thu, 27 Feb 2014 20:36:51 +0400 Subject: [PATCH 2/5] BLD: don't contract stats output if it's longer than 60 rows --- vb_suite/test_perf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index 66e50269f00c6..55f57621f90f1 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -442,6 +442,7 @@ def print_report(df,h_head=None,h_msg="",h_baseline=None,b_msg=""): if args.stats : try: pd.options.display.expand_frame_repr=False + pd.set_option('display.max_rows', None) except: pass stats_footer += str(df.T.describe().T) + "\n\n" From bb148fc78563e4cbeb2f0ea71a3c0fef547bdb4d Mon Sep 17 00:00:00 2001 From: immerrr Date: Fri, 28 Feb 2014 20:13:26 +0400 Subject: [PATCH 3/5] BLD: fix indexing__* benchmarks to work in comparative mode --- vb_suite/indexing.py | 103 ++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py index 316a420303ad7..00a0018c7ddb6 100644 --- a/vb_suite/indexing.py +++ b/vb_suite/indexing.py @@ -217,25 +217,28 @@ # Thorough checks of all containers and all indexing types import pandas.util.testing as tm -MAX_ENTRIES = 1000000 -_indices = {} +setup_template = common_setup + """ +import sys -def get_index(idx_type, nentries=MAX_ENTRIES): - assert nentries <= MAX_ENTRIES - global _indices - if not idx_type in _indices: - _indices[idx_type] = tm.makeCustomIndex( - nentries=MAX_ENTRIES, nlevels=1, idx_type=idx_type) - return _indices[idx_type][:nentries] +try: + make_index = tm.makeCustomIndexWithCache +except AttributeError: + MAX_ENTRIES = 1000000 + _indices = {} -mask = tm.np.arange(MAX_ENTRIES) % 3 == 0 -series_mask = tm.Series(mask) + def makeCustomIndexWithCache(nentries, **kwargs): + assert nentries < MAX_ENTRIES -setup_template = common_setup + """ -import sys + key = tuple(kwargs.items()) + try: + full_idx = _indices[key] + except KeyError: + full_idx = _indices[key] = tm.makeCustomIndex(nentries=MAX_ENTRIES, + **kwargs) + return full_idx[:nentries] -data = sys.modules[%(module_name)r] + make_index = tm.makeCustomIndexWithCache = makeCustomIndexWithCache obj = %(class_name)s(%(ctor_args)s) @@ -244,11 +247,10 @@ def get_index(idx_type, nentries=MAX_ENTRIES): label = axis[pos] arr_pos = np.arange(int(len(axis) / 2)) arr_label = axis[arr_pos].values -mask = data.mask[:len(axis)] -series_mask = data.series_mask[:len(axis)] +mask = tm.np.arange(len(axis)) %% 3 == 0 +series_mask = Series(mask) """ - def generate_index_benchmarks(klass, idx_type, shape): if not isinstance(shape, tuple): shape = (shape,) @@ -261,8 +263,9 @@ def generate_index_benchmarks(klass, idx_type, shape): idx_types = idx_type axes = klass._AXIS_ORDERS - ctor_args = ',\n '.join(['%s=data.get_index(%r, nentries=%s)' % v - for v in zip(axes, idx_types, shape)]) + ctor_args = ',\n '.join([ + '%s=make_index(idx_type=%r, nentries=%s, nlevels=1)' % v + for v in zip(axes, idx_types, shape)]) def get_benchmark_name(indexer, axis): shape_type_str = 'x'.join([str(s) + str(t) @@ -277,44 +280,44 @@ def get_benchmark_name(indexer, axis): def make_suffix(attrname, indexer_str, axis): if axis is not None: - indexers = [':'] * ndim - indexers[axis] = indexer_str - indexer_str = ','.join(indexers) + indexers = [':,'] * ndim + indexers[axis] = indexer_str + ',' + indexer_str = ''.join(indexers) return '%s[%s]' % (attrname, indexer_str) benchmarked_axes = set([None, 0, ndim - 1]) - return { - b.name: b - for b in [ - Benchmark('obj%s' % params['suffix'], + result = {} + for axis in benchmarked_axes: + for params in [ + {'indexer': 'basic_pos', + 'suffix': make_suffix('.iloc', 'pos', axis)}, + {'indexer': 'basic_label', + 'suffix': make_suffix('.loc', 'label', axis)}, + + {'indexer': 'slice_pos', + 'suffix': make_suffix('.iloc', ':pos', axis)}, + {'indexer': 'slice_label', + 'suffix': make_suffix('.loc', ':label', axis)}, + + {'indexer': 'arr_pos', + 'suffix': make_suffix('.iloc', 'arr_pos', axis)}, + {'indexer': 'arr_label', + 'suffix': make_suffix('.loc', 'arr_label', axis)}, + + {'indexer': 'iloc_mask', + 'suffix': make_suffix('.iloc', 'mask', axis)}, + {'indexer': 'loc_mask', + 'suffix': make_suffix('.loc', 'mask', axis)}, ]: + + b = Benchmark('obj%s' % params['suffix'], setup_template % { - 'module_name': __name__, 'class_name': klass.__name__, + 'class_name': klass.__name__, 'ctor_args': ctor_args, 'axis': axis or 0}, name=get_benchmark_name(params['indexer'], axis)) - for axis in benchmarked_axes - for params in [ - {'indexer': 'basic_pos', - 'suffix': make_suffix('.iloc', 'pos', axis)}, - {'indexer': 'basic_label', - 'suffix': make_suffix('.loc', 'label', axis)}, - - {'indexer': 'slice_pos', - 'suffix': make_suffix('.iloc', ':pos', axis)}, - {'indexer': 'slice_label', - 'suffix': make_suffix('.loc', ':label', axis)}, - - {'indexer': 'arr_pos', - 'suffix': make_suffix('.iloc', 'arr_pos', axis)}, - {'indexer': 'arr_label', - 'suffix': make_suffix('.loc', 'arr_label', axis)}, - - {'indexer': 'iloc_mask', - 'suffix': make_suffix('.iloc', 'mask', axis)}, - {'indexer': 'loc_mask', - 'suffix': make_suffix('.loc', 'mask', axis)}, ] - ] - } + result[b.name] = b + + return result globals().update(generate_index_benchmarks(tm.Series, 's', 100000)) globals().update(generate_index_benchmarks(tm.DataFrame, 's', (10, 100000))) From af56899f0e7aa73ff0355d4862511e0348395efa Mon Sep 17 00:00:00 2001 From: immerrr Date: Mon, 10 Mar 2014 00:41:36 +0400 Subject: [PATCH 4/5] BLD: move exhaustive benchmarks to separate module, add option to include that module --- vb_suite/indexing.py | 113 ------------------------------ vb_suite/indexing_exhaustive.py | 120 ++++++++++++++++++++++++++++++++ vb_suite/suite.py | 41 ++++++++--- vb_suite/test_perf.py | 15 ++-- 4 files changed, 164 insertions(+), 125 deletions(-) create mode 100644 vb_suite/indexing_exhaustive.py diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py index 00a0018c7ddb6..34cbadc2e042b 100644 --- a/vb_suite/indexing.py +++ b/vb_suite/indexing.py @@ -211,116 +211,3 @@ frame_iloc_big = Benchmark('df.iloc[:100,0]', setup, start_date=datetime(2013, 1, 1)) - - -#---------------------------------------------------------------------- -# Thorough checks of all containers and all indexing types - -import pandas.util.testing as tm - -setup_template = common_setup + """ -import sys - -try: - make_index = tm.makeCustomIndexWithCache -except AttributeError: - MAX_ENTRIES = 1000000 - _indices = {} - - def makeCustomIndexWithCache(nentries, **kwargs): - assert nentries < MAX_ENTRIES - - key = tuple(kwargs.items()) - try: - full_idx = _indices[key] - except KeyError: - full_idx = _indices[key] = tm.makeCustomIndex(nentries=MAX_ENTRIES, - **kwargs) - return full_idx[:nentries] - - make_index = tm.makeCustomIndexWithCache = makeCustomIndexWithCache - -obj = %(class_name)s(%(ctor_args)s) - -pos = -1 -axis = obj._get_axis(%(axis)r) -label = axis[pos] -arr_pos = np.arange(int(len(axis) / 2)) -arr_label = axis[arr_pos].values -mask = tm.np.arange(len(axis)) %% 3 == 0 -series_mask = Series(mask) -""" - -def generate_index_benchmarks(klass, idx_type, shape): - if not isinstance(shape, tuple): - shape = (shape,) - ndim = len(shape) - - if not isinstance(idx_type, tuple): - idx_types = tuple([idx_type] * ndim) - else: - assert len(idx_type) == ndim - idx_types = idx_type - - axes = klass._AXIS_ORDERS - ctor_args = ',\n '.join([ - '%s=make_index(idx_type=%r, nentries=%s, nlevels=1)' % v - for v in zip(axes, idx_types, shape)]) - - def get_benchmark_name(indexer, axis): - shape_type_str = 'x'.join([str(s) + str(t) - for s, t in zip(shape, idx_types)]) - - components = ['indexing_', klass.__name__.lower(), indexer, - shape_type_str] - if axis is not None: - components.append("ax%s" % axis) - - return '_'.join(components) - - def make_suffix(attrname, indexer_str, axis): - if axis is not None: - indexers = [':,'] * ndim - indexers[axis] = indexer_str + ',' - indexer_str = ''.join(indexers) - return '%s[%s]' % (attrname, indexer_str) - - benchmarked_axes = set([None, 0, ndim - 1]) - - result = {} - for axis in benchmarked_axes: - for params in [ - {'indexer': 'basic_pos', - 'suffix': make_suffix('.iloc', 'pos', axis)}, - {'indexer': 'basic_label', - 'suffix': make_suffix('.loc', 'label', axis)}, - - {'indexer': 'slice_pos', - 'suffix': make_suffix('.iloc', ':pos', axis)}, - {'indexer': 'slice_label', - 'suffix': make_suffix('.loc', ':label', axis)}, - - {'indexer': 'arr_pos', - 'suffix': make_suffix('.iloc', 'arr_pos', axis)}, - {'indexer': 'arr_label', - 'suffix': make_suffix('.loc', 'arr_label', axis)}, - - {'indexer': 'iloc_mask', - 'suffix': make_suffix('.iloc', 'mask', axis)}, - {'indexer': 'loc_mask', - 'suffix': make_suffix('.loc', 'mask', axis)}, ]: - - b = Benchmark('obj%s' % params['suffix'], - setup_template % { - 'class_name': klass.__name__, - 'ctor_args': ctor_args, 'axis': axis or 0}, - name=get_benchmark_name(params['indexer'], axis)) - result[b.name] = b - - return result - -globals().update(generate_index_benchmarks(tm.Series, 's', 100000)) -globals().update(generate_index_benchmarks(tm.DataFrame, 's', (10, 100000))) -globals().update(generate_index_benchmarks(tm.DataFrame, 's', (100000, 10))) -globals().update(generate_index_benchmarks(tm.Panel, 's', (100000, 10, 10))) -globals().update(generate_index_benchmarks(tm.Panel, 's', (10, 10, 100000))) diff --git a/vb_suite/indexing_exhaustive.py b/vb_suite/indexing_exhaustive.py new file mode 100644 index 0000000000000..358a3d81b69e5 --- /dev/null +++ b/vb_suite/indexing_exhaustive.py @@ -0,0 +1,120 @@ +#---------------------------------------------------------------------- +# Thorough checks of all containers and all indexing types + +from vbench.benchmark import Benchmark + +SECTION = 'Exhaustive check of indexing and scalar value access' + +common_setup = """from pandas_vb_common import * +""" + + +import pandas.util.testing as tm + +setup_template = common_setup + """ +import sys + +try: + make_index = tm.makeCustomIndexWithCache +except AttributeError: + MAX_ENTRIES = 1000000 + _indices = {} + + def makeCustomIndexWithCache(nentries, **kwargs): + assert nentries < MAX_ENTRIES + + key = tuple(kwargs.items()) + try: + full_idx = _indices[key] + except KeyError: + full_idx = _indices[key] = tm.makeCustomIndex(nentries=MAX_ENTRIES, + **kwargs) + return full_idx[:nentries] + + make_index = tm.makeCustomIndexWithCache = makeCustomIndexWithCache + +obj = %(class_name)s(%(ctor_args)s) + +pos = -1 +axis = obj._get_axis(%(axis)r) +label = axis[pos] +arr_pos = np.arange(int(len(axis) / 2)) +arr_label = axis[arr_pos].values +mask = tm.np.arange(len(axis)) %% 3 == 0 +series_mask = Series(mask) +""" + + +def generate_index_benchmarks(klass, idx_type, shape): + if not isinstance(shape, tuple): + shape = (shape,) + ndim = len(shape) + + if not isinstance(idx_type, tuple): + idx_types = tuple([idx_type] * ndim) + else: + assert len(idx_type) == ndim + idx_types = idx_type + + axes = klass._AXIS_ORDERS + ctor_args = ',\n '.join([ + '%s=make_index(idx_type=%r, nentries=%s, nlevels=1)' % v + for v in zip(axes, idx_types, shape)]) + + def get_benchmark_name(indexer, axis): + shape_type_str = 'x'.join([str(s) + str(t) + for s, t in zip(shape, idx_types)]) + + components = ['indexing_', klass.__name__.lower(), indexer, + shape_type_str] + if axis is not None: + components.append("ax%s" % axis) + + return '_'.join(components) + + def make_suffix(attrname, indexer_str, axis): + if axis is not None: + indexers = [':,'] * ndim + indexers[axis] = indexer_str + ',' + indexer_str = ''.join(indexers) + return '%s[%s]' % (attrname, indexer_str) + + benchmarked_axes = set([None, 0, ndim - 1]) + + result = {} + for axis in benchmarked_axes: + for params in [ + {'indexer': 'basic_pos', + 'suffix': make_suffix('.iloc', 'pos', axis)}, + {'indexer': 'basic_label', + 'suffix': make_suffix('.loc', 'label', axis)}, + + {'indexer': 'slice_pos', + 'suffix': make_suffix('.iloc', ':pos', axis)}, + {'indexer': 'slice_label', + 'suffix': make_suffix('.loc', ':label', axis)}, + + {'indexer': 'arr_pos', + 'suffix': make_suffix('.iloc', 'arr_pos', axis)}, + {'indexer': 'arr_label', + 'suffix': make_suffix('.loc', 'arr_label', axis)}, + + {'indexer': 'iloc_mask', + 'suffix': make_suffix('.iloc', 'mask', axis)}, + {'indexer': 'loc_mask', + 'suffix': make_suffix('.loc', 'mask', axis)}, ]: + + b = Benchmark('obj%s' % params['suffix'], + setup_template % { + 'class_name': klass.__name__, + 'ctor_args': ctor_args, 'axis': axis or 0}, + name=get_benchmark_name(params['indexer'], axis)) + result[b.name] = b + + return result + +globals().update(generate_index_benchmarks(tm.Series, 's', 100000)) +globals().update(generate_index_benchmarks(tm.DataFrame, 's', (10, 100000))) +globals().update(generate_index_benchmarks(tm.DataFrame, 's', (100000, 10))) +globals().update(generate_index_benchmarks(tm.Panel, 's', (100000, 10, 10))) +globals().update(generate_index_benchmarks(tm.Panel, 's', (10, 10, 100000))) diff --git a/vb_suite/suite.py b/vb_suite/suite.py index a1b38e8509e4e..12da997068b38 100644 --- a/vb_suite/suite.py +++ b/vb_suite/suite.py @@ -30,17 +30,42 @@ 'timedelta', 'eval'] -by_module = {} -benchmarks = [] -for modname in modules: - ref = __import__(modname) - by_module[modname] = [v for v in ref.__dict__.values() +def discover_benchmarks(mods, return_as='list'): + """ + Collect available benchmarks from specified modules. + + Arguments + --------- + mods: list of str + List of modules to search in + return_as: {'both', 'list', 'dict'} + Specifies result type: dict will group benchmarks by module + """ + by_module = {} + benchmarks = [] + + for modname in mods: + ref = __import__(modname) + mod_benchmarks = [v for v in ref.__dict__.values() if isinstance(v, Benchmark)] - benchmarks.extend(by_module[modname]) -for bm in benchmarks: - assert(bm.name is not None) + for bm in mod_benchmarks: + assert bm.name is not None + + by_module[modname] = mod_benchmarks + benchmarks.extend(mod_benchmarks) + + if return_as == 'both': + return by_module, benchmarks + elif return_as == 'list': + return benchmarks + elif return_as == 'dict': + return by_module + else: + raise ValueError("Incorrect return_as value: %s" % return_as) + +by_module, benchmarks = discover_benchmarks(modules, return_as='both') import getpass import sys diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index 55f57621f90f1..005e829b96d46 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -113,6 +113,9 @@ def __call__(self, parser, namespace, values, option_string=None): dest='regex', default="", help='Regex pat, only tests whose name matches the regext will be run.') +parser.add_argument('-e', '--extra-benchmarks', metavar='EXTRA', + dest='extras', action='append', + help='Extra modules to collect benchmarks from') parser.add_argument('-s', '--seed', metavar="SEED", dest='seed', @@ -462,10 +465,7 @@ def print_report(df,h_head=None,h_msg="",h_baseline=None,b_msg=""): args.log_file) - def main(): - from suite import benchmarks - if not args.log_file: args.log_file = os.path.abspath( os.path.join(REPO_PATH, 'vb_suite.log')) @@ -510,7 +510,14 @@ def main(): # surprises os.chdir(os.path.dirname(os.path.abspath(__file__))) - benchmarks = [x for x in benchmarks if re.search(args.regex,x.name)] + from suite import discover_benchmarks, benchmarks + + benchmarks = [b for b in benchmarks] + if args.extras: + benchmarks.extend(discover_benchmarks(args.extras, return_as='list')) + + benchmarks = [bm for bm in benchmarks + if re.search(args.regex, bm.name)] for b in benchmarks: b.repeat = args.repeats From 001fcace27bc258bd9fc4d653213d1c0ee8ca3ec Mon Sep 17 00:00:00 2001 From: immerrr Date: Tue, 11 Mar 2014 14:15:36 +0400 Subject: [PATCH 5/5] vb_suite/extras_indexing.py: add axis types (str, int, datetime, etc) --- vb_suite/extras_indexing.py | 164 ++++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 vb_suite/extras_indexing.py diff --git a/vb_suite/extras_indexing.py b/vb_suite/extras_indexing.py new file mode 100644 index 0000000000000..a6cc1284794b4 --- /dev/null +++ b/vb_suite/extras_indexing.py @@ -0,0 +1,164 @@ +#---------------------------------------------------------------------- +# Thorough checks of all containers and all indexing types + +from vbench.benchmark import Benchmark + +SECTION = 'Exhaustive check of indexing and scalar value access' + +common_setup = """from pandas_vb_common import * +""" + + +import pandas.util.testing as tm + +MAX_ENTRIES = 100000 + +# FIXME: makeCustomIndexWithCache reimplements (sort of) tm.makeCustomIndex, +# because the latter doesn't offer customization of date/period index +# frequencies and integer index offset. + +setup_template = common_setup + """ +import sys +import pandas as pd + +try: + make_index = tm.makeCustomIndexWithCache +except AttributeError: + MAX_ENTRIES = %(MAX_ENTRIES)s + _indices = {} + + def makeCustomIndexWithCache(nentries, idx_type): + assert nentries <= MAX_ENTRIES + + key = idx_type + try: + full_idx = _indices[key] + except KeyError: + if idx_type == 'mi': + full_idx = tm.makeCustomIndex(nentries=MAX_ENTRIES, nlevels=2) + elif idx_type == 'dt': + full_idx = pd.date_range('2000-01-01', periods=MAX_ENTRIES, freq='T') + elif idx_type == 'p': + full_idx = pd.period_range('2000-01-01', periods=MAX_ENTRIES, freq='T') + elif idx_type == 's': + full_idx = tm.makeStringIndex(k=MAX_ENTRIES) + elif idx_type == 'u': + full_idx = tm.makeUnicodeIndex(k=MAX_ENTRIES) + elif idx_type == 'i': + full_idx = pd.Index(np.arange(MAX_ENTRIES) + MAX_ENTRIES) + elif idx_type == 'f': + full_idx = tm.makeFloatIndex(MAX_ENTRIES) + else: + raise ValueError('Wrong idx type: %%s' %% idx_type) + + _indices[key] = full_idx + + return full_idx[:nentries] + + make_index = tm.makeCustomIndexWithCache = makeCustomIndexWithCache + +obj = %(class_name)s(%(ctor_args)s) + +pos = -1 +axis = obj._get_axis(%(axis)r) +label = axis[pos] +arr_pos = np.arange(int(len(axis) / 2)) +arr_label = axis[arr_pos].values +mask = tm.np.arange(len(axis)) %% 3 == 0 +series_mask = Series(mask) +""" + +# generate_index_benchmarks( +# klass, long_axis=axis, idx_type=idx_type, is_dup=is_dup) + + +def generate_index_benchmarks(klass, idx_type, long_axis): + ndim = klass().ndim + + shape = [10] * ndim + shape[long_axis] = MAX_ENTRIES + shape = tuple(shape) + + types = ['i'] * ndim + types[long_axis] = idx_type + types = tuple(types) + + axes = klass._AXIS_ORDERS + ctor_args = ',\n '.join([ + '%s=make_index(nentries=%r, idx_type=%r)' % v + for v in zip(axes, shape, types)]) + + def get_benchmark_name(indexer, axis): + shape_type_str = 'x'.join([str(s) + str(t) + for s, t in zip(shape, types)]) + + components = ['indexing_', klass.__name__.lower(), indexer, + shape_type_str] + if axis is not None: + components.append("ax%s" % axis) + + return '_'.join(components) + + def make_suffix(attrname, indexer_str, axis): + if axis is not None: + indexers = [':,'] * ndim + indexers[axis] = indexer_str + ',' + indexer_str = ''.join(indexers) + return '%s[%s]' % (attrname, indexer_str) + + benchmarked_axes = set([None, 0, ndim - 1]) + + result = {} + for axis in benchmarked_axes: + for params in [ + {'indexer': 'basic_pos', + 'suffix': make_suffix('.iloc', 'pos', axis)}, + {'indexer': 'basic_label', + 'suffix': make_suffix('.loc', 'label', axis)}, + + {'indexer': 'slice_pos', + 'suffix': make_suffix('.iloc', ':pos', axis)}, + {'indexer': 'slice_label', + 'suffix': make_suffix('.loc', ':label', axis)}, + + {'indexer': 'arr_pos', + 'suffix': make_suffix('.iloc', 'arr_pos', axis)}, + {'indexer': 'arr_label', + 'suffix': make_suffix('.loc', 'arr_label', axis)}, + + {'indexer': 'iloc_mask', + 'suffix': make_suffix('.iloc', 'mask', axis)}, + {'indexer': 'loc_mask', + 'suffix': make_suffix('.loc', 'mask', axis)}, ]: + + b = Benchmark('obj%s' % params['suffix'], + setup_template % { + 'class_name': klass.__name__, + 'ctor_args': ctor_args, 'axis': axis or 0, + 'MAX_ENTRIES': MAX_ENTRIES}, + name=get_benchmark_name(params['indexer'], axis)) + result[b.name] = b + + return result + +# Benchmarks are generated as follows: given a container type, generate an +# instance of it with one of the axes long enough to produce statistically +# significant timing values and try different kinds of indexing on it. +# +# Generated benchmark set involves a cartesian product of +# - container types +# - designated "long" axis (minor or major one) +# - "long" axis type (string, integer, datetime, period, multiindex) +# - indexer type (positional, slice, fancy, etc.) +# - indexer axis (indexing is not limited to "long" axis) +# - label/positional indexer +# +# FIXME: add multiindex indexers? +# FIXME: add non-unique axes? +# FIXME: add non-unique non-monotonic axes? +for klass in (tm.Series, tm.DataFrame, tm.Panel): + for axis in set([0, klass().ndim - 1]): + for idx_type in ('s', 'i', 'dt', 'p', 'mi'): + bms = generate_index_benchmarks( + klass, long_axis=axis, idx_type=idx_type) + globals().update(bms)