diff --git a/doc/source/merging.rst b/doc/source/merging.rst
index 274bad618cb3d..7128e2dd82d6c 100644
--- a/doc/source/merging.rst
+++ b/doc/source/merging.rst
@@ -130,9 +130,9 @@ behavior:
 
 .. ipython:: python
 
-   from pandas.util.testing import rands
+   from pandas.util.testing import rands_array
    df = DataFrame(np.random.randn(10, 4), columns=['a', 'b', 'c', 'd'],
-                  index=[rands(5) for _ in range(10)])
+                  index=rands_array(5, 10))
    df
 
    concat([df.ix[:7, ['a', 'b']], df.ix[2:-2, ['c']],
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 1e3d789ce206b..31dc58d1870e0 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -2110,11 +2110,6 @@ def _count_not_none(*args):
 # miscellaneous python tools
 
 
-def rands(n):
-    """Generates a random alphanumeric string of length *n*"""
-    from random import Random
-    import string
-    return ''.join(Random().sample(string.ascii_letters + string.digits, n))
 
 
 def adjoin(space, *lists):
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index c097f82c6bd2f..da9d39ae82617 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -198,8 +198,8 @@ def test_long_strings(self):
         # GH6166
         # unconversion of long strings was being chopped in earlier
         # versions of numpy < 1.7.2
-        df = DataFrame({'a': [tm.rands(100) for _ in range(10)]},
-                       index=[tm.rands(100) for _ in range(10)])
+        df = DataFrame({'a': tm.rands_array(100, size=10)},
+                       index=tm.rands_array(100, size=10))
 
         with ensure_clean_store(self.path) as store:
             store.append('df', df, data_columns=['a'])
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index 5e91adbe1a2fa..0d13b6513b377 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -274,11 +274,6 @@ def test_repr_binary_type():
     assert_equal(res, b)
 
 
-def test_rands():
-    r = com.rands(10)
-    assert(len(r) == 10)
-
-
 def test_adjoin():
     data = [['a', 'b', 'c'],
             ['dd', 'ee', 'ff'],
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
index 7d4ee05a1e64f..89d08d37e0a30 100644
--- a/pandas/tests/test_format.py
+++ b/pandas/tests/test_format.py
@@ -1201,9 +1201,8 @@ def test_pprint_thing(self):
     def test_wide_repr(self):
         with option_context('mode.sim_interactive', True,
                             'display.show_dimensions', True):
-            col = lambda l, k: [tm.rands(k) for _ in range(l)]
             max_cols = get_option('display.max_columns')
-            df = DataFrame([col(max_cols - 1, 25) for _ in range(10)])
+            df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
             set_option('display.expand_frame_repr', False)
 
             rep_str = repr(df)
@@ -1227,9 +1226,8 @@ def test_wide_repr_wide_columns(self):
 
     def test_wide_repr_named(self):
         with option_context('mode.sim_interactive', True):
-            col = lambda l, k: [tm.rands(k) for _ in range(l)]
             max_cols = get_option('display.max_columns')
-            df = DataFrame([col(max_cols-1, 25) for _ in range(10)])
+            df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
             df.index.name = 'DataFrame Index'
             set_option('display.expand_frame_repr', False)
 
@@ -1249,11 +1247,10 @@ def test_wide_repr_named(self):
 
     def test_wide_repr_multiindex(self):
         with option_context('mode.sim_interactive', True):
-            col = lambda l, k: [tm.rands(k) for _ in range(l)]
-            midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)),
-                                                  np.array(col(10, 5))])
+            midx = pandas.MultiIndex.from_arrays(
+                tm.rands_array(5, size=(2, 10)))
             max_cols = get_option('display.max_columns')
-            df = DataFrame([col(max_cols-1, 25) for _ in range(10)],
+            df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)),
                            index=midx)
             df.index.names = ['Level 0', 'Level 1']
             set_option('display.expand_frame_repr', False)
@@ -1274,12 +1271,11 @@ def test_wide_repr_multiindex(self):
     def test_wide_repr_multiindex_cols(self):
         with option_context('mode.sim_interactive', True):
             max_cols = get_option('display.max_columns')
-            col = lambda l, k: [tm.rands(k) for _ in range(l)]
-            midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)),
-                                                  np.array(col(10, 5))])
-            mcols = pandas.MultiIndex.from_arrays([np.array(col(max_cols-1, 3)),
-                                                   np.array(col(max_cols-1, 3))])
-            df = DataFrame([col(max_cols-1, 25) for _ in range(10)],
+            midx = pandas.MultiIndex.from_arrays(
+                tm.rands_array(5, size=(2, 10)))
+            mcols = pandas.MultiIndex.from_arrays(
+                tm.rands_array(3, size=(2, max_cols - 1)))
+            df = DataFrame(tm.rands_array(25, (10, max_cols - 1)),
                           index=midx, columns=mcols)
             df.index.names = ['Level 0', 'Level 1']
             set_option('display.expand_frame_repr', False)
@@ -1296,9 +1292,8 @@ def test_wide_repr_multiindex_cols(self):
 
     def test_wide_repr_unicode(self):
         with option_context('mode.sim_interactive', True):
-            col = lambda l, k: [tm.randu(k) for _ in range(l)]
             max_cols = get_option('display.max_columns')
-            df = DataFrame([col(max_cols-1, 25) for _ in range(10)])
+            df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
             set_option('display.expand_frame_repr', False)
             rep_str = repr(df)
             set_option('display.expand_frame_repr', True)
@@ -1877,30 +1872,31 @@ def test_repr_html(self):
         self.reset_display_options()
 
     def test_repr_html_wide(self):
-        row = lambda l, k: [tm.rands(k) for _ in range(l)]
         max_cols = get_option('display.max_columns')
-        df = DataFrame([row(max_cols-1, 25) for _ in range(10)])
+        df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
        reg_repr = df._repr_html_()
         assert "..." not in reg_repr
 
-        wide_df = DataFrame([row(max_cols+1, 25) for _ in range(10)])
+        wide_df = DataFrame(tm.rands_array(25, size=(10, max_cols + 1)))
         wide_repr = wide_df._repr_html_()
         assert "..." in wide_repr
 
     def test_repr_html_wide_multiindex_cols(self):
-        row = lambda l, k: [tm.rands(k) for _ in range(l)]
         max_cols = get_option('display.max_columns')
 
-        tuples = list(itertools.product(np.arange(max_cols//2), ['foo', 'bar']))
-        mcols = pandas.MultiIndex.from_tuples(tuples, names=['first', 'second'])
-        df = DataFrame([row(len(mcols), 25) for _ in range(10)], columns=mcols)
+        mcols = pandas.MultiIndex.from_product([np.arange(max_cols//2),
+                                                ['foo', 'bar']],
+                                               names=['first', 'second'])
+        df = DataFrame(tm.rands_array(25, size=(10, len(mcols))),
+                       columns=mcols)
         reg_repr = df._repr_html_()
         assert '...' not in reg_repr
-
-        tuples = list(itertools.product(np.arange(1+(max_cols//2)), ['foo', 'bar']))
-        mcols = pandas.MultiIndex.from_tuples(tuples, names=['first', 'second'])
-        df = DataFrame([row(len(mcols), 25) for _ in range(10)], columns=mcols)
+        mcols = pandas.MultiIndex.from_product((np.arange(1+(max_cols//2)),
+                                                ['foo', 'bar']),
+                                               names=['first', 'second'])
+        df = DataFrame(tm.rands_array(25, size=(10, len(mcols))),
+                       columns=mcols)
         wide_repr = df._repr_html_()
         assert '...' in wide_repr
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 3efd399450d11..e5064544b292e 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -4734,7 +4734,7 @@ def test_bytestring_with_unicode(self):
 
     def test_very_wide_info_repr(self):
         df = DataFrame(np.random.randn(10, 20),
-                       columns=[tm.rands(10) for _ in range(20)])
+                       columns=tm.rands_array(10, 20))
         repr(df)
 
     def test_repr_column_name_unicode_truncation_bug(self):
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 0e8b5d68e3fd7..7ead8b30e8671 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -8,7 +8,6 @@
 
 from pandas import date_range,bdate_range, Timestamp
 from pandas.core.index import Index, MultiIndex, Int64Index
-from pandas.core.common import rands
 from pandas.core.api import Categorical, DataFrame
 from pandas.core.groupby import (SpecificationError, DataError,
                                  _nargsort, _lexsort_indexer)
@@ -2579,7 +2578,7 @@ def test_cython_grouper_series_bug_noncontig(self):
         self.assertTrue(result.isnull().all())
 
     def test_series_grouper_noncontig_index(self):
-        index = Index([tm.rands(10) for _ in range(100)])
+        index = Index(tm.rands_array(10, 100))
 
         values = Series(np.random.randn(50), index=index[::2])
         labels = np.random.randint(0, 5, 50)
@@ -2869,8 +2868,8 @@ def test_column_select_via_attr(self):
         assert_frame_equal(result, expected)
 
     def test_rank_apply(self):
-        lev1 = np.array([rands(10) for _ in range(100)], dtype=object)
-        lev2 = np.array([rands(10) for _ in range(130)], dtype=object)
+        lev1 = tm.rands_array(10, 100)
+        lev2 = tm.rands_array(10, 130)
         lab1 = np.random.randint(0, 100, size=500)
         lab2 = np.random.randint(0, 130, size=500)
 
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index 29bdb2c983d61..2d3961a643991 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -327,8 +327,7 @@ def test_getitem_setitem_ellipsis(self):
         self.assertTrue((result == 5).all())
 
     def test_getitem_negative_out_of_bounds(self):
-        s = Series([tm.rands(5) for _ in range(10)],
-                   index=[tm.rands(10) for _ in range(10)])
+        s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
 
         self.assertRaises(IndexError, s.__getitem__, -11)
         self.assertRaises(IndexError, s.__setitem__, -11, 'foo')
@@ -3852,11 +3851,10 @@ def _check_op(arr, op):
         _check_op(arr, operator.floordiv)
 
     def test_series_frame_radd_bug(self):
-        from pandas.util.testing import rands
         import operator
 
         # GH 353
-        vals = Series([rands(5) for _ in range(10)])
+        vals = Series(tm.rands_array(5, 10))
         result = 'foo_' + vals
         expected = vals.map(lambda x: 'foo_' + x)
         assert_series_equal(result, expected)
diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py
index 76b49a5f976bd..8c56ba0e0f548 100644
--- a/pandas/tests/test_util.py
+++ b/pandas/tests/test_util.py
@@ -59,6 +59,22 @@ def test_bad_deprecate_kwarg(self):
             def f4(new=None):
                 pass
 
+
+def test_rands():
+    r = tm.rands(10)
+    assert(len(r) == 10)
+
+
+def test_rands_array():
+    arr = tm.rands_array(5, size=10)
+    assert(arr.shape == (10,))
+    assert(len(arr[0]) == 5)
+
+    arr = tm.rands_array(7, size=(10, 10))
+    assert(arr.shape == (10, 10))
+    assert(len(arr[1, 1]) == 7)
+
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index d1c6af5743e07..b9c7fdfeb6c48 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -14,7 +14,7 @@
 from pandas.tseries.index import DatetimeIndex
 from pandas.tools.merge import merge, concat, ordered_merge, MergeError
 from pandas.util.testing import (assert_frame_equal, assert_series_equal,
-                                 assert_almost_equal, rands,
+                                 assert_almost_equal,
                                  makeCustomDataframe as mkdf,
                                  assertRaisesRegexp)
 from pandas import isnull, DataFrame, Index, MultiIndex, Panel, Series, date_range, read_table, read_csv
@@ -913,7 +913,7 @@ def test_merge_right_vs_left(self):
     def test_compress_group_combinations(self):
 
         # ~ 40000000 possible unique groups
-        key1 = np.array([rands(10) for _ in range(10000)], dtype='O')
+        key1 = tm.rands_array(10, 10000)
         key1 = np.tile(key1, 2)
         key2 = key1[::-1]
 
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index b34bcc3c12890..38057d641fc17 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -193,15 +193,50 @@ def randbool(size=(), p=0.5):
     return rand(*size) <= p
 
 
-def rands(n):
-    choices = string.ascii_letters + string.digits
-    return ''.join(random.choice(choices) for _ in range(n))
+RANDS_CHARS = np.array(list(string.ascii_letters + string.digits),
+                       dtype=(np.str_, 1))
+RANDU_CHARS = np.array(list(u("").join(map(unichr, lrange(1488, 1488 + 26))) +
+                            string.digits), dtype=(np.unicode_, 1))
+
+
+def rands_array(nchars, size, dtype='O'):
+    """Generate an array of byte strings."""
+    retval = (choice(RANDS_CHARS, size=nchars * np.prod(size))
+              .view((np.str_, nchars)).reshape(size))
+    if dtype is None:
+        return retval
+    else:
+        return retval.astype(dtype)
+
+
+def randu_array(nchars, size, dtype='O'):
+    """Generate an array of unicode strings."""
+    retval = (choice(RANDU_CHARS, size=nchars * np.prod(size))
+              .view((np.unicode_, nchars)).reshape(size))
+    if dtype is None:
+        return retval
+    else:
+        return retval.astype(dtype)
 
 
-def randu(n):
-    choices = u("").join(map(unichr, lrange(1488, 1488 + 26)))
-    choices += string.digits
-    return ''.join([random.choice(choices) for _ in range(n)])
+def rands(nchars):
+    """
+    Generate one random byte string.
+
+    See `rands_array` if you want to create an array of random strings.
+
+    """
+    return ''.join(choice(RANDS_CHARS, nchars))
+
+
+def randu(nchars):
+    """
+    Generate one random unicode string.
+
+    See `randu_array` if you want to create an array of random unicode strings.
+
+    """
+    return ''.join(choice(RANDU_CHARS, nchars))
 
 
 def choice(x, size=10):
@@ -743,10 +778,11 @@ def getArangeMat():
 
 # make index
 def makeStringIndex(k=10):
-    return Index([rands(10) for _ in range(k)])
+    return Index(rands_array(nchars=10, size=k))
+
 
 def makeUnicodeIndex(k=10):
-    return Index([randu(10) for _ in range(k)])
+    return Index(randu_array(nchars=10, size=k))
 
 def makeBoolIndex(k=10):
     if k == 1:
diff --git a/vb_suite/frame_ctor.py b/vb_suite/frame_ctor.py
index 713237779494e..b11dd6c290ae1 100644
--- a/vb_suite/frame_ctor.py
+++ b/vb_suite/frame_ctor.py
@@ -17,8 +17,8 @@
 
 setup = common_setup + """
 N, K = 5000, 50
-index = [rands(10) for _ in xrange(N)]
-columns = [rands(10) for _ in xrange(K)]
+index = tm.makeStringIndex(N)
+columns = tm.makeStringIndex(K)
 frame = DataFrame(np.random.randn(N, K), index=index, columns=columns)
 
 try:
diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py
index f9797def4c53b..26311920ec861 100644
--- a/vb_suite/groupby.py
+++ b/vb_suite/groupby.py
@@ -187,7 +187,7 @@ def f():
 setup = common_setup + """
 K = 1000
 N = 100000
-uniques = np.array([rands(10) for x in xrange(K)], dtype='O')
+uniques = tm.makeStringIndex(K).values
 s = Series(np.tile(uniques, N // K))
 """
 
diff --git a/vb_suite/hdfstore_bench.py b/vb_suite/hdfstore_bench.py
index 47f8e106351d4..a822ad1c614be 100644
--- a/vb_suite/hdfstore_bench.py
+++ b/vb_suite/hdfstore_bench.py
@@ -19,7 +19,7 @@ def remove(f):
 # get from a store
 
 setup1 = common_setup + """
-index = [rands(10) for _ in xrange(25000)]
+index = tm.makeStringIndex(25000)
 df = DataFrame({'float1' : randn(25000),
                 'float2' : randn(25000)},
                index=index)
@@ -36,7 +36,7 @@ def remove(f):
 # write to a store
 
 setup2 = common_setup + """
-index = [rands(10) for _ in xrange(25000)]
+index = tm.makeStringIndex(25000)
 df = DataFrame({'float1' : randn(25000),
                 'float2' : randn(25000)},
                index=index)
@@ -52,7 +52,7 @@ def remove(f):
 # get from a store (mixed)
 
 setup3 = common_setup + """
-index = [rands(10) for _ in xrange(25000)]
+index = tm.makeStringIndex(25000)
 df = DataFrame({'float1' : randn(25000),
                 'float2' : randn(25000),
                 'string1' : ['foo'] * 25000,
@@ -73,7 +73,7 @@ def remove(f):
 # write to a store (mixed)
 
 setup4 = common_setup + """
-index = [rands(10) for _ in xrange(25000)]
+index = tm.makeStringIndex(25000)
 df = DataFrame({'float1' : randn(25000),
                 'float2' : randn(25000),
                 'string1' : ['foo'] * 25000,
@@ -93,7 +93,7 @@
 
 setup5 = common_setup + """
 N=10000
-index = [rands(10) for _ in xrange(N)]
+index = tm.makeStringIndex(N)
 df = DataFrame({'float1' : randn(N),
                 'float2' : randn(N),
                 'string1' : ['foo'] * N,
@@ -115,7 +115,7 @@ def remove(f):
 # write to a table (mixed)
 
 setup6 = common_setup + """
-index = [rands(10) for _ in xrange(25000)]
+index = tm.makeStringIndex(25000)
 df = DataFrame({'float1' : randn(25000),
                 'float2' : randn(25000),
                 'string1' : ['foo'] * 25000,
@@ -134,7 +134,7 @@ def remove(f):
 # select from a table
 
 setup7 = common_setup + """
-index = [rands(10) for _ in xrange(25000)]
+index = tm.makeStringIndex(25000)
 df = DataFrame({'float1' : randn(25000),
                 'float2' : randn(25000) },
                index=index)
@@ -153,7 +153,7 @@ def remove(f):
 # write to a table
 
 setup8 = common_setup + """
-index = [rands(10) for _ in xrange(25000)]
+index = tm.makeStringIndex(25000)
 df = DataFrame({'float1' : randn(25000),
                 'float2' : randn(25000) },
                index=index)
diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py
index 34cbadc2e042b..320f261050e07 100644
--- a/vb_suite/indexing.py
+++ b/vb_suite/indexing.py
@@ -20,7 +20,7 @@
                            name='series_getitem_scalar')
 
 setup = common_setup + """
-index = [tm.rands(10) for _ in xrange(1000)]
+index = tm.makeStringIndex(1000)
 s = Series(np.random.rand(1000), index=index)
 idx = index[100]
 """
@@ -51,8 +51,8 @@
 # DataFrame __getitem__
 
 setup = common_setup + """
-index = [tm.rands(10) for _ in xrange(1000)]
-columns = [tm.rands(10) for _ in xrange(30)]
+index = tm.makeStringIndex(1000)
+columns = tm.makeStringIndex(30)
 df = DataFrame(np.random.rand(1000, 30), index=index,
                columns=columns)
 idx = index[100]
@@ -68,10 +68,9 @@
 except:
     klass = DataFrame
 
-index = [tm.rands(10) for _ in xrange(1000)]
-columns = [tm.rands(10) for _ in xrange(30)]
-df = klass(np.random.rand(1000, 30), index=index,
-           columns=columns)
+index = tm.makeStringIndex(1000)
+columns = tm.makeStringIndex(30)
+df = klass(np.random.rand(1000, 30), index=index, columns=columns)
 idx = index[100]
 col = columns[10]
 """
@@ -84,10 +83,9 @@
 # ix get scalar
 
 setup = common_setup + """
-index = [tm.rands(10) for _ in xrange(1000)]
-columns = [tm.rands(10) for _ in xrange(30)]
-df = DataFrame(np.random.randn(1000, 30), index=index,
-               columns=columns)
+index = tm.makeStringIndex(1000)
+columns = tm.makeStringIndex(30)
+df = DataFrame(np.random.randn(1000, 30), index=index, columns=columns)
 idx = index[100]
 col = columns[10]
 """
diff --git a/vb_suite/io_bench.py b/vb_suite/io_bench.py
index b70a060233dae..0b9f68f0e6ed5 100644
--- a/vb_suite/io_bench.py
+++ b/vb_suite/io_bench.py
@@ -8,7 +8,7 @@
 # read_csv
 
 setup1 = common_setup + """
-index = [rands(10) for _ in xrange(10000)]
+index = tm.makeStringIndex(10000)
 df = DataFrame({'float1' : randn(10000),
                 'float2' : randn(10000),
                 'string1' : ['foo'] * 10000,
@@ -26,7 +26,7 @@
 # write_csv
 
 setup2 = common_setup + """
-index = [rands(10) for _ in xrange(10000)]
+index = tm.makeStringIndex(10000)
 df = DataFrame({'float1' : randn(10000),
                 'float2' : randn(10000),
                 'string1' : ['foo'] * 10000,
diff --git a/vb_suite/io_sql.py b/vb_suite/io_sql.py
index 1a60982c487d4..7f580165939bb 100644
--- a/vb_suite/io_sql.py
+++ b/vb_suite/io_sql.py
@@ -17,7 +17,7 @@
 # to_sql
 
 setup = common_setup + """
-index = [rands(10) for _ in xrange(10000)]
+index = tm.makeStringIndex(10000)
 df = DataFrame({'float1' : randn(10000),
                 'float2' : randn(10000),
                 'string1' : ['foo'] * 10000,
@@ -37,7 +37,7 @@
 # read_sql
 
 setup = common_setup + """
-index = [rands(10) for _ in xrange(10000)]
+index = tm.makeStringIndex(10000)
 df = DataFrame({'float1' : randn(10000),
                 'float2' : randn(10000),
                 'string1' : ['foo'] * 10000,
diff --git a/vb_suite/join_merge.py b/vb_suite/join_merge.py
index eb0608f12a8cb..facec39559ed3 100644
--- a/vb_suite/join_merge.py
+++ b/vb_suite/join_merge.py
@@ -5,8 +5,8 @@
 """
 
 setup = common_setup + """
-level1 = np.array([rands(10) for _ in xrange(10)], dtype='O')
-level2 = np.array([rands(10) for _ in xrange(1000)], dtype='O')
+level1 = tm.makeStringIndex(10).values
+level2 = tm.makeStringIndex(1000).values
 label1 = np.arange(10).repeat(1000)
 label2 = np.tile(np.arange(1000), 10)
 
@@ -91,8 +91,8 @@
 
 setup = common_setup + """
 N = 10000
-indices = np.array([rands(10) for _ in xrange(N)], dtype='O')
-indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O')
+indices = tm.makeStringIndex(N).values
+indices2 = tm.makeStringIndex(N).values
 key = np.tile(indices[:8000], 10)
 key2 = np.tile(indices2[:8000], 10)
 
@@ -141,7 +141,7 @@
 # data alignment
 
 setup = common_setup + """n = 1000000
-# indices = Index([rands(10) for _ in xrange(n)])
+# indices = tm.makeStringIndex(n)
 def sample(values, k):
     sampler = np.random.permutation(len(values))
     return values.take(sampler[:k])
@@ -170,7 +170,7 @@ def sample(values, k):
 
 setup = common_setup + """
 n = 1000
-indices = Index([rands(10) for _ in xrange(1000)])
+indices = tm.makeStringIndex(1000)
 s = Series(n, index=indices)
 pieces = [s[i:-i] for i in range(1, 10)]
 pieces = pieces * 50
@@ -205,7 +205,7 @@ def sample(values, k):
 # Ordered merge
 
 setup = common_setup + """
-groups = np.array([rands(10) for _ in xrange(10)], dtype='O')
+groups = tm.makeStringIndex(10).values
 
 left = DataFrame({'group': groups.repeat(5000),
                   'key' : np.tile(np.arange(0, 10000, 2), 10),
diff --git a/vb_suite/miscellaneous.py b/vb_suite/miscellaneous.py
index eeeaf01a8b4af..27efadc7acfe0 100644
--- a/vb_suite/miscellaneous.py
+++ b/vb_suite/miscellaneous.py
@@ -24,9 +24,7 @@ def prop(self):
 # match
 
 setup = common_setup + """
-from pandas.util.testing import rands
-
-uniques = np.array([rands(10) for _ in xrange(1000)], dtype='O')
+uniques = tm.makeStringIndex(1000).values
 all = uniques.repeat(10)
 """
 
diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py
index 77d0e2e27260e..a599301bb53fe 100644
--- a/vb_suite/pandas_vb_common.py
+++ b/vb_suite/pandas_vb_common.py
@@ -1,5 +1,4 @@
 from pandas import *
-from pandas.util.testing import rands
 from datetime import timedelta
 from numpy.random import randn
 from numpy.random import randint
diff --git a/vb_suite/reindex.py b/vb_suite/reindex.py
index 5d3d07783c9a8..156382f1fb13a 100644
--- a/vb_suite/reindex.py
+++ b/vb_suite/reindex.py
@@ -34,9 +34,8 @@
 
 N = 1000
 K = 20
-level1 = np.array([tm.rands(10) for _ in xrange(N)], dtype='O').repeat(K)
-level2 = np.tile(np.array([tm.rands(10) for _ in xrange(K)], dtype='O'),
-                 N)
+level1 = tm.makeStringIndex(N).values.repeat(K)
+level2 = np.tile(tm.makeStringIndex(K).values, N)
 index = MultiIndex.from_arrays([level1, level2])
 s1 = Series(np.random.randn(N * K), index=index)
 
@@ -125,8 +124,8 @@ def backfill():
 
 N = 10000
 K = 10
-key1 = np.array([rands(10) for _ in xrange(N)], dtype='O').repeat(K)
-key2 = np.array([rands(10) for _ in xrange(N)], dtype='O').repeat(K)
+key1 = tm.makeStringIndex(N).values.repeat(K)
+key2 = tm.makeStringIndex(N).values.repeat(K)
 df = DataFrame({'key1' : key1, 'key2' : key2,
                 'value' : np.random.randn(N * K)})
 
@@ -166,7 +165,7 @@ def backfill():
 
 setup = common_setup + """
 s = Series(np.random.randint(0, 1000, size=10000))
-s2 = Series(np.tile([rands(10) for i in xrange(1000)], 10))
+s2 = Series(np.tile(tm.makeStringIndex(1000).values, 10))
 """
 
 series_drop_duplicates_int = Benchmark('s.drop_duplicates()', setup,
@@ -195,7 +194,7 @@ def backfill():
 
 setup = common_setup + """
 n = 50000
-indices = Index([rands(10) for _ in xrange(n)])
+indices = tm.makeStringIndex(n)
 
 def sample(values, k):
     from random import shuffle
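
Usage sketch (illustrative only, assuming this patch is applied): the snippet below exercises the helpers the patch adds to pandas.util.testing; the shapes and lengths follow the assertions added in pandas/tests/test_util.py, and the variable names are arbitrary.

import pandas as pd
import pandas.util.testing as tm

# Single random strings, same call pattern as the old helpers.
s = tm.rands(10)       # 10-character alphanumeric string
u = tm.randu(10)       # 10-character unicode string

# Arrays of random strings; `size` may be an int or a shape tuple.
arr = tm.rands_array(5, size=10)            # object array, shape (10,)
assert arr.shape == (10,) and len(arr[0]) == 5

wide = tm.rands_array(25, size=(10, 4))     # 2-D array, as used by the wide-repr tests
df = pd.DataFrame(wide)

# dtype=None keeps the fixed-width numpy string dtype instead of object.
fixed = tm.rands_array(8, size=3, dtype=None)

# makeStringIndex / makeUnicodeIndex now build on the array helpers.
idx = tm.makeStringIndex(1000)              # Index of 1000 random 10-char strings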