From 10f1112ed6edd5ee2c3f2034bf4ad6a61b90f7a0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 17 Dec 2017 00:05:11 -0800 Subject: [PATCH 1/5] CLN: ASV io bench --- asv_bench/benchmarks/io_bench.py | 225 ------------------------ asv_bench/benchmarks/io_csv.py | 147 ++++++++++++++++ asv_bench/benchmarks/io_json.py | 121 +++++++++++++ asv_bench/benchmarks/packers.py | 62 ------- pandas/tests/io/sas/data/test1.sas7bdat | Bin 131072 -> 0 bytes 5 files changed, 268 insertions(+), 287 deletions(-) delete mode 100644 asv_bench/benchmarks/io_bench.py create mode 100644 asv_bench/benchmarks/io_csv.py create mode 100644 asv_bench/benchmarks/io_json.py delete mode 100644 pandas/tests/io/sas/data/test1.sas7bdat diff --git a/asv_bench/benchmarks/io_bench.py b/asv_bench/benchmarks/io_bench.py deleted file mode 100644 index e8112cc41f032..0000000000000 --- a/asv_bench/benchmarks/io_bench.py +++ /dev/null @@ -1,225 +0,0 @@ -import os -from .pandas_vb_common import * -from pandas import concat, Timestamp, compat -try: - from StringIO import StringIO -except ImportError: - from io import StringIO -import timeit - - -class frame_to_csv(BaseIO): - goal_time = 0.2 - fname = '__test__.csv' - - def setup(self): - self.df = DataFrame(np.random.randn(3000, 30)) - - def time_frame_to_csv(self): - self.df.to_csv(self.fname) - - -class frame_to_csv2(BaseIO): - goal_time = 0.2 - fname = '__test__.csv' - - def setup(self): - self.df = DataFrame({'A': range(50000), }) - self.df['B'] = (self.df.A + 1.0) - self.df['C'] = (self.df.A + 2.0) - self.df['D'] = (self.df.A + 3.0) - - def time_frame_to_csv2(self): - self.df.to_csv(self.fname) - - -class frame_to_csv_date_formatting(BaseIO): - goal_time = 0.2 - fname = '__test__.csv' - - def setup(self): - self.rng = date_range('1/1/2000', periods=1000) - self.data = DataFrame(self.rng, index=self.rng) - - def time_frame_to_csv_date_formatting(self): - self.data.to_csv(self.fname, date_format='%Y%m%d') - - -class frame_to_csv_mixed(BaseIO): - goal_time = 0.2 - fname = '__test__.csv' - - def setup(self): - self.df_float = DataFrame(np.random.randn(5000, 5), dtype='float64', columns=self.create_cols('float')) - self.df_int = DataFrame(np.random.randn(5000, 5), dtype='int64', columns=self.create_cols('int')) - self.df_bool = DataFrame(True, index=self.df_float.index, columns=self.create_cols('bool')) - self.df_object = DataFrame('foo', index=self.df_float.index, columns=self.create_cols('object')) - self.df_dt = DataFrame(Timestamp('20010101'), index=self.df_float.index, columns=self.create_cols('date')) - self.df_float.ix[30:500, 1:3] = np.nan - self.df = concat([self.df_float, self.df_int, self.df_bool, self.df_object, self.df_dt], axis=1) - - def time_frame_to_csv_mixed(self): - self.df.to_csv(self.fname) - - def create_cols(self, name): - return [('%s%03d' % (name, i)) for i in range(5)] - - -class read_csv_infer_datetime_format_custom(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range('1/1/2000', periods=1000) - self.data = '\n'.join(self.rng.map((lambda x: x.strftime('%m/%d/%Y %H:%M:%S.%f')))) - - def time_read_csv_infer_datetime_format_custom(self): - read_csv(StringIO(self.data), header=None, names=['foo'], parse_dates=['foo'], infer_datetime_format=True) - - -class read_csv_infer_datetime_format_iso8601(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range('1/1/2000', periods=1000) - self.data = '\n'.join(self.rng.map((lambda x: x.strftime('%Y-%m-%d %H:%M:%S')))) - - def time_read_csv_infer_datetime_format_iso8601(self): - 
read_csv(StringIO(self.data), header=None, names=['foo'], parse_dates=['foo'], infer_datetime_format=True) - - -class read_csv_infer_datetime_format_ymd(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range('1/1/2000', periods=1000) - self.data = '\n'.join(self.rng.map((lambda x: x.strftime('%Y%m%d')))) - - def time_read_csv_infer_datetime_format_ymd(self): - read_csv(StringIO(self.data), header=None, names=['foo'], parse_dates=['foo'], infer_datetime_format=True) - - -class read_csv_skiprows(BaseIO): - goal_time = 0.2 - fname = '__test__.csv' - - def setup(self): - self.index = tm.makeStringIndex(20000) - self.df = DataFrame({'float1': randn(20000), 'float2': randn(20000), 'string1': (['foo'] * 20000), 'bool1': ([True] * 20000), 'int1': np.random.randint(0, 200000, size=20000), }, index=self.index) - self.df.to_csv(self.fname) - - def time_read_csv_skiprows(self): - read_csv(self.fname, skiprows=10000) - - -class read_csv_standard(BaseIO): - goal_time = 0.2 - fname = '__test__.csv' - - def setup(self): - self.index = tm.makeStringIndex(10000) - self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index) - self.df.to_csv(self.fname) - - def time_read_csv_standard(self): - read_csv(self.fname) - - -class read_parse_dates_iso8601(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range('1/1/2000', periods=1000) - self.data = '\n'.join(self.rng.map((lambda x: x.strftime('%Y-%m-%d %H:%M:%S')))) - - def time_read_parse_dates_iso8601(self): - read_csv(StringIO(self.data), header=None, names=['foo'], parse_dates=['foo']) - - -class read_uint64_integers(object): - goal_time = 0.2 - - def setup(self): - self.na_values = [2**63 + 500] - - self.arr1 = np.arange(10000).astype('uint64') + 2**63 - self.data1 = '\n'.join(map(lambda x: str(x), self.arr1)) - - self.arr2 = self.arr1.copy().astype(object) - self.arr2[500] = -1 - self.data2 = '\n'.join(map(lambda x: str(x), self.arr2)) - - def time_read_uint64(self): - read_csv(StringIO(self.data1), header=None) - - def time_read_uint64_neg_values(self): - read_csv(StringIO(self.data2), header=None) - - def time_read_uint64_na_values(self): - read_csv(StringIO(self.data1), header=None, na_values=self.na_values) - - -class write_csv_standard(BaseIO): - goal_time = 0.2 - fname = '__test__.csv' - - def setup(self): - self.index = tm.makeStringIndex(10000) - self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index) - - def time_write_csv_standard(self): - self.df.to_csv(self.fname) - - -class read_csv_from_s3(object): - # Make sure that we can read part of a file from S3 without - # needing to download the entire thing. Use the timeit.default_timer - # to measure wall time instead of CPU time -- we want to see - # how long it takes to download the data. - timer = timeit.default_timer - params = ([None, "gzip", "bz2"], ["python", "c"]) - param_names = ["compression", "engine"] - - def setup(self, compression, engine): - if compression == "bz2" and engine == "c" and compat.PY2: - # The Python 2 C parser can't read bz2 from open files. - raise NotImplementedError - try: - import s3fs - except ImportError: - # Skip these benchmarks if `boto` is not installed. 
- raise NotImplementedError - - self.big_fname = "s3://pandas-test/large_random.csv" - - def time_read_nrows(self, compression, engine): - # Read a small number of rows from a huge (100,000 x 50) table. - ext = "" - if compression == "gzip": - ext = ".gz" - elif compression == "bz2": - ext = ".bz2" - pd.read_csv(self.big_fname + ext, nrows=10, - compression=compression, engine=engine) - - -class read_json_lines(BaseIO): - goal_time = 0.2 - fname = "__test__.json" - - def setup(self): - self.N = 100000 - self.C = 5 - self.df = DataFrame({'float{0}'.format(i): randn(self.N) for i in range(self.C)}) - self.df.to_json(self.fname,orient="records",lines=True) - - def time_read_json_lines(self): - pd.read_json(self.fname, lines=True) - - def time_read_json_lines_chunk(self): - pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4)) - - def peakmem_read_json_lines(self): - pd.read_json(self.fname, lines=True) - - def peakmem_read_json_lines_chunk(self): - pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4)) diff --git a/asv_bench/benchmarks/io_csv.py b/asv_bench/benchmarks/io_csv.py new file mode 100644 index 0000000000000..ef138206eb87b --- /dev/null +++ b/asv_bench/benchmarks/io_csv.py @@ -0,0 +1,147 @@ +import timeit + +import numpy as np +import pandas.util.testing as tm +from pandas import DataFrame, date_range, read_csv +from pandas.compat import PY2, StringIO + +from .pandas_vb_common import setup, BaseIO # noqa + + +class ToCSV(BaseIO): + + goal_time = 0.2 + fname = '__test__.csv' + params = ['wide', 'long', 'mixed'] + param_names = ['kind'] + + def setup(self, kind): + wide_frame = DataFrame(np.random.randn(3000, 30)) + long_frame = DataFrame({'A': np.arange(50000), + 'B': np.arange(50000) + 1., + 'C': np.arange(50000) + 2., + 'D': np.arange(50000) + 3.}) + mixed_frame = DataFrame({'float': np.random.randn(5000), + 'int': np.random.randn(5000).astype(int), + 'bool': (np.arange(5000) % 2) == 0, + 'datetime': date_range('2001', + freq='s', + periods=5000), + 'object': ['foo'] * 5000}) + mixed_frame.loc[30:500, 'float'] = np.nan + data = {'wide': wide_frame, + 'long': long_frame, + 'mixed': mixed_frame} + self.df = data[kind] + + def time_frame(self, kind): + self.df.to_csv(self.fname) + + +class ToCSVDatetime(BaseIO): + + goal_time = 0.2 + fname = '__test__.csv' + + def setup(self): + rng = date_range('1/1/2000', periods=1000) + self.data = DataFrame(rng, index=rng) + + def time_frame_date_formatting(self): + self.data.to_csv(self.fname, date_format='%Y%m%d') + + +class ReadCSVDInferDatetimeFormat(object): + + goal_time = 0.2 + params = ([True, False], ['custom', 'iso8601', 'ymd']) + param_names = ['infer_datetime_format', 'format'] + + def setup(self, infer_datetime_format, format): + rng = date_range('1/1/2000', periods=1000) + formats = {'custom': '%m/%d/%Y %H:%M:%S.%f', + 'iso8601': '%Y-%m-%d %H:%M:%S', + 'ymd': '%Y%m%d'} + dt_format = formats[format] + self.data = StringIO('\n'.join(rng.strftime(dt_format).tolist())) + + def time_read_csv(self, infer_datetime_format, format): + read_csv(self.data, header=None, names=['foo'], parse_dates=['foo'], + infer_datetime_format=infer_datetime_format) + + +class ReadCSVSkipRows(BaseIO): + + goal_time = 0.2 + fname = '__test__.csv' + params = [None, 10000] + param_names = ['skiprows'] + + def setup(self, skiprows): + N = 20000 + index = tm.makeStringIndex(N) + df = DataFrame({'float1': np.random.randn(N), + 'float2': np.random.randn(N), + 'string1': ['foo'] * N, + 'bool1': [True] * N, + 'int1': np.random.randint(0, N, 
size=N)}, + index=index) + df.to_csv(self.fname) + + def time_skipprows(self, skiprows): + read_csv(self.fname, skiprows=skiprows) + + +class ReadUint64Integers(object): + + goal_time = 0.2 + + def setup(self): + self.na_values = [2**63 + 500] + arr = np.arange(10000).astype('uint64') + 2**63 + self.data1 = StringIO('\n'.join(arr.astype(str).tolist())) + arr = arr.astype(object) + arr[500] = -1 + self.data2 = StringIO('\n'.join(arr.astype(str).tolist())) + + def time_read_uint64(self): + read_csv(self.data1, header=None, names=['foo']) + + def time_read_uint64_neg_values(self): + read_csv(self.data2, header=None, names=['foo']) + + def time_read_uint64_na_values(self): + read_csv(self.data1, header=None, names=['foo'], + na_values=self.na_values) + + +class S3(object): + # Make sure that we can read part of a file from S3 without + # needing to download the entire thing. Use the timeit.default_timer + # to measure wall time instead of CPU time -- we want to see + # how long it takes to download the data. + timer = timeit.default_timer + params = ([None, "gzip", "bz2"], ["python", "c"]) + param_names = ["compression", "engine"] + + def setup(self, compression, engine): + if compression == "bz2" and engine == "c" and PY2: + # The Python 2 C parser can't read bz2 from open files. + raise NotImplementedError + try: + import s3fs + except ImportError: + # Skip these benchmarks if `boto` is not installed. + raise NotImplementedError + + ext = "" + if compression == "gzip": + ext = ".gz" + elif compression == "bz2": + ext = ".bz2" + self.big_fname = "s3://pandas-test/large_random.csv" + ext + + def time_read_csv_10_rows(self, compression, engine): + # Read a small number of rows from a huge (100,000 x 50) table. + read_csv(self.big_fname, nrows=10, compression=compression, + engine=engine) diff --git a/asv_bench/benchmarks/io_json.py b/asv_bench/benchmarks/io_json.py new file mode 100644 index 0000000000000..6b93fc71b9a24 --- /dev/null +++ b/asv_bench/benchmarks/io_json.py @@ -0,0 +1,121 @@ +import numpy as np +import pandas.util.testing as tm +from pandas import DataFrame, date_range, timedelta_range, concat, read_json + +from .pandas_vb_common import setup, BaseIO # noqa + + +class ReadJSON(BaseIO): + + goal_time = 0.2 + fname = "__test__.json" + params = (['records', 'split'], [None, 25000], ['int', 'datetime']) + param_names = ['orient', 'chunksize', 'index'] + + def setup(self, orient, chunksize, index): + N = 100000 + indexes = {'int': np.arange(N), + 'datetime': date_range('20000101', periods=N, freq='H')} + df = DataFrame(np.random.randn(N, 5), + columns=['float_{}'.format(i) for i in range(5)], + index=indexes[index]) + df.to_json(self.fname, orient=lines_orient[1], lines=lines_orient[0]) + + def time_read_json(self, orient, chunksize, index): + read_json(self.fname, orient=orient, chunksize=chunksize) + + def time_read_json_concat(self, orient, chunksize, index): + concat(read_json(self.fname, orient=orient, chunksize=chunksize)) + + def peakmem_read_json(self, orient, chunksize, index): + read_json(self.fname, orient=orient, chunksize=chunksize) + + def peakmem_read_json_concat(self, orient, chunksize, index): + concat(read_json(self.fname, orient=orient, chunksize=chunksize)) + + def time_read_json_lines(self, orient, chunksize, index): + read_json(self.fname, orient='records', lines=True, + chunksize=chunksize) + + def time_read_json_lines_concat(self, orient, chunksize, index): + concat(read_json(self.fname, orient='records', lines=True, + chunksize=chunksize)) + + def 
peakmem_read_json_lines(self, orient, chunksize, index): + read_json(self.fname, orient='records', lines=True, + chunksize=chunksize) + + def peakmem_read_json_lines_concat(self, orient, chunksize, index): + concat(read_json(self.fname, orient='records', lines=True, + chunksize=chunksize)) + + +class ToJSON(BaseIO): + + goal_time = 0.2 + fname = "__test__.json" + params = ['split', 'columns', 'index'] + param_names = ['orient'] + + def setup(self, lines_orient): + N = 10**5 + ncols = 5 + index = date_range('20000101', periods=N, freq='H') + timedeltas = timedelta_range(start=1, periods=N, freq='s') + datetimes = date_range(start=1, periods=N, freq='s') + ints = np.random.randint(100000000, size=N) + floats = np.random.randn(N) + strings = tm.makeStringIndex(N) + self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N)) + self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index) + self.df_td_int_ts = DataFrame({'td_1': timedeltas, + 'td_2': timedeltas, + 'int_1': ints, + 'int_2': ints, + 'ts_1': datetimes, + 'ts_2': datetimes}, + index=index) + self.df_int_floats = DataFrame({'int_1': ints, + 'int_2': ints, + 'int_3': ints, + 'float_1': floats, + 'float_2': floats, + 'float_3': floats}, + index=index) + self.df_int_float_str = DataFrame({'int_1': ints, + 'int_2': ints, + 'float_1': floats, + 'float_2': floats, + 'str_1': strings, + 'str_2': strings}, + index=index) + + def time_floats_with_int_index(self, orient): + self.df.to_json(self.fname, orient=orient) + + def time_floats_with_dt_index(self, orient): + self.df_date_idx.to_json(self.fname, orient=orient) + + def time_delta_int_tstamp(self, orient): + self.df_td_int_ts.to_json(self.fname, orient=orient) + + def time_float_int(self, orient): + self.df_int_floats.to_json(self.fname, orient=orient) + + def time_float_int_str(self, orient): + self.df_int_float_str.to_json(self.fname, orient=orient) + + def time_floats_with_int_idex_lines(self, orient): + self.df.to_json(self.fname, orient='records', lines=True) + + def time_floats_with_dt_index_lines(self, orient): + self.df_date_idx.to_json(self.fname, orient='records', lines=True) + + def time_delta_int_tstamp_lines(self, orient): + self.df_td_int_ts.to_json(self.fname, orient='records', lines=True) + + def time_float_int_lines(self, orient): + self.df_int_floats.to_json(self.fname, orient='records', lines=True) + + def time_float_int_str_lines(self, orient): + self.df_int_float_str.to_json(self.fname, orient='records', lines=True) diff --git a/asv_bench/benchmarks/packers.py b/asv_bench/benchmarks/packers.py index 758162f000e8d..7b6cefc56f0da 100644 --- a/asv_bench/benchmarks/packers.py +++ b/asv_bench/benchmarks/packers.py @@ -77,28 +77,6 @@ def time_packers_read_hdf_table(self): pd.read_hdf(self.f, 'df') -class packers_read_json(_Packers): - - def setup(self): - self._setup() - self.df.to_json(self.f, orient='split') - self.df.index = np.arange(self.N) - - def time_packers_read_json(self): - pd.read_json(self.f, orient='split') - - -class packers_read_json_date_index(_Packers): - - def setup(self): - self._setup() - self.remove(self.f) - self.df.to_json(self.f, orient='split') - - def time_packers_read_json_date_index(self): - pd.read_json(self.f, orient='split') - - class packers_read_pack(_Packers): def setup(self): @@ -219,46 +197,6 @@ def time_write_hdf_table(self): self.df2.to_hdf(self.f, 'df', table=True) -class JSON(_Packers): - - def setup(self): - self._setup() - self.df_date = self.df.copy() - self.df.index = np.arange(self.N) - self.cols = [(lambda i: 
('{0}_timedelta'.format(i), [pd.Timedelta(('%d seconds' % randrange(1000000.0))) for _ in range(self.N)])), (lambda i: ('{0}_int'.format(i), randint(100000000.0, size=self.N))), (lambda i: ('{0}_timestamp'.format(i), [pd.Timestamp((1418842918083256000 + randrange(1000000000.0, 1e+18, 200))) for _ in range(self.N)]))]
-        self.df_mixed = DataFrame(OrderedDict([self.cols[(i % len(self.cols))](i) for i in range(self.C)]), index=self.index)
-
-        self.cols = [(lambda i: ('{0}_float'.format(i), randn(self.N))), (lambda i: ('{0}_int'.format(i), randint(100000000.0, size=self.N)))]
-        self.df_mixed2 = DataFrame(OrderedDict([self.cols[(i % len(self.cols))](i) for i in range(self.C)]), index=self.index)
-
-        self.cols = [(lambda i: ('{0}_float'.format(i), randn(self.N))), (lambda i: ('{0}_int'.format(i), randint(100000000.0, size=self.N))), (lambda i: ('{0}_str'.format(i), [('%08x' % randrange((16 ** 8))) for _ in range(self.N)]))]
-        self.df_mixed3 = DataFrame(OrderedDict([self.cols[(i % len(self.cols))](i) for i in range(self.C)]), index=self.index)
-
-    def time_write_json(self):
-        self.df.to_json(self.f, orient='split')
-
-    def time_write_json_T(self):
-        self.df.to_json(self.f, orient='columns')
-
-    def time_write_json_date_index(self):
-        self.df_date.to_json(self.f, orient='split')
-
-    def time_write_json_mixed_delta_int_tstamp(self):
-        self.df_mixed.to_json(self.f, orient='split')
-
-    def time_write_json_mixed_float_int(self):
-        self.df_mixed2.to_json(self.f, orient='index')
-
-    def time_write_json_mixed_float_int_T(self):
-        self.df_mixed2.to_json(self.f, orient='columns')
-
-    def time_write_json_mixed_float_int_str(self):
-        self.df_mixed3.to_json(self.f, orient='split')
-
-    def time_write_json_lines(self):
-        self.df.to_json(self.f, orient="records", lines=True)
-
-
 class MsgPack(_Packers):
 
     def setup(self):
diff --git a/pandas/tests/io/sas/data/test1.sas7bdat b/pandas/tests/io/sas/data/test1.sas7bdat
deleted file mode 100644
index 951173ce4d9f9395cf5f7c1e2c51ac4bd51ea51c..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 131072
[131072 bytes of base85-encoded binary data omitted]

From: Matt Roeschke
Date: Sun, 17 Dec 2017 18:00:27 -0800
Subject: [PATCH 2/5] Migrate benchmarks for parser_vb

---
 asv_bench/benchmarks/io_csv.py    | 106 +++++++++++++++++++++++++-
 asv_bench/benchmarks/io_json.py   |  50 ++++++------
 asv_bench/benchmarks/parser_vb.py | 121 ------------------------
 3 files changed, 132 insertions(+), 145 deletions(-)
 delete mode 100644 asv_bench/benchmarks/parser_vb.py

diff --git a/asv_bench/benchmarks/io_csv.py b/asv_bench/benchmarks/io_csv.py
index ef138206eb87b..5d35e68aad6c6 100644
--- a/asv_bench/benchmarks/io_csv.py
+++ b/asv_bench/benchmarks/io_csv.py
@@ -1,9 +1,12 @@
+import random
 import timeit
+import string
 
 import numpy as np
 import pandas.util.testing as tm
-from pandas import DataFrame, date_range, read_csv
-from pandas.compat import PY2, StringIO
+from pandas import DataFrame, Categorical, date_range, read_csv
+from pandas.compat import PY2
+from pandas.compat import cStringIO as StringIO
 
 from .pandas_vb_common import setup, BaseIO  # noqa
 
@@ -145,3 +148,102 @@ def time_read_csv_10_rows(self, compression, engine):
         # Read a small number of rows from a huge (100,000 x 50) table.
read_csv(self.big_fname, nrows=10, compression=compression, engine=engine) + + +class ReadCSVThousands(object): + + goal_time = 0.2 + fname = '__test__.csv' + params = ([',', '|'], [None, ',']) + param_names = ['sep', 'thousands'] + + def setup(self, sep, thousands): + N = 10000 + K = 8 + data = np.random.randn(N, K) * np.random.randint(100, 10000, (N, K)) + df = DataFrame(data) + if thousands is not None: + fmt = ':{}'.format(thousands) + fmt = '{' + fmt + '}' + df = df.applymap(lambda x: fmt.format(x)) + df.to_csv(self.fname, sep=sep) + + def time_thousands(self, sep, thousands): + read_csv(self.fname, sep=sep, thousands=thousands) + + +class ReadCSVComment(object): + + goal_time = 0.2 + + def setup(self): + data = ['A,B,C'] + (['1,2,3 # comment'] * 100000) + self.s_data = StringIO('\n'.join(data)) + + def time_comment(self): + read_csv(self.s_data, comment='#', header=None, names=list('abc')) + + +class ReadCSVFloatPrecision(object): + + goal_time = 0.2 + params = ([',', ';'], ['.', '_'], [None, 'high', 'round_trip']) + param_names = ['sep', 'decimal', 'float_precision'] + + def setup(self, sep, decimal, float_precision): + floats = [''.join(random.choice(string.digits) for _ in range(28)) + for _ in range(15)] + rows = sep.join(['0{}'.format(decimal) + '{}'] * 3) + '\n' + data = rows * 5 + data = data.format(*floats) * 200 # 1000 x 3 strings csv + self.s_data = StringIO(data) + + def time_read_csv(self, sep, decimal, float_precision): + read_csv(self.s_data, sep=sep, header=None, names=list('abc'), + float_precision=float_precision) + + def time_read_csv_python_engine(self, sep, decimal, float_precision): + read_csv(self.s_data, sep=sep, header=None, engine='python', + float_precision=None, names=list('abc')) + + +class ReadCSVCategorical(BaseIO): + + goal_time = 0.2 + fname = '__test__.csv' + + def setup(self): + N = 100000 + group1 = ['aaaaaaaa', 'bbbbbbb', 'cccccccc', 'dddddddd', 'eeeeeeee'] + df = DataFrame(np.random.choice(group1, (N, 3)), columns=list('abc')) + df.to_csv(self.fname, index=False) + + def time_convert_post(self): + read_csv(self.fname).apply(Categorical) + + def time_convert_direct(self): + read_csv(self.fname, dtype='category') + + +class ReadCSVParseDates(object): + + goal_time = 0.2 + + def setup(self): + data = """{},19:00:00,18:56:00,0.8100,2.8100,7.2000,0.0000,280.0000\n + {},20:00:00,19:56:00,0.0100,2.2100,7.2000,0.0000,260.0000\n + {},21:00:00,20:56:00,-0.5900,2.2100,5.7000,0.0000,280.0000\n + {},21:00:00,21:18:00,-0.9900,2.0100,3.6000,0.0000,270.0000\n + {},22:00:00,21:56:00,-0.5900,1.7100,5.1000,0.0000,290.0000\n + """ + two_cols = ['KORD,19990127'] * 5 + data = data.format(*two_cols) + self.s_data = StringIO(data) + + def time_multiple_date(self): + read_csv(self.s_data, sep=',', header=None, + names=list(string.digits[:9]), parse_dates=[[1, 2], [1, 3]]) + + def time_baseline(self): + read_csv(self.s_data, sep=',', header=None, parse_dates=[1], + names=list(string.digits[:9])) diff --git a/asv_bench/benchmarks/io_json.py b/asv_bench/benchmarks/io_json.py index 6b93fc71b9a24..c3882667c6ed8 100644 --- a/asv_bench/benchmarks/io_json.py +++ b/asv_bench/benchmarks/io_json.py @@ -9,45 +9,51 @@ class ReadJSON(BaseIO): goal_time = 0.2 fname = "__test__.json" - params = (['records', 'split'], [None, 25000], ['int', 'datetime']) - param_names = ['orient', 'chunksize', 'index'] + params = (['split', 'index', 'records'], ['int', 'datetime']) + param_names = ['orient', 'index'] - def setup(self, orient, chunksize, index): + def setup(self, orient, index): N = 100000 
indexes = {'int': np.arange(N), 'datetime': date_range('20000101', periods=N, freq='H')} df = DataFrame(np.random.randn(N, 5), columns=['float_{}'.format(i) for i in range(5)], index=indexes[index]) - df.to_json(self.fname, orient=lines_orient[1], lines=lines_orient[0]) + df.to_json(self.fname, orient=orient) - def time_read_json(self, orient, chunksize, index): - read_json(self.fname, orient=orient, chunksize=chunksize) + def time_read_json(self, orient, index): + read_json(self.fname, orient=orient) - def time_read_json_concat(self, orient, chunksize, index): - concat(read_json(self.fname, orient=orient, chunksize=chunksize)) - def peakmem_read_json(self, orient, chunksize, index): - read_json(self.fname, orient=orient, chunksize=chunksize) +class ReadJSONLines(BaseIO): - def peakmem_read_json_concat(self, orient, chunksize, index): - concat(read_json(self.fname, orient=orient, chunksize=chunksize)) + goal_time = 0.2 + fname = "__test_lines__.json" + params = ['int', 'datetime'] + param_names = ['index'] + + def setup(self, index): + N = 100000 + indexes = {'int': np.arange(N), + 'datetime': date_range('20000101', periods=N, freq='H')} + df = DataFrame(np.random.randn(N, 5), + columns=['float_{}'.format(i) for i in range(5)], + index=indexes[index]) + df.to_json(self.fname, orient='records', lines=True) - def time_read_json_lines(self, orient, chunksize, index): - read_json(self.fname, orient='records', lines=True, - chunksize=chunksize) + def time_read_json_lines(self, index): + read_json(self.fname, orient='records', lines=True) - def time_read_json_lines_concat(self, orient, chunksize, index): + def time_read_json_lines_concat(self, index): concat(read_json(self.fname, orient='records', lines=True, - chunksize=chunksize)) + chunksize=25000)) - def peakmem_read_json_lines(self, orient, chunksize, index): - read_json(self.fname, orient='records', lines=True, - chunksize=chunksize) + def peakmem_read_json_lines(self, index): + read_json(self.fname, orient='records', lines=True) - def peakmem_read_json_lines_concat(self, orient, chunksize, index): + def peakmem_read_json_lines_concat(self, index): concat(read_json(self.fname, orient='records', lines=True, - chunksize=chunksize)) + chunksize=25000)) class ToJSON(BaseIO): diff --git a/asv_bench/benchmarks/parser_vb.py b/asv_bench/benchmarks/parser_vb.py deleted file mode 100644 index 32bf7e50d1a89..0000000000000 --- a/asv_bench/benchmarks/parser_vb.py +++ /dev/null @@ -1,121 +0,0 @@ -from .pandas_vb_common import * -import os -from pandas import read_csv -try: - from cStringIO import StringIO -except ImportError: - from io import StringIO - - -class read_csv1(object): - goal_time = 0.2 - - def setup(self): - self.N = 10000 - self.K = 8 - self.df = DataFrame((np.random.randn(self.N, self.K) * np.random.randint(100, 10000, (self.N, self.K)))) - self.df.to_csv('test.csv', sep='|') - - self.format = (lambda x: '{:,}'.format(x)) - self.df2 = self.df.applymap(self.format) - self.df2.to_csv('test2.csv', sep='|') - - def time_sep(self): - read_csv('test.csv', sep='|') - - def time_thousands(self): - read_csv('test.csv', sep='|', thousands=',') - - def teardown(self): - os.remove('test.csv') - os.remove('test2.csv') - - -class read_csv2(object): - goal_time = 0.2 - - def setup(self): - self.data = ['A,B,C'] - self.data = (self.data + (['1,2,3 # comment'] * 100000)) - self.data = '\n'.join(self.data) - - def time_comment(self): - read_csv(StringIO(self.data), comment='#') - - -class read_csv3(object): - goal_time = 0.2 - - def setup(self): - 
self.data = """0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n -0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n -0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n -0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n -0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n""" - self.data2 = self.data.replace(',', ';').replace('.', ',') - self.data = (self.data * 200) - self.data2 = (self.data2 * 200) - - def time_default_converter(self): - read_csv(StringIO(self.data), sep=',', header=None, - float_precision=None) - - def time_default_converter_with_decimal(self): - read_csv(StringIO(self.data2), sep=';', header=None, - float_precision=None, decimal=',') - - def time_default_converter_python_engine(self): - read_csv(StringIO(self.data), sep=',', header=None, - float_precision=None, engine='python') - - def time_default_converter_with_decimal_python_engine(self): - read_csv(StringIO(self.data2), sep=';', header=None, - float_precision=None, decimal=',', engine='python') - - def time_precise_converter(self): - read_csv(StringIO(self.data), sep=',', header=None, - float_precision='high') - - def time_roundtrip_converter(self): - read_csv(StringIO(self.data), sep=',', header=None, - float_precision='round_trip') - - -class read_csv_categorical(object): - goal_time = 0.2 - - def setup(self): - N = 100000 - group1 = ['aaaaaaaa', 'bbbbbbb', 'cccccccc', 'dddddddd', 'eeeeeeee'] - df = DataFrame({'a': np.random.choice(group1, N).astype('object'), - 'b': np.random.choice(group1, N).astype('object'), - 'c': np.random.choice(group1, N).astype('object')}) - df.to_csv('strings.csv', index=False) - - def time_convert_post(self): - read_csv('strings.csv').apply(pd.Categorical) - - def time_convert_direct(self): - read_csv('strings.csv', dtype='category') - - def teardown(self): - os.remove('strings.csv') - - -class read_csv_dateparsing(object): - goal_time = 0.2 - - def setup(self): - self.N = 10000 - self.K = 8 - self.data = 'KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000\n KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000\n KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000\n KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000\n KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000\n ' - self.data = (self.data * 200) - self.data2 = 'KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000\n KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000\n KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000\n KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000\n KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000\n ' - self.data2 = (self.data2 * 200) - - def time_multiple_date(self): - read_csv(StringIO(self.data), sep=',', header=None, - parse_dates=[[1, 2], [1, 3]]) - - def time_baseline(self): - read_csv(StringIO(self.data2), sep=',', header=None, parse_dates=[1]) From bf4e257b93ca24459db74d6d6e03a3971c6918a4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 17 Dec 2017 18:24:42 -0800 Subject: [PATCH 3/5] Undo removed file --- pandas/tests/io/sas/data/test1.sas7bdat | Bin 0 -> 131072 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 
pandas/tests/io/sas/data/test1.sas7bdat

diff --git a/pandas/tests/io/sas/data/test1.sas7bdat b/pandas/tests/io/sas/data/test1.sas7bdat
new file mode 100644
index 0000000000000000000000000000000000000000..951173ce4d9f9395cf5f7c1e2c51ac4bd51ea51c
GIT binary patch
literal 131072
[131072 bytes of base85-encoded binary data omitted]

literal 0
HcmV?d00001

From: Matt Roeschke
Date: Sun, 17 Dec 2017 18:49:33 -0800
Subject: [PATCH 4/5] Add additional BaseIO

---
 asv_bench/benchmarks/io_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/io_csv.py b/asv_bench/benchmarks/io_csv.py
index 5d35e68aad6c6..efd855f0f909b 100644
--- a/asv_bench/benchmarks/io_csv.py
+++ b/asv_bench/benchmarks/io_csv.py
@@ -150,7 +150,7 @@ def time_read_csv_10_rows(self, compression, engine):
                  engine=engine)
 
 
-class ReadCSVThousands(object):
+class ReadCSVThousands(BaseIO):
 
     goal_time = 0.2
     fname = '__test__.csv'

From e8eda6af19d0f78bf6abad2f88948aa7c0de533d Mon Sep 17 00:00:00 2001
From: Matt Roeschke
Date: Mon, 18 Dec 2017 19:51:54 -0800
Subject: [PATCH 5/5] Create io folder for benchmarks

---
 asv_bench/benchmarks/io/__init__.py             | 0
 asv_bench/benchmarks/{io_csv.py => io/csv.py}   | 2 +-
 asv_bench/benchmarks/{io_json.py => io/json.py} | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)
 create mode 100644 asv_bench/benchmarks/io/__init__.py
 rename asv_bench/benchmarks/{io_csv.py => io/csv.py} (99%)
 rename asv_bench/benchmarks/{io_json.py => io/json.py} (98%)

diff --git a/asv_bench/benchmarks/io/__init__.py b/asv_bench/benchmarks/io/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/asv_bench/benchmarks/io_csv.py b/asv_bench/benchmarks/io/csv.py
similarity index 99%
rename from asv_bench/benchmarks/io_csv.py
rename to asv_bench/benchmarks/io/csv.py
index efd855f0f909b..bc4599436111f 100644
--- a/asv_bench/benchmarks/io_csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -8,7 +8,7 @@
 from pandas.compat import PY2
 from pandas.compat import cStringIO as StringIO
 
-from .pandas_vb_common import setup, BaseIO  # noqa
+from ..pandas_vb_common import setup, BaseIO  # noqa
 
 
 class ToCSV(BaseIO):
diff --git a/asv_bench/benchmarks/io_json.py b/asv_bench/benchmarks/io/json.py
similarity index 98%
rename from asv_bench/benchmarks/io_json.py
rename to asv_bench/benchmarks/io/json.py
index c3882667c6ed8..acfdd327c3b51 100644
--- a/asv_bench/benchmarks/io_json.py
+++ b/asv_bench/benchmarks/io/json.py
@@ -2,7 +2,7 @@
 import pandas.util.testing as tm
 from pandas import DataFrame, date_range, timedelta_range, concat, read_json
 
-from .pandas_vb_common import setup, BaseIO  # noqa
+from ..pandas_vb_common import setup, BaseIO  # noqa
 
 
 class ReadJSON(BaseIO):