From d1671ed4fdbac61475d8542510e0241b6f2a1030 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Fri, 22 Aug 2014 23:24:08 +0200
Subject: [PATCH] BENCH: add benchmarks for SQL

---
 vb_suite/io_sql.py  | 126 ++++++++++++++++++++++++++++++++++++++++++++
 vb_suite/packers.py |  21 ++++++++
 vb_suite/suite.py   |   1 +
 3 files changed, 148 insertions(+)
 create mode 100644 vb_suite/io_sql.py

diff --git a/vb_suite/io_sql.py b/vb_suite/io_sql.py
new file mode 100644
index 0000000000000..696f66ec3137c
--- /dev/null
+++ b/vb_suite/io_sql.py
@@ -0,0 +1,126 @@
+from vbench.api import Benchmark
+from datetime import datetime
+
+common_setup = """from pandas_vb_common import *
+import sqlite3
+import sqlalchemy
+from sqlalchemy import create_engine
+
+engine = create_engine('sqlite:///:memory:')
+con = sqlite3.connect(':memory:')
+"""
+
+sdate = datetime(2014, 6, 1)
+
+
+#-------------------------------------------------------------------------------
+# to_sql: write the frame through SQLAlchemy (engine) and plain sqlite3 (con)
+
+setup = common_setup + """
+index = [rands(10) for _ in xrange(10000)]
+df = DataFrame({'float1' : randn(10000),
+                'float2' : randn(10000),
+                'string1' : ['foo'] * 10000,
+                'bool1' : [True] * 10000,
+                'int1' : np.random.randint(0, 100000, size=10000)},
+               index=index)
+"""
+
+sql_write_sqlalchemy = Benchmark(
+    "df.to_sql('test1', engine, if_exists='replace')", setup, start_date=sdate)
+
+sql_write_fallback = Benchmark(
+    "df.to_sql('test1', con, if_exists='replace')", setup, start_date=sdate)
+
+
+#-------------------------------------------------------------------------------
+# read_sql: read table 'test2' back through SQLAlchemy (engine) and sqlite3 (con)
+
+setup = common_setup + """
+index = [rands(10) for _ in xrange(10000)]
+df = DataFrame({'float1' : randn(10000),
+                'float2' : randn(10000),
+                'string1' : ['foo'] * 10000,
+                'bool1' : [True] * 10000,
+                'int1' : np.random.randint(0, 100000, size=10000)},
+               index=index)
+df.to_sql('test2', engine, if_exists='replace')
+df.to_sql('test2', con, if_exists='replace')
+"""
+
+sql_read_query_sqlalchemy = Benchmark(
+    "read_sql_query('SELECT * FROM test2', engine)", setup, start_date=sdate)
+
+sql_read_query_fallback = Benchmark(
+    "read_sql_query('SELECT * FROM test2', con)", setup, start_date=sdate)
+
+sql_read_table_sqlalchemy = Benchmark(
+    "read_sql_table('test2', engine)", setup, start_date=sdate)
+
+
+#-------------------------------------------------------------------------------
+# type specific write: each benchmark writes a single-column slice of df
+
+setup = common_setup + """
+df = DataFrame({'float' : randn(10000),
+                'string' : ['foo'] * 10000,
+                'bool' : [True] * 10000,
+                'datetime' : date_range('2000-01-01', periods=10000, freq='s')})
+df.loc[1000:3000, 'float'] = np.nan
+"""
+
+sql_float_write_sqlalchemy = Benchmark(
+    "df[['float']].to_sql('test_float', engine, if_exists='replace')",
+    setup, start_date=sdate)
+
+sql_float_write_fallback = Benchmark(
+    "df[['float']].to_sql('test_float', con, if_exists='replace')",
+    setup, start_date=sdate)
+
+sql_string_write_sqlalchemy = Benchmark(
+    "df[['string']].to_sql('test_string', engine, if_exists='replace')",
+    setup, start_date=sdate)
+
+sql_string_write_fallback = Benchmark(
+    "df[['string']].to_sql('test_string', con, if_exists='replace')",
+    setup, start_date=sdate)
+
+sql_datetime_write_sqlalchemy = Benchmark(
+    "df[['datetime']].to_sql('test_datetime', engine, if_exists='replace')",
+    setup, start_date=sdate)
+
+#sql_datetime_write_fallback = Benchmark(
+#    "df[['datetime']].to_sql('test_datetime', con, if_exists='replace')",
+#    setup, start_date=sdate)
+
+#-------------------------------------------------------------------------------
+# type specific read: each benchmark reads one column of table 'test_type'
+
+setup = common_setup + """
+df = DataFrame({'float' : randn(10000),
+                'datetime' : date_range('2000-01-01', periods=10000, freq='s')})
+df['datetime_string'] = df['datetime'].map(str)
+
+df.to_sql('test_type', engine, if_exists='replace')
+df[['float', 'datetime_string']].to_sql('test_type', con, if_exists='replace')
+"""
+
+sql_float_read_query_sqlalchemy = Benchmark(
+    "read_sql_query('SELECT float FROM test_type', engine)",
+    setup, start_date=sdate)
+
+sql_float_read_table_sqlalchemy = Benchmark(
+    "read_sql_table('test_type', engine, columns=['float'])",
+    setup, start_date=sdate)
+
+sql_float_read_query_fallback = Benchmark(
+    "read_sql_query('SELECT float FROM test_type', con)",
+    setup, start_date=sdate)
+
+sql_datetime_read_as_native_sqlalchemy = Benchmark(
+    "read_sql_table('test_type', engine, columns=['datetime'])",
+    setup, start_date=sdate)
+
+sql_datetime_read_and_parse_sqlalchemy = Benchmark(
+    "read_sql_table('test_type', engine, columns=['datetime_string'], parse_dates=['datetime_string'])",
+    setup, start_date=sdate)
diff --git a/vb_suite/packers.py b/vb_suite/packers.py
index 403adbf289e1f..8d3d833ed9704 100644
--- a/vb_suite/packers.py
+++ b/vb_suite/packers.py
@@ -101,6 +101,27 @@ def remove(f):
 
 packers_write_hdf_table = Benchmark("df2.to_hdf(f,'df',table=True)", setup, cleanup="remove(f)", start_date=start_date)
 
+#----------------------------------------------------------------------
+# sql (in-memory sqlite database reached through a SQLAlchemy engine)
+
+setup = common_setup + """
+import sqlite3
+from sqlalchemy import create_engine
+engine = create_engine('sqlite:///:memory:')
+
+df2.to_sql('table', engine, if_exists='replace')
+"""
+
+packers_read_sql = Benchmark("pd.read_sql_table('table', engine)", setup, start_date=start_date)
+
+setup = common_setup + """
+import sqlite3
+from sqlalchemy import create_engine
+engine = create_engine('sqlite:///:memory:')
+"""
+
+packers_write_sql = Benchmark("df2.to_sql('table', engine, if_exists='replace')", setup, start_date=start_date)
+
 #----------------------------------------------------------------------
 # json
 
diff --git a/vb_suite/suite.py b/vb_suite/suite.py
index be9aa03801641..a16d183ae62e2 100644
--- a/vb_suite/suite.py
+++ b/vb_suite/suite.py
@@ -12,6 +12,7 @@
            'index_object',
            'indexing',
            'io_bench',
+           'io_sql',
            'inference',
            'hdfstore_bench',
            'join_merge',