|
| 1 | +import numpy as np |
| 2 | +from collections import defaultdict |
| 3 | +import gc |
| 4 | +import time |
| 5 | +from pandas import DataFrame |
| 6 | +from pandas.util.testing import rands |
| 7 | +import random |
| 8 | + |
# Local import: `concat` replaces DataFrame.append, which newer pandas
# releases no longer provide.
from pandas import concat

# Number of random strings generated for each of the two key pools.
N = 10000

# Two independent pools of N random 10-character strings, held as Python
# objects (dtype='O'). `range` is used instead of `xrange` so the script
# runs on both Python 2 and Python 3.
indices = np.array([rands(10) for _ in range(N)], dtype='O')
indices2 = np.array([rands(10) for _ in range(N)], dtype='O')

# Left-hand join keys: the first 8000 entries of each pool, repeated 10 times.
key = np.tile(indices[:8000], 10)
key2 = np.tile(indices2[:8000], 10)

# The value columns are sized from the key arrays instead of being
# hard-coded, so the frames stay consistent if N or the slice bounds change.
left = DataFrame({'key': key, 'key2': key2,
                  'value': np.random.randn(len(key))})

# Right-hand keys skip the first 2000 entries of each pool, so only part of
# the left-hand keys find a match.
right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:],
                   'value2': np.random.randn(len(indices[2000:]))})

# Stack the right-hand frame on top of itself so every right-hand key pair
# occurs twice; the result is renumbered 0..n-1.
right2 = concat([right, right], ignore_index=True)
right = right2
| 23 | + |
| 24 | +# random.shuffle(key2) |
| 25 | +# indices2 = indices.copy() |
| 26 | +# random.shuffle(indices2) |
| 27 | + |
# Prepare Database
import sqlite3

# Set to True to build composite indexes on (key, key2) for both tables
# before any query is run.
create_sql_indexes = False

# The database lives in memory only; nothing is written to disk.
conn = sqlite3.connect(':memory:')

# The value columns are filled with floats drawn from np.random.randn, so
# they are declared `real` rather than `int` to match the data stored in them.
conn.execute('create table left( key varchar(10), key2 varchar(10), value real);')
conn.execute('create table right( key varchar(10), key2 varchar(10), value2 real);')

# Load the same rows that the `left` and `right` DataFrames hold, one
# (key, key2, value) tuple per row.
conn.executemany('insert into left values (?, ?, ?)',
                 zip(key, key2, left['value']))
conn.executemany('insert into right values (?, ?, ?)',
                 zip(right['key'], right['key2'], right['value2']))

# Create Indices
if create_sql_indexes:
    conn.execute('create index left_ix on left(key, key2)')
    conn.execute('create index right_ix on right(key, key2)')
| 44 | + |
| 45 | + |
# Join types substituted into the query below, in the order they are timed.
join_methods = ['inner', 'left outer', 'left']  # others not supported

# Number of times each query is executed; the recorded time is the mean.
niter = 5

# Mean seconds per query, keyed by sort flag; each value is a list aligned
# with join_methods. Collecting into a plain dict and building the frame once
# avoids chained assignment (frame[col][row] = x), which does not reliably
# write through to the frame in pandas.
timings = {}
for sort in [False]:
    per_method = []
    for join_method in join_methods:
        sql = """select *
                 from left
                 %s join right
                 on left.key=right.key
                 and left.key2 = right.key2""" % join_method
        if sort:
            # Both tables have key/key2 columns, so the sort columns are
            # qualified to keep the reference unambiguous.
            sql = '%s order by left.key, left.key2' % sql
        f = lambda: list(conn.execute(sql))  # list() fetches every result row
        g = lambda: conn.execute(sql)  # creates the cursor only; not timed here
        # Keep the garbage collector out of the timed region, and make sure
        # it is switched back on even if a query raises.
        gc.disable()
        try:
            start = time.time()
            for _ in range(niter):
                f()
            elapsed = (time.time() - start) / niter
        finally:
            gc.enable()
        per_method.append(elapsed)
    timings[sort] = per_method

# One row per join method, one column per sort flag, then relabelled.
sql_results = DataFrame(timings, index=join_methods)
sql_results.columns = ['sqlite3']  # ['dont_sort', 'sort']
sql_results.index = ['inner', 'outer', 'left']
0 commit comments