Skip to content

Commit a835118

Browse files
danielballanchanghiskhan
authored andcommitted
Merging in MySQL support #2482
1 parent 3718bc3 commit a835118

File tree

2 files changed

+409
-93
lines changed

2 files changed

+409
-93
lines changed

pandas/io/sql.py

+128-77
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Collection of query wrappers / abstractions to both facilitate data
33
retrieval and to reduce dependency on DB-specific API.
44
"""
5-
from datetime import datetime
5+
from datetime import datetime, date
66

77
import numpy as np
88
import traceback
@@ -158,99 +158,150 @@ def read_frame(sql, con, index_col=None, coerce_float=True):
158158
frame_query = read_frame
159159

160160

161-
def write_frame(frame, name=None, con=None, flavor='sqlite', append=False):
161+
def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs):
162162
"""
163-
Write records stored in a DataFrame to SQLite. The index will currently be
164-
dropped
163+
Write records stored in a DataFrame to a SQL database.
164+
165+
Parameters
166+
----------
167+
frame: DataFrame
168+
name: name of SQL table
169+
conn: an open SQL database connection object
170+
flavor: {'sqlite', 'mysql', 'oracle'}, default 'sqlite'
171+
if_exists: {'fail', 'replace', 'append'}, default 'fail'
172+
fail: If table exists, do nothing.
173+
replace: If table exists, drop it, recreate it, and insert data.
174+
append: If table exists, insert data. Create if does not exist.
165175
"""
166-
if flavor == 'sqlite':
167-
schema = get_sqlite_schema(frame, name)
168-
else:
169-
raise NotImplementedError
170176

171-
if not append and not has_table(name, con):
172-
con.execute(schema)
177+
if 'append' in kwargs:
178+
import warnings
179+
warnings.warn("append is deprecated, use if_exists instead",
180+
FutureWarning)
181+
if kwargs['append']:
182+
if_exists='append'
183+
else:
184+
if_exists='fail'
185+
exists = table_exists(name, con, flavor)
186+
if if_exists == 'fail' and exists:
187+
raise ValueError, "Table '%s' already exists." % name
188+
189+
#create or drop-recreate if necessary
190+
create = None
191+
if exists and if_exists == 'replace':
192+
create = "DROP TABLE %s" % name
193+
elif not exists:
194+
create = get_schema(frame, name, flavor)
195+
196+
if create is not None:
197+
cur = con.cursor()
198+
cur.execute(create)
199+
cur.close()
200+
201+
cur = con.cursor()
202+
# Replace spaces in DataFrame column names with _.
203+
safe_names = [s.replace(' ', '_').strip() for s in frame.columns]
204+
flavor_picker = {'sqlite' : _write_sqlite,
205+
'mysql' : _write_mysql}
206+
207+
func = flavor_picker.get(flavor, None)
208+
if func is None:
209+
raise NotImplementedError
210+
func(frame, name, safe_names, cur)
211+
cur.close()
212+
con.commit()
173213

174-
wildcards = ','.join(['?'] * len(frame.columns))
175-
insert_sql = 'INSERT INTO %s VALUES (%s)' % (name, wildcards)
214+
def _write_sqlite(frame, table, names, cur):
215+
bracketed_names = ['[' + column + ']' for column in names]
216+
col_names = ','.join(bracketed_names)
217+
wildcards = ','.join(['?'] * len(names))
218+
insert_query = 'INSERT INTO %s (%s) VALUES (%s)' % (
219+
table, col_names, wildcards)
176220
data = [tuple(x) for x in frame.values]
177-
con.executemany(insert_sql, data)
178-
179-
180-
def has_table(name, con):
181-
"""
182-
Check if a given SQLite table exists.
221+
cur.executemany(insert_query, data)
222+
223+
def _write_mysql(frame, table, names, cur):
224+
bracketed_names = ['`' + column + '`' for column in names]
225+
col_names = ','.join(bracketed_names)
226+
wildcards = ','.join([r'%s'] * len(names))
227+
insert_query = "INSERT INTO %s (%s) VALUES (%s)" % (
228+
table, col_names, wildcards)
229+
data = [tuple(x) for x in frame.values]
230+
cur.executemany(insert_query, data)
231+
232+
def table_exists(name, con, flavor):
233+
flavor_map = {
234+
'sqlite': ("SELECT name FROM sqlite_master "
235+
"WHERE type='table' AND name='%s';") % name,
236+
'mysql' : "SHOW TABLES LIKE '%s'" % name}
237+
query = flavor_map.get(flavor, None)
238+
if query is None:
239+
raise NotImplementedError
240+
return len(tquery(query, con)) > 0
183241

184-
Parameters
185-
----------
186-
name: string
187-
SQLite table name
188-
con: DB connection object
189-
"""
190-
sqlstr = "SELECT name FROM sqlite_master WHERE type='table' AND name='%s'" % name
191-
rs = tquery(sqlstr, con)
192-
return len(rs) > 0
242+
def get_sqltype(pytype, flavor):
243+
sqltype = {'mysql': 'VARCHAR (63)',
244+
'sqlite': 'TEXT'}
193245

246+
if issubclass(pytype, np.floating):
247+
sqltype['mysql'] = 'FLOAT'
248+
sqltype['sqlite'] = 'REAL'
194249

195-
def get_sqlite_schema(frame, name, dtypes=None, keys=None):
196-
template = """
197-
CREATE TABLE %(name)s (
198-
%(columns)s%(keystr)s
199-
);"""
250+
if issubclass(pytype, np.integer):
251+
#TODO: Refine integer size.
252+
sqltype['mysql'] = 'BIGINT'
253+
sqltype['sqlite'] = 'INTEGER'
200254

201-
column_types = []
255+
if issubclass(pytype, np.datetime64) or pytype is datetime:
256+
# Caution: np.datetime64 is also a subclass of np.number.
257+
sqltype['mysql'] = 'DATETIME'
258+
sqltype['sqlite'] = 'TIMESTAMP'
202259

203-
frame_types = frame.dtypes
204-
for k in frame_types.index:
205-
dt = frame_types[k]
260+
if pytype is datetime.date:
261+
sqltype['mysql'] = 'DATE'
262+
sqltype['sqlite'] = 'TIMESTAMP'
206263

207-
if issubclass(dt.type, (np.integer, np.bool_)):
208-
sqltype = 'INTEGER'
209-
elif issubclass(dt.type, np.floating):
210-
sqltype = 'REAL'
211-
else:
212-
sqltype = 'TEXT'
264+
if issubclass(pytype, np.bool_):
265+
sqltype['sqlite'] = 'INTEGER'
213266

214-
if dtypes is not None:
215-
sqltype = dtypes.get(k, sqltype)
267+
return sqltype[flavor]
216268

217-
column_types.append((k, sqltype))
218-
columns = ',\n '.join('[%s] %s' % x for x in column_types)
269+
def get_schema(frame, name, flavor, keys=None):
270+
"Return a CREATE TABLE statement to suit the contents of a DataFrame."
271+
lookup_type = lambda dtype: get_sqltype(dtype.type, flavor)
272+
# Replace spaces in DataFrame column names with _.
273+
safe_columns = [s.replace(' ', '_').strip() for s in frame.dtypes.index]
274+
column_types = zip(safe_columns, map(lookup_type, frame.dtypes))
275+
if flavor == 'sqlite':
276+
columns = ',\n '.join('[%s] %s' % x for x in column_types)
277+
else:
278+
columns = ',\n '.join('`%s` %s' % x for x in column_types)
219279

220280
keystr = ''
221281
if keys is not None:
222282
if isinstance(keys, basestring):
223283
keys = (keys,)
224284
keystr = ', PRIMARY KEY (%s)' % ','.join(keys)
225-
return template % {'name': name, 'columns': columns, 'keystr': keystr}
226-
227-
228-
#------------------------------------------------------------------------------
229-
# Query formatting
230-
231-
_formatters = {
232-
datetime: lambda dt: "'%s'" % date_format(dt),
233-
str: lambda x: "'%s'" % x,
234-
np.str_: lambda x: "'%s'" % x,
235-
unicode: lambda x: "'%s'" % x,
236-
float: lambda x: "%.8f" % x,
237-
int: lambda x: "%s" % x,
238-
type(None): lambda x: "NULL",
239-
np.float64: lambda x: "%.10f" % x,
240-
bool: lambda x: "'%s'" % x,
241-
}
242-
243-
244-
def format_query(sql, *args):
285+
template = """CREATE TABLE %(name)s (
286+
%(columns)s
287+
%(keystr)s
288+
);"""
289+
create_statement = template % {'name': name, 'columns': columns,
290+
'keystr': keystr}
291+
return create_statement
292+
293+
def sequence2dict(seq):
294+
"""Helper function for cx_Oracle.
295+
296+
For each element in the sequence, creates a dictionary item equal
297+
to the element and keyed by the position of the item in the list.
298+
>>> sequence2dict(("Matt", 1))
299+
{'1': 'Matt', '2': 1}
300+
301+
Source:
302+
http://www.gingerandjohn.com/archives/2004/02/26/cx_oracle-executemany-example/
245303
"""
246-
247-
"""
248-
processed_args = []
249-
for arg in args:
250-
if isinstance(arg, float) and isnull(arg):
251-
arg = None
252-
253-
formatter = _formatters[type(arg)]
254-
processed_args.append(formatter(arg))
255-
256-
return sql % tuple(processed_args)
304+
d = {}
305+
for k,v in zip(range(1, 1 + len(seq)), seq):
306+
d[str(k)] = v
307+
return d

0 commit comments

Comments
 (0)