|
2 | 2 | Collection of query wrappers / abstractions to both facilitate data
|
3 | 3 | retrieval and to reduce dependency on DB-specific API.
|
4 | 4 | """
|
5 |
| -from datetime import datetime, date |
| 5 | +from datetime import datetime |
6 | 6 |
|
7 | 7 | import numpy as np
|
8 | 8 | import traceback
|
@@ -158,150 +158,99 @@ def read_frame(sql, con, index_col=None, coerce_float=True):
|
158 | 158 | frame_query = read_frame
|
159 | 159 |
|
160 | 160 |
|
161 |
| -def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): |
| 161 | +def write_frame(frame, name=None, con=None, flavor='sqlite', append=False): |
162 | 162 | """
|
163 |
| - Write records stored in a DataFrame to a SQL database. |
164 |
| -
|
165 |
| - Parameters |
166 |
| - ---------- |
167 |
| - frame: DataFrame |
168 |
| - name: name of SQL table |
169 |
| - conn: an open SQL database connection object |
170 |
| - flavor: {'sqlite', 'mysql', 'oracle'}, default 'sqlite' |
171 |
| - if_exists: {'fail', 'replace', 'append'}, default 'fail' |
172 |
| - fail: If table exists, do nothing. |
173 |
| - replace: If table exists, drop it, recreate it, and insert data. |
174 |
| - append: If table exists, insert data. Create if does not exist. |
| 163 | + Write records stored in a DataFrame to SQLite. The index will currently be |
| 164 | + dropped |
175 | 165 | """
|
176 |
| - |
177 |
| - if 'append' in kwargs: |
178 |
| - import warnings |
179 |
| - warnings.warn("append is deprecated, use if_exists instead", |
180 |
| - FutureWarning) |
181 |
| - if kwargs['append']: |
182 |
| - if_exists='append' |
183 |
| - else: |
184 |
| - if_exists='fail' |
185 |
| - exists = table_exists(name, con, flavor) |
186 |
| - if if_exists == 'fail' and exists: |
187 |
| - raise ValueError, "Table '%s' already exists." % name |
188 |
| - |
189 |
| - #create or drop-recreate if necessary |
190 |
| - create = None |
191 |
| - if exists and if_exists == 'replace': |
192 |
| - create = "DROP TABLE %s" % name |
193 |
| - elif not exists: |
194 |
| - create = get_schema(frame, name, flavor) |
195 |
| - |
196 |
| - if create is not None: |
197 |
| - cur = con.cursor() |
198 |
| - cur.execute(create) |
199 |
| - cur.close() |
200 |
| - |
201 |
| - cur = con.cursor() |
202 |
| - # Replace spaces in DataFrame column names with _. |
203 |
| - safe_names = [s.replace(' ', '_').strip() for s in frame.columns] |
204 |
| - flavor_picker = {'sqlite' : _write_sqlite, |
205 |
| - 'mysql' : _write_mysql} |
206 |
| - |
207 |
| - func = flavor_picker.get(flavor, None) |
208 |
| - if func is None: |
| 166 | + if flavor == 'sqlite': |
| 167 | + schema = get_sqlite_schema(frame, name) |
| 168 | + else: |
209 | 169 | raise NotImplementedError
|
210 |
| - func(frame, name, safe_names, cur) |
211 |
| - cur.close() |
212 |
| - con.commit() |
213 | 170 |
|
214 |
| -def _write_sqlite(frame, table, names, cur): |
215 |
| - bracketed_names = ['[' + column + ']' for column in names] |
216 |
| - col_names = ','.join(bracketed_names) |
217 |
| - wildcards = ','.join(['?'] * len(names)) |
218 |
| - insert_query = 'INSERT INTO %s (%s) VALUES (%s)' % ( |
219 |
| - table, col_names, wildcards) |
220 |
| - data = [tuple(x) for x in frame.values] |
221 |
| - cur.executemany(insert_query, data) |
222 |
| - |
223 |
| -def _write_mysql(frame, table, names, cur): |
224 |
| - bracketed_names = ['`' + column + '`' for column in names] |
225 |
| - col_names = ','.join(bracketed_names) |
226 |
| - wildcards = ','.join([r'%s'] * len(names)) |
227 |
| - insert_query = "INSERT INTO %s (%s) VALUES (%s)" % ( |
228 |
| - table, col_names, wildcards) |
| 171 | + if not append and not has_table(name, con): |
| 172 | + con.execute(schema) |
| 173 | + |
| 174 | + wildcards = ','.join(['?'] * len(frame.columns)) |
| 175 | + insert_sql = 'INSERT INTO %s VALUES (%s)' % (name, wildcards) |
229 | 176 | data = [tuple(x) for x in frame.values]
|
230 |
| - cur.executemany(insert_query, data) |
231 |
| - |
232 |
| -def table_exists(name, con, flavor): |
233 |
| - flavor_map = { |
234 |
| - 'sqlite': ("SELECT name FROM sqlite_master " |
235 |
| - "WHERE type='table' AND name='%s';") % name, |
236 |
| - 'mysql' : "SHOW TABLES LIKE '%s'" % name} |
237 |
| - query = flavor_map.get(flavor, None) |
238 |
| - if query is None: |
239 |
| - raise NotImplementedError |
240 |
| - return len(tquery(query, con)) > 0 |
| 177 | + con.executemany(insert_sql, data) |
| 178 | + |
| 179 | + |
| 180 | +def has_table(name, con): |
| 181 | + """ |
| 182 | + Check if a given SQLite table exists. |
241 | 183 |
|
242 |
| -def get_sqltype(pytype, flavor): |
243 |
| - sqltype = {'mysql': 'VARCHAR (63)', |
244 |
| - 'sqlite': 'TEXT'} |
| 184 | + Parameters |
| 185 | + ---------- |
| 186 | + name: string |
| 187 | + SQLite table name |
| 188 | + con: DB connection object |
| 189 | + """ |
| 190 | + sqlstr = "SELECT name FROM sqlite_master WHERE type='table' AND name='%s'" % name |
| 191 | + rs = tquery(sqlstr, con) |
| 192 | + return len(rs) > 0 |
245 | 193 |
|
246 |
| - if issubclass(pytype, np.floating): |
247 |
| - sqltype['mysql'] = 'FLOAT' |
248 |
| - sqltype['sqlite'] = 'REAL' |
249 | 194 |
|
250 |
| - if issubclass(pytype, np.integer): |
251 |
| - #TODO: Refine integer size. |
252 |
| - sqltype['mysql'] = 'BIGINT' |
253 |
| - sqltype['sqlite'] = 'INTEGER' |
| 195 | +def get_sqlite_schema(frame, name, dtypes=None, keys=None): |
| 196 | + template = """ |
| 197 | +CREATE TABLE %(name)s ( |
| 198 | + %(columns)s%(keystr)s |
| 199 | +);""" |
254 | 200 |
|
255 |
| - if issubclass(pytype, np.datetime64) or pytype is datetime: |
256 |
| - # Caution: np.datetime64 is also a subclass of np.number. |
257 |
| - sqltype['mysql'] = 'DATETIME' |
258 |
| - sqltype['sqlite'] = 'TIMESTAMP' |
| 201 | + column_types = [] |
259 | 202 |
|
260 |
| - if pytype is datetime.date: |
261 |
| - sqltype['mysql'] = 'DATE' |
262 |
| - sqltype['sqlite'] = 'TIMESTAMP' |
| 203 | + frame_types = frame.dtypes |
| 204 | + for k in frame_types.index: |
| 205 | + dt = frame_types[k] |
263 | 206 |
|
264 |
| - if issubclass(pytype, np.bool_): |
265 |
| - sqltype['sqlite'] = 'INTEGER' |
| 207 | + if issubclass(dt.type, (np.integer, np.bool_)): |
| 208 | + sqltype = 'INTEGER' |
| 209 | + elif issubclass(dt.type, np.floating): |
| 210 | + sqltype = 'REAL' |
| 211 | + else: |
| 212 | + sqltype = 'TEXT' |
266 | 213 |
|
267 |
| - return sqltype[flavor] |
| 214 | + if dtypes is not None: |
| 215 | + sqltype = dtypes.get(k, sqltype) |
268 | 216 |
|
269 |
| -def get_schema(frame, name, flavor, keys=None): |
270 |
| - "Return a CREATE TABLE statement to suit the contents of a DataFrame." |
271 |
| - lookup_type = lambda dtype: get_sqltype(dtype.type, flavor) |
272 |
| - # Replace spaces in DataFrame column names with _. |
273 |
| - safe_columns = [s.replace(' ', '_').strip() for s in frame.dtypes.index] |
274 |
| - column_types = zip(safe_columns, map(lookup_type, frame.dtypes)) |
275 |
| - if flavor == 'sqlite': |
276 |
| - columns = ',\n '.join('[%s] %s' % x for x in column_types) |
277 |
| - else: |
278 |
| - columns = ',\n '.join('`%s` %s' % x for x in column_types) |
| 217 | + column_types.append((k, sqltype)) |
| 218 | + columns = ',\n '.join('[%s] %s' % x for x in column_types) |
279 | 219 |
|
280 | 220 | keystr = ''
|
281 | 221 | if keys is not None:
|
282 | 222 | if isinstance(keys, basestring):
|
283 | 223 | keys = (keys,)
|
284 | 224 | keystr = ', PRIMARY KEY (%s)' % ','.join(keys)
|
285 |
| - template = """CREATE TABLE %(name)s ( |
286 |
| - %(columns)s |
287 |
| - %(keystr)s |
288 |
| - );""" |
289 |
| - create_statement = template % {'name': name, 'columns': columns, |
290 |
| - 'keystr': keystr} |
291 |
| - return create_statement |
292 |
| - |
293 |
| -def sequence2dict(seq): |
294 |
| - """Helper function for cx_Oracle. |
295 |
| -
|
296 |
| - For each element in the sequence, creates a dictionary item equal |
297 |
| - to the element and keyed by the position of the item in the list. |
298 |
| - >>> sequence2dict(("Matt", 1)) |
299 |
| - {'1': 'Matt', '2': 1} |
300 |
| -
|
301 |
| - Source: |
302 |
| - http://www.gingerandjohn.com/archives/2004/02/26/cx_oracle-executemany-example/ |
| 225 | + return template % {'name': name, 'columns': columns, 'keystr': keystr} |
| 226 | + |
| 227 | + |
| 228 | +#------------------------------------------------------------------------------ |
| 229 | +# Query formatting |
| 230 | + |
| 231 | +_formatters = { |
| 232 | + datetime: lambda dt: "'%s'" % date_format(dt), |
| 233 | + str: lambda x: "'%s'" % x, |
| 234 | + np.str_: lambda x: "'%s'" % x, |
| 235 | + unicode: lambda x: "'%s'" % x, |
| 236 | + float: lambda x: "%.8f" % x, |
| 237 | + int: lambda x: "%s" % x, |
| 238 | + type(None): lambda x: "NULL", |
| 239 | + np.float64: lambda x: "%.10f" % x, |
| 240 | + bool: lambda x: "'%s'" % x, |
| 241 | +} |
| 242 | + |
| 243 | + |
| 244 | +def format_query(sql, *args): |
303 | 245 | """
|
304 |
| - d = {} |
305 |
| - for k,v in zip(range(1, 1 + len(seq)), seq): |
306 |
| - d[str(k)] = v |
307 |
| - return d |
| 246 | +
|
| 247 | + """ |
| 248 | + processed_args = [] |
| 249 | + for arg in args: |
| 250 | + if isinstance(arg, float) and isnull(arg): |
| 251 | + arg = None |
| 252 | + |
| 253 | + formatter = _formatters[type(arg)] |
| 254 | + processed_args.append(formatter(arg)) |
| 255 | + |
| 256 | + return sql % tuple(processed_args) |
0 commit comments