@@ -23,7 +23,7 @@ class DatabaseError(IOError):
23
23
24
24
25
25
#------------------------------------------------------------------------------
26
- # Helper execution functions
26
+ # Helper functions
27
27
28
28
def _convert_params (sql , params ):
29
29
"""convert sql and params args to DBAPI2.0 compliant format"""
@@ -33,6 +33,47 @@ def _convert_params(sql, params):
33
33
return args
34
34
35
35
36
+ def _safe_col_name (col_name ):
37
+ #TODO: probably want to forbid database reserved names, such as "database"
38
+ return col_name .strip ().replace (' ' , '_' )
39
+
40
+
41
+ def _handle_date_column (col , format = None ):
42
+ if isinstance (format , dict ):
43
+ return to_datetime (col , ** format )
44
+ else :
45
+ if format in ['D' , 's' , 'ms' , 'us' , 'ns' ]:
46
+ return to_datetime (col , coerce = True , unit = format )
47
+ elif issubclass (col .dtype .type , np .floating ) or issubclass (col .dtype .type , np .integer ):
48
+ # parse dates as timestamp
49
+ format = 's' if format is None else format
50
+ return to_datetime (col , coerce = True , unit = format )
51
+ else :
52
+ return to_datetime (col , coerce = True , format = format )
53
+
54
+
55
+ def _parse_date_columns (data_frame , parse_dates ):
56
+ """ Force non-datetime columns to be read as such.
57
+ Supports both string formatted and integer timestamp columns
58
+ """
59
+ # handle non-list entries for parse_dates gracefully
60
+ if parse_dates is True or parse_dates is None or parse_dates is False :
61
+ parse_dates = []
62
+
63
+ if not hasattr (parse_dates , '__iter__' ):
64
+ parse_dates = [parse_dates ]
65
+
66
+ for col_name in parse_dates :
67
+ df_col = data_frame [col_name ]
68
+ try :
69
+ fmt = parse_dates [col_name ]
70
+ except TypeError :
71
+ fmt = None
72
+ data_frame [col_name ] = _handle_date_column (df_col , format = fmt )
73
+
74
+ return data_frame
75
+
76
+
36
77
def execute (sql , con , cur = None , params = None , flavor = 'sqlite' ):
37
78
"""
38
79
Execute the given SQL query using the provided connection object.
@@ -44,7 +85,7 @@ def execute(sql, con, cur=None, params=None, flavor='sqlite'):
44
85
con: SQLAlchemy engine or DBAPI2 connection (legacy mode)
45
86
Using SQLAlchemy makes it possible to use any DB supported by that
46
87
library.
47
- If a DBAPI2 object is given , a supported SQL flavor must also be provided
88
+ If a DBAPI2 object, a supported SQL flavor must also be provided
48
89
cur: deprecated, cursor is obtained from connection
49
90
params: list or tuple, optional
50
91
List of parameters to pass to execute method.
@@ -283,9 +324,11 @@ def pandasSQL_builder(con, flavor=None, meta=None):
283
324
return PandasSQLAlchemy (con , meta = meta )
284
325
else :
285
326
warnings .warn (
286
- "Not an SQLAlchemy engine, attempting to use as legacy DBAPI connection" )
327
+ """Not an SQLAlchemy engine,
328
+ attempting to use as legacy DBAPI connection""" )
287
329
if flavor is None :
288
- raise ValueError ("""PandasSQL must be created with an SQLAlchemy engine
330
+ raise ValueError (
331
+ """PandasSQL must be created with an SQLAlchemy engine
289
332
or a DBAPI2 connection and SQL flavour""" )
290
333
else :
291
334
return PandasSQLLegacy (con , flavor )
@@ -298,36 +341,16 @@ def pandasSQL_builder(con, flavor=None, meta=None):
298
341
return PandasSQLLegacy (con , flavor )
299
342
300
343
301
- def _safe_col_name (col_name ):
302
- return col_name .strip ().replace (' ' , '_' )
303
-
304
-
305
- def _parse_date_column (col , format = None ):
306
- if isinstance (format , dict ):
307
- return to_datetime (col , ** format )
308
- else :
309
- if format in ['D' , 's' , 'ms' , 'us' , 'ns' ]:
310
- return to_datetime (col , coerce = True , unit = format )
311
- elif issubclass (col .dtype .type , np .floating ) or issubclass (col .dtype .type , np .integer ):
312
- # parse dates as timestamp
313
- format = 's' if format is None else format
314
- return to_datetime (col , coerce = True , unit = format )
315
- else :
316
- return to_datetime (col , coerce = True , format = format )
317
-
318
-
319
- def _frame_from_data_and_columns (data , columns , index_col = None ,
320
- coerce_float = True ):
321
- df = DataFrame .from_records (
322
- data , columns = columns , coerce_float = coerce_float )
323
- if index_col is not None :
324
- df .set_index (index_col , inplace = True )
325
- return df
326
-
327
-
328
344
class PandasSQLTable (PandasObject ):
329
-
330
- def __init__ (self , name , pandas_sql_engine , frame = None , index = True , if_exists = 'fail' , prefix = 'pandas' ):
345
+ """ For mapping Pandas tables to SQL tables.
346
+ Uses fact that table is reflected by SQLAlchemy to
347
+ do better type conversions.
348
+ Also holds various flags needed to avoid having to
349
+ pass them between functions all the time.
350
+ """
351
+ # TODO: support for multiIndex
352
+ def __init__ (self , name , pandas_sql_engine , frame = None , index = True ,
353
+ if_exists = 'fail' , prefix = 'pandas' ):
331
354
self .name = name
332
355
self .pd_sql = pandas_sql_engine
333
356
self .prefix = prefix
@@ -400,13 +423,15 @@ def read(self, coerce_float=True, parse_dates=None, columns=None):
400
423
data = result .fetchall ()
401
424
column_names = result .keys ()
402
425
403
- self .frame = _frame_from_data_and_columns (data , column_names ,
404
- index_col = self .index ,
405
- coerce_float = coerce_float )
426
+ self .frame = DataFrame .from_records (
427
+ data , columns = column_names , coerce_float = coerce_float )
406
428
407
429
self ._harmonize_columns (parse_dates = parse_dates )
408
430
409
- # Assume that if the index was in prefix_index format, we gave it a name
431
+ if self .index is not None :
432
+ self .frame .set_index (self .index , inplace = True )
433
+
434
+ # Assume that if the index is in prefix_index format, we gave it a name
410
435
# and should return it nameless
411
436
if self .index == self .prefix + '_index' :
412
437
self .frame .index .name = None
@@ -442,13 +467,14 @@ def _create_table_statement(self):
442
467
return Table (self .name , self .pd_sql .meta , * columns )
443
468
444
469
def _harmonize_columns (self , parse_dates = None ):
445
- """ Make a data_frame's column type align with an sql_table column types
470
+ """ Make a data_frame's column type align with an sql_table
471
+ column types
446
472
Need to work around limited NA value support.
447
473
Floats are always fine, ints must always
448
474
be floats if there are Null values.
449
475
Booleans are hard because converting bool column with None replaces
450
- all Nones with false. Therefore only convert bool if there are no NA
451
- values.
476
+ all Nones with false. Therefore only convert bool if there are no
477
+ NA values.
452
478
Datetimes should already be converted
453
479
to np.datetime if supported, but here we also force conversion
454
480
if required
@@ -469,7 +495,7 @@ def _harmonize_columns(self, parse_dates=None):
469
495
470
496
if col_type is datetime or col_type is date :
471
497
if not issubclass (df_col .dtype .type , np .datetime64 ):
472
- self .frame [col_name ] = _parse_date_column (df_col )
498
+ self .frame [col_name ] = _handle_date_column (df_col )
473
499
474
500
elif col_type is float :
475
501
# floats support NA, can always convert!
@@ -486,7 +512,7 @@ def _harmonize_columns(self, parse_dates=None):
486
512
fmt = parse_dates [col_name ]
487
513
except TypeError :
488
514
fmt = None
489
- self .frame [col_name ] = _parse_date_column (
515
+ self .frame [col_name ] = _handle_date_column (
490
516
df_col , format = fmt )
491
517
492
518
except KeyError :
@@ -543,27 +569,6 @@ def to_sql(self, *args, **kwargs):
543
569
raise ValueError (
544
570
"PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor" )
545
571
546
- def _parse_date_columns (self , data_frame , parse_dates ):
547
- """ Force non-datetime columns to be read as such.
548
- Supports both string formatted and integer timestamp columns
549
- """
550
- # handle non-list entries for parse_dates gracefully
551
- if parse_dates is True or parse_dates is None or parse_dates is False :
552
- parse_dates = []
553
-
554
- if not hasattr (parse_dates , '__iter__' ):
555
- parse_dates = [parse_dates ]
556
-
557
- for col_name in parse_dates :
558
- df_col = data_frame [col_name ]
559
- try :
560
- fmt = parse_dates [col_name ]
561
- except TypeError :
562
- fmt = None
563
- data_frame [col_name ] = _parse_date_column (df_col , format = fmt )
564
-
565
- return data_frame
566
-
567
572
568
573
class PandasSQLAlchemy (PandasSQL ):
569
574
@@ -593,17 +598,23 @@ def uquery(self, *args, **kwargs):
593
598
result = self .execute (* args , ** kwargs )
594
599
return result .rowcount
595
600
596
- def read_sql (self , sql , index_col = None , coerce_float = True , parse_dates = None , params = None ):
601
+ def read_sql (self , sql , index_col = None , coerce_float = True ,
602
+ parse_dates = None , params = None ):
597
603
args = _convert_params (sql , params )
604
+
598
605
result = self .execute (* args )
599
606
data = result .fetchall ()
600
607
columns = result .keys ()
601
608
602
- data_frame = _frame_from_data_and_columns (data , columns ,
603
- index_col = index_col ,
604
- coerce_float = coerce_float )
609
+ data_frame = DataFrame .from_records (
610
+ data , columns = columns , coerce_float = coerce_float )
611
+
612
+ _parse_date_columns (data_frame , parse_dates )
613
+
614
+ if index_col is not None :
615
+ data_frame .set_index (index_col , inplace = True )
605
616
606
- return self . _parse_date_columns ( data_frame , parse_dates )
617
+ return data_frame
607
618
608
619
def to_sql (self , frame , name , if_exists = 'fail' , index = True ):
609
620
table = PandasSQLTable (
@@ -818,10 +829,14 @@ def read_sql(self, sql, index_col=None, coerce_float=True, params=None,
818
829
data = self ._fetchall_as_list (cursor )
819
830
cursor .close ()
820
831
821
- data_frame = _frame_from_data_and_columns (data , columns ,
822
- index_col = index_col ,
823
- coerce_float = coerce_float )
824
- return self ._parse_date_columns (data_frame , parse_dates = parse_dates )
832
+ data_frame = DataFrame .from_records (
833
+ data , columns = columns , coerce_float = coerce_float )
834
+
835
+ _parse_date_columns (data_frame , parse_dates )
836
+
837
+ if index_col is not None :
838
+ data_frame .set_index (index_col , inplace = True )
839
+ return data_frame
825
840
826
841
def _fetchall_as_list (self , cur ):
827
842
result = cur .fetchall ()
0 commit comments