1
+ # -*- coding: utf-8 -*-
1
2
"""
2
3
Collection of query wrappers / abstractions to both facilitate data
3
4
retrieval and to reduce dependency on DB-specific API.
4
5
"""
6
+
5
7
from __future__ import print_function , division
6
- from datetime import datetime , date , timedelta
8
+ from datetime import datetime , date
7
9
8
10
import warnings
9
11
import traceback
10
- import itertools
11
12
import re
12
13
import numpy as np
13
14
14
15
import pandas .lib as lib
15
16
import pandas .core .common as com
16
17
from pandas .compat import lzip , map , zip , raise_with_traceback , string_types
17
18
from pandas .core .api import DataFrame , Series
18
- from pandas .core .common import notnull , isnull
19
+ from pandas .core .common import isnull
19
20
from pandas .core .base import PandasObject
20
21
from pandas .tseries .tools import to_datetime
21
22
22
23
from contextlib import contextmanager
23
24
25
+
24
26
class SQLAlchemyRequired (ImportError ):
25
27
pass
26
28
@@ -34,6 +36,7 @@ class DatabaseError(IOError):
34
36
35
37
_SQLALCHEMY_INSTALLED = None
36
38
39
+
37
40
def _is_sqlalchemy_engine (con ):
38
41
global _SQLALCHEMY_INSTALLED
39
42
if _SQLALCHEMY_INSTALLED is None :
@@ -80,7 +83,8 @@ def _handle_date_column(col, format=None):
80
83
else :
81
84
if format in ['D' , 's' , 'ms' , 'us' , 'ns' ]:
82
85
return to_datetime (col , coerce = True , unit = format )
83
- elif issubclass (col .dtype .type , np .floating ) or issubclass (col .dtype .type , np .integer ):
86
+ elif (issubclass (col .dtype .type , np .floating )
87
+ or issubclass (col .dtype .type , np .integer )):
84
88
# parse dates as timestamp
85
89
format = 's' if format is None else format
86
90
return to_datetime (col , coerce = True , unit = format )
@@ -89,8 +93,9 @@ def _handle_date_column(col, format=None):
89
93
90
94
91
95
def _parse_date_columns (data_frame , parse_dates ):
92
- """ Force non-datetime columns to be read as such.
93
- Supports both string formatted and integer timestamp columns
96
+ """
97
+ Force non-datetime columns to be read as such.
98
+ Supports both string formatted and integer timestamp columns
94
99
"""
95
100
# handle non-list entries for parse_dates gracefully
96
101
if parse_dates is True or parse_dates is None or parse_dates is False :
@@ -152,6 +157,7 @@ def _safe_fetch(cur):
152
157
if excName == 'OperationalError' :
153
158
return []
154
159
160
+
155
161
def tquery (sql , con = None , cur = None , retry = True ):
156
162
"""
157
163
DEPRECATED. Returns list of tuples corresponding to each row in given sql
@@ -209,8 +215,8 @@ def tquery(sql, con=None, cur=None, retry=True):
209
215
210
216
def uquery (sql , con = None , cur = None , retry = True , params = None ):
211
217
"""
212
- DEPRECATED. Does the same thing as tquery, but instead of returning results, it
213
- returns the number of rows affected. Good for update queries.
218
+ DEPRECATED. Does the same thing as tquery, but instead of returning
219
+ results, it returns the number of rows affected. Good for update queries.
214
220
215
221
To obtain the same result in the future, you can use the following:
216
222
@@ -269,8 +275,8 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
269
275
con : SQLAlchemy engine
270
276
Sqlite DBAPI connection mode not supported
271
277
schema : string, default None
272
- Name of SQL schema in database to query (if database flavor supports this).
273
- If None, use default schema (default).
278
+ Name of SQL schema in database to query (if database flavor
279
+ supports this). If None, use default schema (default).
274
280
index_col : string, optional
275
281
Column to set as index
276
282
coerce_float : boolean, default True
@@ -343,7 +349,7 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None,
343
349
decimal.Decimal) to floating point, useful for SQL result sets
344
350
params : list, tuple or dict, optional
345
351
List of parameters to pass to execute method. The syntax used
346
- to pass parameters is database driver dependent. Check your
352
+ to pass parameters is database driver dependent. Check your
347
353
database driver documentation for which of the five syntax styles,
348
354
described in PEP 249's paramstyle, is supported.
349
355
E.g. psycopg2 uses %(name)s, so use params={'name' : 'value'}
@@ -393,7 +399,7 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
393
399
decimal.Decimal) to floating point, useful for SQL result sets
394
400
params : list, tuple or dict, optional
395
401
List of parameters to pass to execute method. The syntax used
396
- to pass parameters is database driver dependent. Check your
402
+ to pass parameters is database driver dependent. Check your
397
403
database driver documentation for which of the five syntax styles,
398
404
described in PEP 249's paramstyle, is supported.
399
405
E.g. psycopg2 uses %(name)s, so use params={'name' : 'value'}
@@ -469,8 +475,8 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
469
475
'mysql' is deprecated and will be removed in future versions, but it
470
476
will be further supported through SQLAlchemy engines.
471
477
schema : string, default None
472
- Name of SQL schema in database to write to (if database flavor supports
473
- this). If None, use default schema (default).
478
+ Name of SQL schema in database to write to (if database flavor
479
+ supports this). If None, use default schema (default).
474
480
if_exists : {'fail', 'replace', 'append'}, default 'fail'
475
481
- fail: If table exists, do nothing.
476
482
- replace: If table exists, drop it, recreate it, and insert data.
@@ -482,7 +488,7 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
482
488
`index` is True, then the index names are used.
483
489
A sequence should be given if the DataFrame uses MultiIndex.
484
490
chunksize : int, default None
485
- If not None, then rows will be written in batches of this size at a
491
+ If not None, then rows will be written in batches of this size at a
486
492
time. If None, all rows will be written at once.
487
493
488
494
"""
@@ -535,7 +541,9 @@ def has_table(table_name, con, flavor='sqlite', schema=None):
535
541
"and will be removed in future versions. "
536
542
"MySQL will be further supported with SQLAlchemy engines." )
537
543
538
- def pandasSQL_builder (con , flavor = None , schema = None , meta = None , is_cursor = False ):
544
+
545
+ def pandasSQL_builder (con , flavor = None , schema = None , meta = None ,
546
+ is_cursor = False ):
539
547
"""
540
548
Convenience function to return the correct PandasSQL subclass based on the
541
549
provided parameters
@@ -622,7 +630,7 @@ def insert_data(self):
622
630
"duplicate name in index/columns: {0}" .format (err ))
623
631
else :
624
632
temp = self .frame
625
-
633
+
626
634
column_names = list (map (str , temp .columns ))
627
635
ncols = len (column_names )
628
636
data_list = [None ] * ncols
@@ -631,7 +639,8 @@ def insert_data(self):
631
639
for i in range (len (blocks )):
632
640
b = blocks [i ]
633
641
if b .is_datetime :
634
- # convert to microsecond resolution so this yields datetime.datetime
642
+ # convert to microsecond resolution so this yields
643
+ # datetime.datetime
635
644
d = b .values .astype ('M8[us]' ).astype (object )
636
645
else :
637
646
d = np .array (b .values , dtype = object )
@@ -647,7 +656,7 @@ def insert_data(self):
647
656
return column_names , data_list
648
657
649
658
def _execute_insert (self , conn , keys , data_iter ):
650
- data = [dict ( (k , v ) for k , v in zip (keys , row ) ) for row in data_iter ]
659
+ data = [dict ((k , v ) for k , v in zip (keys , row )) for row in data_iter ]
651
660
conn .execute (self .insert_statement (), data )
652
661
653
662
def insert (self , chunksize = None ):
@@ -658,11 +667,11 @@ def insert(self, chunksize=None):
658
667
if nrows == 0 :
659
668
return
660
669
661
- if chunksize is None :
670
+ if chunksize is None :
662
671
chunksize = nrows
663
672
elif chunksize == 0 :
664
673
raise ValueError ('chunksize argument should be non-zero' )
665
-
674
+
666
675
chunks = int (nrows / chunksize ) + 1
667
676
668
677
with self .pd_sql .run_transaction () as conn :
@@ -715,7 +724,8 @@ def _index_name(self, index, index_label):
715
724
else :
716
725
return index_label
717
726
# return the used column labels for the index columns
718
- if nlevels == 1 and 'index' not in self .frame .columns and self .frame .index .name is None :
727
+ if (nlevels == 1 and 'index' not in self .frame .columns
728
+ and self .frame .index .name is None ):
719
729
return ['index' ]
720
730
else :
721
731
return [l if l is not None else "level_{0}" .format (i )
@@ -739,7 +749,7 @@ def _get_column_names_and_types(self, dtype_mapper):
739
749
740
750
column_names_and_types += [
741
751
(str (self .frame .columns [i ]),
742
- dtype_mapper (self .frame .iloc [:,i ]),
752
+ dtype_mapper (self .frame .iloc [:, i ]),
743
753
False )
744
754
for i in range (len (self .frame .columns ))
745
755
]
@@ -756,9 +766,8 @@ def _create_table_setup(self):
756
766
for name , typ , is_index in column_names_and_types ]
757
767
758
768
if self .keys is not None :
759
- columns .append (PrimaryKeyConstraint (self .keys ,
760
- name = self .name + '_pk' ))
761
-
769
+ pkc = PrimaryKeyConstraint (self .keys , name = self .name + '_pk' )
770
+ columns .append (pkc )
762
771
763
772
schema = self .schema or self .pd_sql .meta .schema
764
773
@@ -770,17 +779,16 @@ def _create_table_setup(self):
770
779
return Table (self .name , meta , * columns , schema = schema )
771
780
772
781
def _harmonize_columns (self , parse_dates = None ):
773
- """ Make a data_frame's column type align with an sql_table
774
- column types
775
- Need to work around limited NA value support.
776
- Floats are always fine, ints must always
777
- be floats if there are Null values.
778
- Booleans are hard because converting bool column with None replaces
779
- all Nones with false. Therefore only convert bool if there are no
780
- NA values.
781
- Datetimes should already be converted
782
- to np.datetime if supported, but here we also force conversion
783
- if required
782
+ """
783
+ Make the DataFrame's column types align with the SQL table
784
+ column types.
785
+ Need to work around limited NA value support. Floats are always
786
+ fine, ints must always be floats if there are Null values.
787
+ Booleans are hard because converting bool column with None replaces
788
+ all Nones with false. Therefore only convert bool if there are no
789
+ NA values.
790
+ Datetimes should already be converted to np.datetime64 if supported,
791
+ but here we also force conversion if required
784
792
"""
785
793
# handle non-list entries for parse_dates gracefully
786
794
if parse_dates is True or parse_dates is None or parse_dates is False :
@@ -823,7 +831,7 @@ def _harmonize_columns(self, parse_dates=None):
823
831
824
832
def _sqlalchemy_type (self , col ):
825
833
from sqlalchemy .types import (BigInteger , Float , Text , Boolean ,
826
- DateTime , Date , Time , Interval )
834
+ DateTime , Date , Time )
827
835
828
836
if com .is_datetime64_dtype (col ):
829
837
try :
@@ -874,12 +882,12 @@ class PandasSQL(PandasObject):
874
882
"""
875
883
876
884
def read_sql (self , * args , ** kwargs ):
877
- raise ValueError (
878
- "PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor" )
885
+ raise ValueError ("PandasSQL must be created with an SQLAlchemy engine"
886
+ " or connection+sql flavor" )
879
887
880
888
def to_sql (self , * args , ** kwargs ):
881
- raise ValueError (
882
- "PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor" )
889
+ raise ValueError ("PandasSQL must be created with an SQLAlchemy engine"
890
+ " or connection+sql flavor" )
883
891
884
892
885
893
class PandasSQLAlchemy (PandasSQL ):
@@ -897,7 +905,7 @@ def __init__(self, engine, schema=None, meta=None):
897
905
self .meta = meta
898
906
899
907
def run_transaction (self ):
900
- return self .engine .begin ()
908
+ return self .engine .begin ()
901
909
902
910
def execute (self , * args , ** kwargs ):
903
911
"""Simple passthrough to SQLAlchemy engine"""
@@ -964,8 +972,8 @@ def drop_table(self, table_name, schema=None):
964
972
self .meta .clear ()
965
973
966
974
def _create_sql_schema (self , frame , table_name , keys = None ):
967
- table = PandasSQLTable (table_name , self , frame = frame , index = False ,
968
- keys = keys )
975
+ table = PandasSQLTable (table_name , self , frame = frame , index = False ,
976
+ keys = keys )
969
977
return str (table .sql_schema ())
970
978
971
979
@@ -1025,9 +1033,11 @@ def _create_sql_schema(self, frame, table_name, keys=None):
1025
1033
1026
1034
1027
1035
class PandasSQLTableLegacy (PandasSQLTable ):
1028
- """Patch the PandasSQLTable for legacy support.
1029
- Instead of a table variable just use the Create Table
1030
- statement"""
1036
+ """
1037
+ Patch the PandasSQLTable for legacy support.
1038
+ Instead of a table variable just use the Create Table statement.
1039
+ """
1040
+
1031
1041
def sql_schema (self ):
1032
1042
return str (";\n " .join (self .table ))
1033
1043
@@ -1058,11 +1068,11 @@ def _execute_insert(self, conn, keys, data_iter):
1058
1068
conn .executemany (self .insert_statement (), data_list )
1059
1069
1060
1070
def _create_table_setup (self ):
1061
- """Return a list of SQL statement that create a table reflecting the
1071
+ """
1072
+ Return a list of SQL statements that create a table reflecting the
1062
1073
structure of a DataFrame. The first entry will be a CREATE TABLE
1063
1074
statement while the rest will be CREATE INDEX statements
1064
1075
"""
1065
-
1066
1076
column_names_and_types = \
1067
1077
self ._get_column_names_and_types (self ._sql_type_name )
1068
1078
@@ -1159,15 +1169,15 @@ def execute(self, *args, **kwargs):
1159
1169
else :
1160
1170
cur .execute (* args )
1161
1171
return cur
1162
- except Exception as e :
1172
+ except Exception as exc :
1163
1173
try :
1164
1174
self .con .rollback ()
1165
1175
except Exception : # pragma: no cover
1166
- ex = DatabaseError (
1167
- "Execution failed on sql: %s \n %s \n unable to rollback" % (args [0 ], e ))
1176
+ ex = DatabaseError ("Execution failed on sql: %s \n %s \n unable"
1177
+ " to rollback" % (args [0 ], exc ))
1168
1178
raise_with_traceback (ex )
1169
1179
1170
- ex = DatabaseError ("Execution failed on sql '%s': %s" % (args [0 ], e ))
1180
+ ex = DatabaseError ("Execution failed on sql '%s': %s" % (args [0 ], exc ))
1171
1181
raise_with_traceback (ex )
1172
1182
1173
1183
def read_sql (self , sql , index_col = None , coerce_float = True , params = None ,
@@ -1213,11 +1223,11 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
1213
1223
`index` is True, then the index names are used.
1214
1224
A sequence should be given if the DataFrame uses MultiIndex.
1215
1225
schema : string, default None
1216
- Ignored parameter included for compatability with SQLAlchemy version
1217
- of `to_sql`.
1226
+ Ignored parameter included for compatibility with SQLAlchemy
1227
+ version of ``to_sql``.
1218
1228
chunksize : int, default None
1219
- If not None, then rows will be written in batches of this size at a
1220
- time. If None, all rows will be written at once.
1229
+ If not None, then rows will be written in batches of this
1230
+ size at a time. If None, all rows will be written at once.
1221
1231
1222
1232
"""
1223
1233
table = PandasSQLTableLegacy (
@@ -1243,8 +1253,8 @@ def drop_table(self, name, schema=None):
1243
1253
self .execute (drop_sql )
1244
1254
1245
1255
def _create_sql_schema (self , frame , table_name , keys = None ):
1246
- table = PandasSQLTableLegacy (table_name , self , frame = frame , index = False ,
1247
- keys = keys )
1256
+ table = PandasSQLTableLegacy (table_name , self , frame = frame ,
1257
+ index = False , keys = keys )
1248
1258
return str (table .sql_schema ())
1249
1259
1250
1260
0 commit comments