@@ -552,33 +552,19 @@ class PandasSQLTable(PandasObject):
552
552
# TODO: support for multiIndex
553
553
def __init__ (self , name , pandas_sql_engine , frame = None , index = True ,
554
554
if_exists = 'fail' , prefix = 'pandas' , index_label = None ,
555
- schema = None ):
555
+ schema = None , keys = None ):
556
556
self .name = name
557
557
self .pd_sql = pandas_sql_engine
558
558
self .prefix = prefix
559
559
self .frame = frame
560
560
self .index = self ._index_name (index , index_label )
561
561
self .schema = schema
562
+ self .if_exists = if_exists
563
+ self .keys = keys
562
564
563
565
if frame is not None :
564
- # We want to write a frame
565
- if self .pd_sql .has_table (self .name , self .schema ):
566
- if if_exists == 'fail' :
567
- raise ValueError ("Table '%s' already exists." % name )
568
- elif if_exists == 'replace' :
569
- self .pd_sql .drop_table (self .name , self .schema )
570
- self .table = self ._create_table_setup ()
571
- self .create ()
572
- elif if_exists == 'append' :
573
- self .table = self .pd_sql .get_table (self .name , self .schema )
574
- if self .table is None :
575
- self .table = self ._create_table_setup ()
576
- else :
577
- raise ValueError (
578
- "'{0}' is not valid for if_exists" .format (if_exists ))
579
- else :
580
- self .table = self ._create_table_setup ()
581
- self .create ()
566
+ # We want to initialize based on a dataframe
567
+ self .table = self ._create_table_setup ()
582
568
else :
583
569
# no data provided, read-only mode
584
570
self .table = self .pd_sql .get_table (self .name , self .schema )
@@ -593,9 +579,26 @@ def sql_schema(self):
593
579
from sqlalchemy .schema import CreateTable
594
580
return str (CreateTable (self .table ))
595
581
596
- def create (self ):
582
+ def _execute_create (self ):
583
+ # Inserting table into database, add to MetaData object
584
+ self .table = self .table .tometadata (self .pd_sql .meta )
597
585
self .table .create ()
598
586
587
+ def create (self ):
588
+ if self .exists ():
589
+ if self .if_exists == 'fail' :
590
+ raise ValueError ("Table '%s' already exists." % self .name )
591
+ elif self .if_exists == 'replace' :
592
+ self .pd_sql .drop_table (self .name , self .schema )
593
+ self ._execute_create ()
594
+ elif self .if_exists == 'append' :
595
+ pass
596
+ else :
597
+ raise ValueError (
598
+ "'{0}' is not valid for if_exists" .format (self .if_exists ))
599
+ else :
600
+ self ._execute_create ()
601
+
599
602
def insert_statement (self ):
600
603
return self .table .insert ()
601
604
@@ -634,28 +637,31 @@ def insert_data(self):
634
637
635
638
return column_names , data_list
636
639
637
- def insert (self , chunksize = None ):
640
+ def get_session (self ):
641
+ con = self .pd_sql .engine .connect ()
642
+ return con .begin ()
638
643
639
- ins = self .insert_statement ()
644
+ def _execute_insert (self , trans , keys , data_iter ):
645
+ data = [dict ( (k , v ) for k , v in zip (keys , row ) ) for row in data_iter ]
646
+ trans .connection .execute (self .insert_statement (), data )
647
+
648
+ def insert (self , chunksize = None ):
640
649
keys , data_list = self .insert_data ()
641
650
642
651
nrows = len (self .frame )
643
652
if chunksize is None :
644
653
chunksize = nrows
645
654
chunks = int (nrows / chunksize ) + 1
646
655
647
- con = self .pd_sql .engine .connect ()
648
- with con .begin () as trans :
656
+ with self .get_session () as trans :
649
657
for i in range (chunks ):
650
658
start_i = i * chunksize
651
659
end_i = min ((i + 1 ) * chunksize , nrows )
652
660
if start_i >= end_i :
653
661
break
654
662
655
- chunk_list = [arr [start_i :end_i ] for arr in data_list ]
656
- insert_list = [dict ((k , v ) for k , v in zip (keys , row ))
657
- for row in zip (* chunk_list )]
658
- con .execute (ins , insert_list )
663
+ chunk_iter = zip (* [arr [start_i :end_i ] for arr in data_list ])
664
+ self ._execute_insert (trans , keys , chunk_iter )
659
665
660
666
def read (self , coerce_float = True , parse_dates = None , columns = None ):
661
667
@@ -729,15 +735,27 @@ def _get_column_names_and_types(self, dtype_mapper):
729
735
return column_names_and_types
730
736
731
737
def _create_table_setup (self ):
732
- from sqlalchemy import Table , Column
738
+ from sqlalchemy import Table , Column , PrimaryKeyConstraint
733
739
734
740
column_names_and_types = \
735
741
self ._get_column_names_and_types (self ._sqlalchemy_type )
736
742
737
743
columns = [Column (name , typ , index = is_index )
738
744
for name , typ , is_index in column_names_and_types ]
739
745
740
- return Table (self .name , self .pd_sql .meta , * columns , schema = self .schema )
746
+ if self .keys is not None :
747
+ columns .append (PrimaryKeyConstraint (self .keys ,
748
+ name = self .name + '_pk' ))
749
+
750
+
751
+ schema = self .schema or self .pd_sql .meta .schema
752
+
753
+ # At this point, attach to new metadata, only attach to self.meta
754
+ # once table is created.
755
+ from sqlalchemy .schema import MetaData
756
+ meta = MetaData (self .pd_sql , schema = schema )
757
+
758
+ return Table (self .name , meta , * columns , schema = schema )
741
759
742
760
def _harmonize_columns (self , parse_dates = None ):
743
761
""" Make a data_frame's column type align with an sql_table
@@ -872,7 +890,6 @@ def execute(self, *args, **kwargs):
872
890
873
891
def read_table (self , table_name , index_col = None , coerce_float = True ,
874
892
parse_dates = None , columns = None , schema = None ):
875
-
876
893
table = PandasSQLTable (
877
894
table_name , self , index = index_col , schema = schema )
878
895
return table .read (coerce_float = coerce_float ,
@@ -901,6 +918,7 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
901
918
table = PandasSQLTable (
902
919
name , self , frame = frame , index = index , if_exists = if_exists ,
903
920
index_label = index_label , schema = schema )
921
+ table .create ()
904
922
table .insert (chunksize )
905
923
# check for potentially case sensitivity issues (GH7815)
906
924
if name not in self .engine .table_names (schema = schema or self .meta .schema ):
@@ -930,8 +948,9 @@ def drop_table(self, table_name, schema=None):
930
948
self .get_table (table_name , schema ).drop ()
931
949
self .meta .clear ()
932
950
933
- def _create_sql_schema (self , frame , table_name ):
934
- table = PandasSQLTable (table_name , self , frame = frame )
951
+ def _create_sql_schema (self , frame , table_name , keys = None ):
952
+ table = PandasSQLTable (table_name , self , frame = frame , index = False ,
953
+ keys = keys )
935
954
return str (table .sql_schema ())
936
955
937
956
@@ -997,8 +1016,8 @@ class PandasSQLTableLegacy(PandasSQLTable):
997
1016
def sql_schema (self ):
998
1017
return str (";\n " .join (self .table ))
999
1018
1000
- def create (self ):
1001
- with self .pd_sql . con :
1019
+ def _execute_create (self ):
1020
+ with self .get_session () :
1002
1021
for stmt in self .table :
1003
1022
self .pd_sql .execute (stmt )
1004
1023
@@ -1019,28 +1038,12 @@ def insert_statement(self):
1019
1038
self .name , col_names , wildcards )
1020
1039
return insert_statement
1021
1040
1022
- def insert (self , chunksize = None ):
1023
-
1024
- ins = self .insert_statement ()
1025
- keys , data_list = self .insert_data ()
1026
-
1027
- nrows = len (self .frame )
1028
- if chunksize is None :
1029
- chunksize = nrows
1030
- chunks = int (nrows / chunksize ) + 1
1041
+ def get_session (self ):
1042
+ return self .pd_sql .con
1031
1043
1032
- with self .pd_sql .con :
1033
- for i in range (chunks ):
1034
- start_i = i * chunksize
1035
- end_i = min ((i + 1 ) * chunksize , nrows )
1036
- if start_i >= end_i :
1037
- break
1038
- chunk_list = [arr [start_i :end_i ] for arr in data_list ]
1039
- insert_list = [tuple ((v for v in row ))
1040
- for row in zip (* chunk_list )]
1041
- cur = self .pd_sql .con .cursor ()
1042
- cur .executemany (ins , insert_list )
1043
- cur .close ()
1044
+ def _execute_insert (self , trans , keys , data_iter ):
1045
+ data_list = list (data_iter )
1046
+ trans .executemany (self .insert_statement (), data_list )
1044
1047
1045
1048
def _create_table_setup (self ):
1046
1049
"""Return a list of SQL statement that create a table reflecting the
@@ -1061,21 +1064,25 @@ def _create_table_setup(self):
1061
1064
br_l = _SQL_SYMB [flv ]['br_l' ] # left val quote char
1062
1065
br_r = _SQL_SYMB [flv ]['br_r' ] # right val quote char
1063
1066
1064
- col_template = br_l + '%s' + br_r + ' %s'
1065
-
1066
- columns = ',\n ' .join (col_template % (cname , ctype )
1067
- for cname , ctype , _ in column_names_and_types )
1068
- template = """CREATE TABLE %(name)s (
1069
- %(columns)s
1070
- )"""
1071
- create_stmts = [template % {'name' : self .name , 'columns' : columns }, ]
1072
-
1073
- ix_tpl = "CREATE INDEX ix_{tbl}_{col} ON {tbl} ({br_l}{col}{br_r})"
1074
- for cname , _ , is_index in column_names_and_types :
1075
- if not is_index :
1076
- continue
1077
- create_stmts .append (ix_tpl .format (tbl = self .name , col = cname ,
1078
- br_l = br_l , br_r = br_r ))
1067
+ create_tbl_stmts = [(br_l + '%s' + br_r + ' %s' ) % (cname , ctype )
1068
+ for cname , ctype , _ in column_names_and_types ]
1069
+ if self .keys is not None and len (self .keys ):
1070
+ cnames_br = "," .join ([br_l + c + br_r for c in self .keys ])
1071
+ create_tbl_stmts .append (
1072
+ "CONSTRAINT {tbl}_pk PRIMARY KEY ({cnames_br})" .format (
1073
+ tbl = self .name , cnames_br = cnames_br ))
1074
+
1075
+ create_stmts = ["CREATE TABLE " + self .name + " (\n " +
1076
+ ',\n ' .join (create_tbl_stmts ) + "\n )" ]
1077
+
1078
+ ix_cols = [cname for cname , _ , is_index in column_names_and_types
1079
+ if is_index ]
1080
+ if len (ix_cols ):
1081
+ cnames = "_" .join (ix_cols )
1082
+ cnames_br = "," .join ([br_l + c + br_r for c in ix_cols ])
1083
+ create_stmts .append (
1084
+ "CREATE INDEX ix_{tbl}_{cnames} ON {tbl} ({cnames_br})" .format (
1085
+ tbl = self .name , cnames = cnames , cnames_br = cnames_br ))
1079
1086
1080
1087
return create_stmts
1081
1088
@@ -1172,16 +1179,28 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
1172
1179
----------
1173
1180
frame: DataFrame
1174
1181
name: name of SQL table
1175
- flavor: {'sqlite', 'mysql'}, default 'sqlite'
1176
1182
if_exists: {'fail', 'replace', 'append'}, default 'fail'
1177
1183
fail: If table exists, do nothing.
1178
1184
replace: If table exists, drop it, recreate it, and insert data.
1179
1185
append: If table exists, insert data. Create if does not exist.
1186
+ index : boolean, default True
1187
+ Write DataFrame index as a column
1188
+ index_label : string or sequence, default None
1189
+ Column label for index column(s). If None is given (default) and
1190
+ `index` is True, then the index names are used.
1191
+ A sequence should be given if the DataFrame uses MultiIndex.
1192
+ schema : string, default None
1193
+ Ignored parameter included for compatability with SQLAlchemy version
1194
+ of `to_sql`.
1195
+ chunksize : int, default None
1196
+ If not None, then rows will be written in batches of this size at a
1197
+ time. If None, all rows will be written at once.
1180
1198
1181
1199
"""
1182
1200
table = PandasSQLTableLegacy (
1183
1201
name , self , frame = frame , index = index , if_exists = if_exists ,
1184
1202
index_label = index_label )
1203
+ table .create ()
1185
1204
table .insert (chunksize )
1186
1205
1187
1206
def has_table (self , name , schema = None ):
@@ -1200,8 +1219,9 @@ def drop_table(self, name, schema=None):
1200
1219
drop_sql = "DROP TABLE %s" % name
1201
1220
self .execute (drop_sql )
1202
1221
1203
- def _create_sql_schema (self , frame , table_name ):
1204
- table = PandasSQLTableLegacy (table_name , self , frame = frame )
1222
+ def _create_sql_schema (self , frame , table_name , keys = None ):
1223
+ table = PandasSQLTableLegacy (table_name , self , frame = frame , index = False ,
1224
+ keys = keys )
1205
1225
return str (table .sql_schema ())
1206
1226
1207
1227
@@ -1227,58 +1247,8 @@ def get_schema(frame, name, flavor='sqlite', keys=None, con=None):
1227
1247
1228
1248
"""
1229
1249
1230
- if con is None :
1231
- if flavor == 'mysql' :
1232
- warnings .warn (_MYSQL_WARNING , FutureWarning )
1233
- return _get_schema_legacy (frame , name , flavor , keys )
1234
-
1235
1250
pandas_sql = pandasSQL_builder (con = con , flavor = flavor )
1236
- return pandas_sql ._create_sql_schema (frame , name )
1237
-
1238
-
1239
- def _get_schema_legacy (frame , name , flavor , keys = None ):
1240
- """Old function from 0.13.1. To keep backwards compatibility.
1241
- When mysql legacy support is dropped, it should be possible to
1242
- remove this code
1243
- """
1244
-
1245
- def get_sqltype (dtype , flavor ):
1246
- pytype = dtype .type
1247
- pytype_name = "text"
1248
- if issubclass (pytype , np .floating ):
1249
- pytype_name = "float"
1250
- elif issubclass (pytype , np .integer ):
1251
- pytype_name = "int"
1252
- elif issubclass (pytype , np .datetime64 ) or pytype is datetime :
1253
- # Caution: np.datetime64 is also a subclass of np.number.
1254
- pytype_name = "datetime"
1255
- elif pytype is datetime .date :
1256
- pytype_name = "date"
1257
- elif issubclass (pytype , np .bool_ ):
1258
- pytype_name = "bool"
1259
-
1260
- return _SQL_TYPES [pytype_name ][flavor ]
1261
-
1262
- lookup_type = lambda dtype : get_sqltype (dtype , flavor )
1263
-
1264
- column_types = lzip (frame .dtypes .index , map (lookup_type , frame .dtypes ))
1265
- if flavor == 'sqlite' :
1266
- columns = ',\n ' .join ('[%s] %s' % x for x in column_types )
1267
- else :
1268
- columns = ',\n ' .join ('`%s` %s' % x for x in column_types )
1269
-
1270
- keystr = ''
1271
- if keys is not None :
1272
- if isinstance (keys , string_types ):
1273
- keys = (keys ,)
1274
- keystr = ', PRIMARY KEY (%s)' % ',' .join (keys )
1275
- template = """CREATE TABLE %(name)s (
1276
- %(columns)s
1277
- %(keystr)s
1278
- );"""
1279
- create_statement = template % {'name' : name , 'columns' : columns ,
1280
- 'keystr' : keystr }
1281
- return create_statement
1251
+ return pandas_sql ._create_sql_schema (frame , name , keys = keys )
1282
1252
1283
1253
1284
1254
# legacy names, with depreciation warnings and copied docs
0 commit comments