6
6
7
7
from __future__ import print_function , division
8
8
from datetime import datetime , date , time
9
+ import csv
10
+ from io import StringIO
9
11
10
12
import warnings
11
13
import re
@@ -398,7 +400,7 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
398
400
399
401
400
402
def to_sql (frame , name , con , schema = None , if_exists = 'fail' , index = True ,
401
- index_label = None , chunksize = None , dtype = None ):
403
+ index_label = None , chunksize = None , dtype = None , method = None ):
402
404
"""
403
405
Write records stored in a DataFrame to a SQL database.
404
406
@@ -447,7 +449,7 @@ def to_sql(frame, name, con, schema=None, if_exists='fail', index=True,
447
449
448
450
pandas_sql .to_sql (frame , name , if_exists = if_exists , index = index ,
449
451
index_label = index_label , schema = schema ,
450
- chunksize = chunksize , dtype = dtype )
452
+ chunksize = chunksize , dtype = dtype , method = method )
451
453
452
454
453
455
def has_table (table_name , con , schema = None ):
@@ -572,29 +574,47 @@ def create(self):
572
574
else :
573
575
self ._execute_create ()
574
576
575
- def insert_statement (self , data , conn ):
576
- """
577
- Generate tuple of SQLAlchemy insert statement and any arguments
578
- to be executed by connection (via `_execute_insert`).
577
def _exec_insert(self, conn, keys, data_iter):
    """Execute a SQL statement inserting data.

    Parameters
    ----------
    conn : connection-like object
        Object whose ``execute`` method receives the insert construct
        and the row parameters (presumably a SQLAlchemy
        engine/connection -- confirm against the caller).
    keys : sequence
        Column names, in the same order as the values of each row.
    data_iter : iterable of sequences
        Rows to insert; each row holds one value per entry of `keys`.
    """
    data = [dict(zip(keys, row)) for row in data_iter]
    if not data:
        # Nothing to write: skip the round trip entirely.
        # NOTE(review): executing an insert construct with an empty
        # parameter list is presumably not a no-op in SQLAlchemy --
        # confirm; callers are not expected to pass empty chunks.
        return
    conn.execute(self.table.insert(), data)
586
587
587
- Returns
588
- -------
589
- SQLAlchemy statement
590
- insert statement
591
- *, optional
592
- Additional parameters to be passed when executing insert statement
588
def _exec_insert_multi(self, conn, keys, data_iter):
    """Alternative to _exec_insert for DBs that support multivalue INSERT.

    All rows are passed to ``self.table.insert(...)`` so that they are
    sent as one statement instead of as separate parameter sets.

    Note: multi-value insert is usually faster for a few columns
    but performance degrades quickly with increase of columns.

    Parameters
    ----------
    conn : connection-like object
        Object whose ``execute`` method receives the insert construct
        (presumably a SQLAlchemy engine/connection -- confirm against
        the caller).
    keys : sequence
        Column names, in the same order as the values of each row.
    data_iter : iterable of sequences
        Rows to insert; each row holds one value per entry of `keys`.
    """
    data = [dict(zip(keys, row)) for row in data_iter]
    if not data:
        # Nothing to write: an insert construct built from an empty
        # list of rows carries no values at all.
        # NOTE(review): presumably that would not be a no-op on the
        # database side -- confirm; callers are not expected to pass
        # empty chunks.
        return
    conn.execute(self.table.insert(data))
596
+
597
def _exec_insert_copy(self, conn, keys, data_iter):
    """Alternative to _exec_insert for DBs that support COPY FROM.

    The rows are serialised to an in-memory CSV buffer and streamed to
    the server with ``COPY ... FROM STDIN WITH CSV`` through the DBAPI
    cursor's ``copy_expert`` method (a psycopg2-style cursor API).

    Parameters
    ----------
    conn : connection-like object
        Must expose the underlying DBAPI connection as ``conn.connection``.
    keys : sequence
        Column names, in the same order as the values of each row.
    data_iter : iterable of sequences
        Rows to insert; each row holds one value per entry of `keys`.
    """
    def quote_ident(name):
        # Quote as a SQL identifier: wrap in double quotes and double
        # any embedded quote, so mixed-case names, names containing
        # spaces and names containing '"' all reach the server intact.
        return '"{}"'.format('{}'.format(name).replace('"', '""'))

    # Serialise the chunk before opening the cursor so the cursor is
    # only held for the duration of the COPY itself.
    # NOTE(review): the module carries __future__ imports for Python 2,
    # where csv.writer emits byte strings that io.StringIO rejects --
    # confirm whether Python 2 must be supported on this path.
    s_buf = StringIO()
    csv.writer(s_buf).writerows(data_iter)
    s_buf.seek(0)

    columns = ', '.join(quote_ident(k) for k in keys)
    if self.schema:
        table_name = '{}.{}'.format(quote_ident(self.schema),
                                    quote_ident(self.name))
    else:
        table_name = quote_ident(self.name)

    sql = 'COPY {} ({}) FROM STDIN WITH CSV'.format(table_name, columns)

    # gets a DBAPI connection that can provide a cursor
    dbapi_conn = conn.connection
    with dbapi_conn.cursor() as cur:
        cur.copy_expert(sql=sql, file=s_buf)
617
+
598
618
599
619
def insert_data (self ):
600
620
if self .index is not None :
@@ -632,12 +652,20 @@ def insert_data(self):
632
652
633
653
return column_names , data_list
634
654
635
- def _execute_insert (self , conn , keys , data_iter ):
636
- """Insert data into this table with database connection"""
637
- data = [{k : v for k , v in zip (keys , row )} for row in data_iter ]
638
- conn .execute (* self .insert_statement (data , conn ))
639
655
640
- def insert (self , chunksize = None ):
656
+ def insert (self , chunksize = None , method = None ):
657
+
658
+ # set insert method
659
+ if method in (None , 'default' ):
660
+ exec_insert = self ._exec_insert
661
+ elif method == 'multi' :
662
+ exec_insert = self ._exec_insert_multi
663
+ elif method == 'copy' :
664
+ exec_insert = self ._exec_insert_copy
665
+ else :
666
+ # TODO: support callables?
667
+ raise ValueError ('Invalid parameter `method`: {}' .format (method ))
668
+
641
669
keys , data_list = self .insert_data ()
642
670
643
671
nrows = len (self .frame )
@@ -660,7 +688,9 @@ def insert(self, chunksize=None):
660
688
break
661
689
662
690
chunk_iter = zip (* [arr [start_i :end_i ] for arr in data_list ])
663
- self ._execute_insert (conn , keys , chunk_iter )
691
+ exec_insert (conn , keys , chunk_iter )
692
+
693
+
664
694
665
695
def _query_iterator (self , result , chunksize , columns , coerce_float = True ,
666
696
parse_dates = None ):
@@ -1100,7 +1130,8 @@ def read_query(self, sql, index_col=None, coerce_float=True,
1100
1130
read_sql = read_query
1101
1131
1102
1132
def to_sql (self , frame , name , if_exists = 'fail' , index = True ,
1103
- index_label = None , schema = None , chunksize = None , dtype = None ):
1133
+ index_label = None , schema = None , chunksize = None , dtype = None ,
1134
+ method = None ):
1104
1135
"""
1105
1136
Write records stored in a DataFrame to a SQL database.
1106
1137
@@ -1146,7 +1177,7 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
1146
1177
if_exists = if_exists , index_label = index_label ,
1147
1178
schema = schema , dtype = dtype )
1148
1179
table .create ()
1149
- table .insert (chunksize )
1180
+ table .insert (chunksize , method = method )
1150
1181
if (not name .isdigit () and not name .islower ()):
1151
1182
# check for potentially case sensitivity issues (GH7815)
1152
1183
# Only check when name is not a number and name is not lower case
0 commit comments