8
8
9
9
from contextlib import contextmanager
10
10
from datetime import date , datetime , time
11
+ from functools import partial
11
12
import re
12
13
import warnings
13
14
@@ -395,7 +396,7 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
395
396
396
397
397
398
def to_sql (frame , name , con , schema = None , if_exists = 'fail' , index = True ,
398
- index_label = None , chunksize = None , dtype = None ):
399
+ index_label = None , chunksize = None , dtype = None , method = None ):
399
400
"""
400
401
Write records stored in a DataFrame to a SQL database.
401
402
@@ -429,6 +430,17 @@ def to_sql(frame, name, con, schema=None, if_exists='fail', index=True,
429
430
Optional specifying the datatype for columns. The SQL type should
430
431
be a SQLAlchemy type, or a string for sqlite3 fallback connection.
431
432
If all columns are of the same type, one single value can be used.
433
+ method : {None, 'multi', callable}, default None
434
+ Controls the SQL insertion clause used:
435
+
436
+ - None : Uses standard SQL ``INSERT`` clause (one per row).
437
+ - 'multi': Pass multiple values in a single ``INSERT`` clause.
438
+ - callable with signature ``(pd_table, conn, keys, data_iter)``.
439
+
440
+ Details and a sample callable implementation can be found in the
441
+ section :ref:`insert method <io.sql.method>`.
442
+
443
+ .. versionadded:: 0.24.0
432
444
"""
433
445
if if_exists not in ('fail' , 'replace' , 'append' ):
434
446
raise ValueError ("'{0}' is not valid for if_exists" .format (if_exists ))
@@ -443,7 +455,7 @@ def to_sql(frame, name, con, schema=None, if_exists='fail', index=True,
443
455
444
456
pandas_sql .to_sql (frame , name , if_exists = if_exists , index = index ,
445
457
index_label = index_label , schema = schema ,
446
- chunksize = chunksize , dtype = dtype )
458
+ chunksize = chunksize , dtype = dtype , method = method )
447
459
448
460
449
461
def has_table (table_name , con , schema = None ):
@@ -568,8 +580,29 @@ def create(self):
568
580
else :
569
581
self ._execute_create ()
570
582
571
- def insert_statement (self ):
572
- return self .table .insert ()
583
+ def _execute_insert (self , conn , keys , data_iter ):
584
+ """Execute SQL statement inserting data
585
+
586
+ Parameters
587
+ ----------
588
+ conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection
589
+ keys : list of str
590
+ Column names
591
+ data_iter : generator of list
592
+ Each item contains a list of values to be inserted
593
+ """
594
+ data = [dict (zip (keys , row )) for row in data_iter ]
595
+ conn .execute (self .table .insert (), data )
596
+
597
+ def _execute_insert_multi (self , conn , keys , data_iter ):
598
+ """Alternative to _execute_insert for DBs that support multi-value INSERT.
599
+
600
+ Note: multi-value insert is usually faster for analytics DBs
601
+ and tables containing a few columns
602
+ but performance degrades quickly as the number of columns increases.
603
+ """
604
+ data = [dict (zip (keys , row )) for row in data_iter ]
605
+ conn .execute (self .table .insert (data ))
573
606
574
607
def insert_data (self ):
575
608
if self .index is not None :
@@ -612,11 +645,18 @@ def insert_data(self):
612
645
613
646
return column_names , data_list
614
647
615
- def _execute_insert (self , conn , keys , data_iter ):
616
- data = [dict (zip (keys , row )) for row in data_iter ]
617
- conn .execute (self .insert_statement (), data )
648
+ def insert (self , chunksize = None , method = None ):
649
+
650
+ # set insert method
651
+ if method is None :
652
+ exec_insert = self ._execute_insert
653
+ elif method == 'multi' :
654
+ exec_insert = self ._execute_insert_multi
655
+ elif callable (method ):
656
+ exec_insert = partial (method , self )
657
+ else :
658
+ raise ValueError ('Invalid parameter `method`: {}' .format (method ))
618
659
619
- def insert (self , chunksize = None ):
620
660
keys , data_list = self .insert_data ()
621
661
622
662
nrows = len (self .frame )
@@ -639,7 +679,7 @@ def insert(self, chunksize=None):
639
679
break
640
680
641
681
chunk_iter = zip (* [arr [start_i :end_i ] for arr in data_list ])
642
- self . _execute_insert (conn , keys , chunk_iter )
682
+ exec_insert (conn , keys , chunk_iter )
643
683
644
684
def _query_iterator (self , result , chunksize , columns , coerce_float = True ,
645
685
parse_dates = None ):
@@ -1085,7 +1125,8 @@ def read_query(self, sql, index_col=None, coerce_float=True,
1085
1125
read_sql = read_query
1086
1126
1087
1127
def to_sql (self , frame , name , if_exists = 'fail' , index = True ,
1088
- index_label = None , schema = None , chunksize = None , dtype = None ):
1128
+ index_label = None , schema = None , chunksize = None , dtype = None ,
1129
+ method = None ):
1089
1130
"""
1090
1131
Write records stored in a DataFrame to a SQL database.
1091
1132
@@ -1115,7 +1156,17 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
1115
1156
Optional specifying the datatype for columns. The SQL type should
1116
1157
be a SQLAlchemy type. If all columns are of the same type, one
1117
1158
single value can be used.
1159
+ method : {None, 'multi', callable}, default None
1160
+ Controls the SQL insertion clause used:
1161
+
1162
+ * None : Uses standard SQL ``INSERT`` clause (one per row).
1163
+ * 'multi': Pass multiple values in a single ``INSERT`` clause.
1164
+ * callable with signature ``(pd_table, conn, keys, data_iter)``.
1165
+
1166
+ Details and a sample callable implementation can be found in the
1167
+ section :ref:`insert method <io.sql.method>`.
1118
1168
1169
+ .. versionadded:: 0.24.0
1119
1170
"""
1120
1171
if dtype and not is_dict_like (dtype ):
1121
1172
dtype = {col_name : dtype for col_name in frame }
@@ -1131,7 +1182,7 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
1131
1182
if_exists = if_exists , index_label = index_label ,
1132
1183
schema = schema , dtype = dtype )
1133
1184
table .create ()
1134
- table .insert (chunksize )
1185
+ table .insert (chunksize , method = method )
1135
1186
if (not name .isdigit () and not name .islower ()):
1136
1187
# check for potentially case sensitivity issues (GH7815)
1137
1188
# Only check when name is not a number and name is not lower case
@@ -1442,7 +1493,8 @@ def _fetchall_as_list(self, cur):
1442
1493
return result
1443
1494
1444
1495
def to_sql (self , frame , name , if_exists = 'fail' , index = True ,
1445
- index_label = None , schema = None , chunksize = None , dtype = None ):
1496
+ index_label = None , schema = None , chunksize = None , dtype = None ,
1497
+ method = None ):
1446
1498
"""
1447
1499
Write records stored in a DataFrame to a SQL database.
1448
1500
@@ -1471,7 +1523,17 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
1471
1523
Optional specifying the datatype for columns. The SQL type should
1472
1524
be a string. If all columns are of the same type, one single value
1473
1525
can be used.
1526
+ method : {None, 'multi', callable}, default None
1527
+ Controls the SQL insertion clause used:
1528
+
1529
+ * None : Uses standard SQL ``INSERT`` clause (one per row).
1530
+ * 'multi': Pass multiple values in a single ``INSERT`` clause.
1531
+ * callable with signature ``(pd_table, conn, keys, data_iter)``.
1532
+
1533
+ Details and a sample callable implementation can be found in the
1534
+ section :ref:`insert method <io.sql.method>`.
1474
1535
1536
+ .. versionadded:: 0.24.0
1475
1537
"""
1476
1538
if dtype and not is_dict_like (dtype ):
1477
1539
dtype = {col_name : dtype for col_name in frame }
@@ -1486,7 +1548,7 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
1486
1548
if_exists = if_exists , index_label = index_label ,
1487
1549
dtype = dtype )
1488
1550
table .create ()
1489
- table .insert (chunksize )
1551
+ table .insert (chunksize , method )
1490
1552
1491
1553
def has_table (self , name , schema = None ):
1492
1554
# TODO(wesm): unused?
0 commit comments