Skip to content

Commit cfc90d7

Browse files
committed
Merge pull request #6420 from mangecoeur/sql-perf
ENH #6416: performance improvements on write
2 parents d837bd4 + c67ae75 commit cfc90d7

File tree

1 file changed

+44
-30
lines changed

1 file changed

+44
-30
lines changed

pandas/io/sql.py

+44-30
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
Collection of query wrappers / abstractions to both facilitate data
33
retrieval and to reduce dependency on DB-specific API.
44
"""
5-
from __future__ import print_function
6-
from datetime import datetime, date
5+
from __future__ import print_function, division
6+
from datetime import datetime, date, timedelta
7+
78
import warnings
8-
from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
9+
import itertools
910
import numpy as np
1011

11-
12+
import pandas.core.common as com
13+
from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
1214
from pandas.core.api import DataFrame
1315
from pandas.core.base import PandasObject
1416
from pandas.tseries.tools import to_datetime
@@ -360,7 +362,7 @@ def pandasSQL_builder(con, flavor=None, meta=None):
360362

361363

362364
class PandasSQLTable(PandasObject):
363-
"""
365+
"""
364366
For mapping Pandas tables to SQL tables.
365367
Uses fact that table is reflected by SQLAlchemy to
366368
do better type conversions.
@@ -419,13 +421,20 @@ def maybe_asscalar(self, i):
419421

420422
def insert(self):
421423
ins = self.insert_statement()
422-
423-
for t in self.frame.iterrows():
424-
data = dict((k, self.maybe_asscalar(v))
425-
for k, v in t[1].iteritems())
426-
if self.index is not None:
424+
data_list = []
425+
# to avoid if check for every row
426+
if self.index is not None:
427+
for t in self.frame.iterrows():
428+
data = dict((k, self.maybe_asscalar(v))
429+
for k, v in t[1].iteritems())
427430
data[self.index] = self.maybe_asscalar(t[0])
428-
self.pd_sql.execute(ins, **data)
431+
data_list.append(data)
432+
else:
433+
for t in self.frame.iterrows():
434+
data = dict((k, self.maybe_asscalar(v))
435+
for k, v in t[1].iteritems())
436+
data_list.append(data)
437+
self.pd_sql.execute(ins, data_list)
429438

430439
def read(self, coerce_float=True, parse_dates=None, columns=None):
431440

@@ -480,7 +489,7 @@ def _create_table_statement(self):
480489
if self.index is not None:
481490
columns.insert(0, Column(self.index,
482491
self._sqlalchemy_type(
483-
self.frame.index.dtype),
492+
self.frame.index),
484493
index=True))
485494

486495
return Table(self.name, self.pd_sql.meta, *columns)
@@ -537,22 +546,25 @@ def _harmonize_columns(self, parse_dates=None):
537546
except KeyError:
538547
pass # this column not in results
539548

540-
def _sqlalchemy_type(self, dtype):
541-
from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date
549+
def _sqlalchemy_type(self, arr_or_dtype):
550+
from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date, Interval
542551

543-
pytype = dtype.type
544-
545-
if pytype is date:
552+
if arr_or_dtype is date:
546553
return Date
547-
if issubclass(pytype, np.datetime64) or pytype is datetime:
548-
# Caution: np.datetime64 is also a subclass of np.number.
549-
return DateTime
550-
if issubclass(pytype, np.floating):
554+
if com.is_datetime64_dtype(arr_or_dtype):
555+
try:
556+
tz = arr_or_dtype.tzinfo
557+
return DateTime(timezone=True)
558+
except:
559+
return DateTime
560+
if com.is_timedelta64_dtype(arr_or_dtype):
561+
return Interval
562+
elif com.is_float_dtype(arr_or_dtype):
551563
return Float
552-
if issubclass(pytype, np.integer):
564+
elif com.is_integer_dtype(arr_or_dtype):
553565
# TODO: Refine integer size.
554566
return Integer
555-
if issubclass(pytype, np.bool_):
567+
elif com.is_bool(arr_or_dtype):
556568
return Boolean
557569
return Text
558570

@@ -638,14 +650,18 @@ def to_sql(self, frame, name, if_exists='fail', index=True):
638650
name, self, frame=frame, index=index, if_exists=if_exists)
639651
table.insert()
640652

653+
@property
654+
def tables(self):
655+
return self.meta.tables
656+
641657
def has_table(self, name):
642-
return self.engine.has_table(name)
658+
if self.meta.tables.get(name) is not None:
659+
return True
660+
else:
661+
return False
643662

644663
def get_table(self, table_name):
645-
if self.engine.has_table(table_name):
646-
return self.meta.tables[table_name]
647-
else:
648-
return None
664+
return self.meta.tables.get(table_name)
649665

650666
def read_table(self, table_name, index_col=None, coerce_float=True,
651667
parse_dates=None, columns=None):
@@ -746,8 +762,6 @@ def insert(self):
746762
data = [self.maybe_asscalar(v) for v in r[1].values]
747763
if self.index is not None:
748764
data.insert(0, self.maybe_asscalar(r[0]))
749-
print(type(data[2]))
750-
print(type(r[0]))
751765
cur.execute(ins, tuple(data))
752766
cur.close()
753767

0 commit comments

Comments
 (0)