Skip to content

Commit 6680b6b

Browse files
committed
ENH pandas-dev#6416: performance improvements on write - trade-off: higher memory use for faster writes.
1 parent 150f323 commit 6680b6b

File tree

1 file changed

+49
-26
lines changed

1 file changed

+49
-26
lines changed

pandas/io/sql.py

+49-26
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,13 @@
22
Collection of query wrappers / abstractions to both facilitate data
33
retrieval and to reduce dependency on DB-specific API.
44
"""
5-
from __future__ import print_function
6-
from datetime import datetime, date
5+
from __future__ import print_function, division
6+
from datetime import datetime, date, timedelta
77
import warnings
88
from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
99
import numpy as np
1010

11-
11+
import pandas.core.common as com
1212
from pandas.core.api import DataFrame
1313
from pandas.core.base import PandasObject
1414
from pandas.tseries.tools import to_datetime
@@ -360,7 +360,7 @@ def pandasSQL_builder(con, flavor=None, meta=None):
360360

361361

362362
class PandasSQLTable(PandasObject):
363-
"""
363+
"""
364364
For mapping Pandas tables to SQL tables.
365365
Uses fact that table is reflected by SQLAlchemy to
366366
do better type conversions.
@@ -419,13 +419,21 @@ def maybe_asscalar(self, i):
419419

420420
def insert(self):
421421
ins = self.insert_statement()
422-
423-
for t in self.frame.iterrows():
424-
data = dict((k, self.maybe_asscalar(v))
425-
for k, v in t[1].iteritems())
426-
if self.index is not None:
422+
data_list = []
423+
# to avoid if check for every row
424+
if self.index is not None:
425+
for t in self.frame.iterrows():
426+
data = dict((k, self.maybe_asscalar(v))
427+
for k, v in t[1].iteritems())
427428
data[self.index] = self.maybe_asscalar(t[0])
428-
self.pd_sql.execute(ins, **data)
429+
data_list.append(data)
430+
else:
431+
for t in self.frame.iterrows():
432+
data = dict((k, self.maybe_asscalar(v))
433+
for k, v in t[1].iteritems())
434+
data_list.append(data)
435+
#self.pd_sql.execute(ins, **data)
436+
self.pd_sql.execute(ins, data_list)
429437

430438
def read(self, coerce_float=True, parse_dates=None, columns=None):
431439

@@ -480,7 +488,7 @@ def _create_table_statement(self):
480488
if self.index is not None:
481489
columns.insert(0, Column(self.index,
482490
self._sqlalchemy_type(
483-
self.frame.index.dtype),
491+
self.frame.index),
484492
index=True))
485493

486494
return Table(self.name, self.pd_sql.meta, *columns)
@@ -537,22 +545,33 @@ def _harmonize_columns(self, parse_dates=None):
537545
except KeyError:
538546
pass # this column not in results
539547

540-
def _sqlalchemy_type(self, dtype):
541-
from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date
548+
def _sqlalchemy_type(self, arr_or_dtype):
549+
from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date, Interval
542550

543-
pytype = dtype.type
551+
if isinstance(arr_or_dtype, np.dtype):
552+
tipo = arr_or_dtype
553+
elif isinstance(arr_or_dtype, type):
554+
tipo = np.dtype(arr_or_dtype)
555+
else:
556+
tipo = arr_or_dtype.dtype
544557

545-
if pytype is date:
558+
if arr_or_dtype is date:
546559
return Date
547-
if issubclass(pytype, np.datetime64) or pytype is datetime:
548-
# Caution: np.datetime64 is also a subclass of np.number.
549-
return DateTime
550-
if issubclass(pytype, np.floating):
560+
if com.is_datetime64_dtype(arr_or_dtype):
561+
try:
562+
tz = arr_or_dtype.tzinfo
563+
return DateTime(timezone=True)
564+
except:
565+
print('no tzinfo')
566+
return DateTime
567+
if com.is_timedelta64_dtype(arr_or_dtype):
568+
return Interval
569+
if com.is_float_dtype(arr_or_dtype):
551570
return Float
552-
if issubclass(pytype, np.integer):
571+
if com.is_integer_dtype(arr_or_dtype):
553572
# TODO: Refine integer size.
554573
return Integer
555-
if issubclass(pytype, np.bool_):
574+
if isinstance(tipo, np.bool_):
556575
return Boolean
557576
return Text
558577

@@ -638,14 +657,18 @@ def to_sql(self, frame, name, if_exists='fail', index=True):
638657
name, self, frame=frame, index=index, if_exists=if_exists)
639658
table.insert()
640659

660+
@property
661+
def tables(self):
662+
return self.meta.tables
663+
641664
def has_table(self, name):
642-
return self.engine.has_table(name)
665+
if self.meta.tables.get(name) is not None:
666+
return True
667+
else:
668+
return False
643669

644670
def get_table(self, table_name):
645-
if self.engine.has_table(table_name):
646-
return self.meta.tables[table_name]
647-
else:
648-
return None
671+
return self.meta.tables.get(table_name)
649672

650673
def read_table(self, table_name, index_col=None, coerce_float=True,
651674
parse_dates=None, columns=None):

0 commit comments

Comments
 (0)