Skip to content

ENH #6416: performance improvements on write #6420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 20, 2014
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 49 additions & 26 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
Collection of query wrappers / abstractions to both facilitate data
retrieval and to reduce dependency on DB-specific API.
"""
from __future__ import print_function
from datetime import datetime, date
from __future__ import print_function, division
from datetime import datetime, date, timedelta
import warnings
from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
import numpy as np


import pandas.core.common as com
from pandas.core.api import DataFrame
from pandas.core.base import PandasObject
from pandas.tseries.tools import to_datetime
Expand Down Expand Up @@ -360,7 +360,7 @@ def pandasSQL_builder(con, flavor=None, meta=None):


class PandasSQLTable(PandasObject):
"""
"""
For mapping Pandas tables to SQL tables.
Uses fact that table is reflected by SQLAlchemy to
do better type conversions.
Expand Down Expand Up @@ -419,13 +419,21 @@ def maybe_asscalar(self, i):

def insert(self):
ins = self.insert_statement()

for t in self.frame.iterrows():
data = dict((k, self.maybe_asscalar(v))
for k, v in t[1].iteritems())
if self.index is not None:
data_list = []
# to avoid if check for every row
if self.index is not None:
for t in self.frame.iterrows():
data = dict((k, self.maybe_asscalar(v))
for k, v in t[1].iteritems())
data[self.index] = self.maybe_asscalar(t[0])
self.pd_sql.execute(ins, **data)
data_list.append(data)
else:
for t in self.frame.iterrows():
data = dict((k, self.maybe_asscalar(v))
for k, v in t[1].iteritems())
data_list.append(data)
#self.pd_sql.execute(ins, **data)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this commented-out line be removed?

self.pd_sql.execute(ins, data_list)

def read(self, coerce_float=True, parse_dates=None, columns=None):

Expand Down Expand Up @@ -480,7 +488,7 @@ def _create_table_statement(self):
if self.index is not None:
columns.insert(0, Column(self.index,
self._sqlalchemy_type(
self.frame.index.dtype),
self.frame.index),
index=True))

return Table(self.name, self.pd_sql.meta, *columns)
Expand Down Expand Up @@ -537,22 +545,33 @@ def _harmonize_columns(self, parse_dates=None):
except KeyError:
pass # this column not in results

def _sqlalchemy_type(self, dtype):
from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date
def _sqlalchemy_type(self, arr_or_dtype):
from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date, Interval

pytype = dtype.type
if isinstance(arr_or_dtype, np.dtype):
tipo = arr_or_dtype
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does `tipo` stand for? Also, you don't seem to use it much after this; only for the bool check.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`tipo` is Spanish for "type" (used to avoid the word `type`); it is copied from the pandas.core.common.is_* functions. For some reason there isn't one for "is_boolean"; it might be a good idea to add it to core.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions are already in core/common.py,

e.g. is_integer_dtype. You only really want to use these on an unknown array type (I can't really see what you are doing with it here).

elif isinstance(arr_or_dtype, type):
tipo = np.dtype(arr_or_dtype)
else:
tipo = arr_or_dtype.dtype

if pytype is date:
if arr_or_dtype is date:
return Date
if issubclass(pytype, np.datetime64) or pytype is datetime:
# Caution: np.datetime64 is also a subclass of np.number.
return DateTime
if issubclass(pytype, np.floating):
if com.is_datetime64_dtype(arr_or_dtype):
try:
tz = arr_or_dtype.tzinfo
return DateTime(timezone=True)
except:
print('no tzinfo')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be removed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Presumably you mean the print statement? If so, yes.

On 20 Feb 2014, at 14:43, Joris Van den Bossche [email protected] wrote:

In pandas/io/sql.py:

         return Date
-    if issubclass(pytype, np.datetime64) or pytype is datetime:
-        # Caution: np.datetime64 is also a subclass of np.number.
-        return DateTime
-    if issubclass(pytype, np.floating):
+    if com.is_datetime64_dtype(arr_or_dtype):
+        try:
+            tz = arr_or_dtype.tzinfo
+            return DateTime(timezone=True)
+        except:
+            print('no tzinfo')

    should be removed?


Reply to this email directly or view it on GitHub.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indeed

return DateTime
if com.is_timedelta64_dtype(arr_or_dtype):
return Interval
if com.is_float_dtype(arr_or_dtype):
return Float
if issubclass(pytype, np.integer):
if com.is_integer_dtype(arr_or_dtype):
# TODO: Refine integer size.
return Integer
if issubclass(pytype, np.bool_):
if isinstance(tipo, np.bool_):
return Boolean
return Text

Expand Down Expand Up @@ -638,14 +657,18 @@ def to_sql(self, frame, name, if_exists='fail', index=True):
name, self, frame=frame, index=index, if_exists=if_exists)
table.insert()

@property
def tables(self):
return self.meta.tables

def has_table(self, name):
return self.engine.has_table(name)
if self.meta.tables.get(name) is not None:
return True
else:
return False

def get_table(self, table_name):
if self.engine.has_table(table_name):
return self.meta.tables[table_name]
else:
return None
return self.meta.tables.get(table_name)

def read_table(self, table_name, index_col=None, coerce_float=True,
parse_dates=None, columns=None):
Expand Down