diff --git a/.travis.yml b/.travis.yml
index 48199c57d8b49..3e930d306c1bc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -73,6 +73,7 @@ install:
 
 before_script:
   - mysql -e 'create database pandas_nosetest;'
+  - psql -c 'create database pandas_nosetest;' -U postgres
 
 script:
   - echo "script"
diff --git a/ci/requirements-2.6.txt b/ci/requirements-2.6.txt
index ef48d33a5065d..e4ea174fe56f9 100644
--- a/ci/requirements-2.6.txt
+++ b/ci/requirements-2.6.txt
@@ -5,5 +5,6 @@ pytz==2013b
 http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz
 html5lib==1.0b2
 bigquery==2.0.17
+sqlalchemy==0.8.1
 numexpr==1.4.2
 sqlalchemy==0.8.1
diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt
index 477ba83ddf141..b2d4ab500f08b 100644
--- a/ci/requirements-2.7.txt
+++ b/ci/requirements-2.7.txt
@@ -19,3 +19,5 @@ beautifulsoup4==4.2.1
 statsmodels==0.5.0
 bigquery==2.0.17
 sqlalchemy==0.8.1
+pymysql==0.6.1
+psycopg2==2.5.2
diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt
index 73009b572c4c2..7ac8f6f313b19 100644
--- a/ci/requirements-3.3.txt
+++ b/ci/requirements-3.3.txt
@@ -15,3 +15,5 @@ scipy==0.12.0
 beautifulsoup4==4.2.1
 statsmodels==0.4.3
 sqlalchemy==0.9.1
+pymysql==0.6.1
+psycopg2==2.5.2
\ No newline at end of file
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 989f6983b28d3..eaa664839dd60 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -2,16 +2,17 @@
 Collection of query wrappers / abstractions to both facilitate data
 retrieval and to reduce dependency on DB-specific API.
 """
-from __future__ import print_function
-from datetime import datetime, date
+from __future__ import print_function, division
+from datetime import datetime, date, timedelta
 import warnings
 from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
 import numpy as np
-
+import pandas.core.common as com
 from pandas.core.api import DataFrame
 from pandas.core.base import PandasObject
 from pandas.tseries.tools import to_datetime
+#from pandas.tseries.index import DateTimeIndex
 
 
 class SQLAlchemyRequired(ImportError):
@@ -360,7 +361,7 @@ def pandasSQL_builder(con, flavor=None, meta=None):
 
 
 class PandasSQLTable(PandasObject):
-    """ 
+    """
     For mapping Pandas tables to SQL tables.
     Uses fact that table is reflected by SQLAlchemy to
     do better type convertions.
@@ -419,13 +420,21 @@ def maybe_asscalar(self, i):
 
     def insert(self):
         ins = self.insert_statement()
-
-        for t in self.frame.iterrows():
-            data = dict((k, self.maybe_asscalar(v))
-                        for k, v in t[1].iteritems())
-            if self.index is not None:
+        data_list = []
+        # to avoid if check for every row
+        if self.index is not None:
+            for t in self.frame.iterrows():
+                data = dict((k, self.maybe_asscalar(v))
+                            for k, v in t[1].iteritems())
                 data[self.index] = self.maybe_asscalar(t[0])
-            self.pd_sql.execute(ins, **data)
+                data_list.append(data)
+        else:
+            for t in self.frame.iterrows():
+                data = dict((k, self.maybe_asscalar(v))
+                            for k, v in t[1].iteritems())
+                data_list.append(data)
+        #self.pd_sql.execute(ins, **data)
+        self.pd_sql.execute(ins, data_list)
 
 
     def read(self, coerce_float=True, parse_dates=None, columns=None):
@@ -480,7 +489,7 @@ def _create_table_statement(self):
         if self.index is not None:
             columns.insert(0, Column(self.index,
                                      self._sqlalchemy_type(
-                                         self.frame.index.dtype),
+                                         self.frame.index),
                                      index=True))
 
         return Table(self.name, self.pd_sql.meta, *columns)
@@ -537,22 +546,33 @@ def _harmonize_columns(self, parse_dates=None):
             except KeyError:
                 pass  # this column not in results
 
-    def _sqlalchemy_type(self, dtype):
-        from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date
+    def _sqlalchemy_type(self, arr_or_dtype):
+        from sqlalchemy.types import Integer, Float, Text, Boolean, DateTime, Date, Interval
 
-        pytype = dtype.type
+        if isinstance(arr_or_dtype, np.dtype):
+            tipo = arr_or_dtype
+        elif isinstance(arr_or_dtype, type):
+            tipo = np.dtype(arr_or_dtype)
+        else:
+            tipo = arr_or_dtype.dtype
 
-        if pytype is date:
+        if arr_or_dtype is date:
             return Date
-        if issubclass(pytype, np.datetime64) or pytype is datetime:
-            # Caution: np.datetime64 is also a subclass of np.number.
-            return DateTime
-        if issubclass(pytype, np.floating):
+        if com.is_datetime64_dtype(arr_or_dtype):
+            # tz-aware values (e.g. a DatetimeIndex with a tz set) get a
+            # timezone-aware column; plain datetime64 data, which has no
+            # usable tzinfo attribute, falls back to a naive DateTime
+            if getattr(arr_or_dtype, 'tzinfo', None) is not None:
+                return DateTime(timezone=True)
+            return DateTime
+        if com.is_timedelta64_dtype(arr_or_dtype):
+            return Interval
+        if com.is_float_dtype(arr_or_dtype):
             return Float
-        if issubclass(pytype, np.integer):
+        if com.is_integer_dtype(arr_or_dtype):
             # TODO: Refine integer size.
             return Integer
-        if issubclass(pytype, np.bool_):
+        if issubclass(tipo, np.bool_):
             return Boolean
 
         return Text
 
@@ -638,14 +658,18 @@ def to_sql(self, frame, name, if_exists='fail', index=True):
             name, self, frame=frame, index=index, if_exists=if_exists)
         table.insert()
 
+    @property
+    def tables(self):
+        return self.meta.tables
+
     def has_table(self, name):
-        return self.engine.has_table(name)
+        if name in self.meta.tables:
+            return True
+        else:
+            return False
 
     def get_table(self, table_name):
-        if self.engine.has_table(table_name):
-            return self.meta.tables[table_name]
-        else:
-            return None
+        return self.meta.tables.get(table_name)
 
     def read_table(self, table_name, index_col=None, coerce_float=True,
                    parse_dates=None, columns=None):
diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index 4785c4aa8b79d..c623ad43ee56c 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+from __future__ import print_function, division
 import unittest
 import sqlite3
 import csv
@@ -36,11 +36,19 @@
             `PetalLength` DOUBLE,
             `PetalWidth` DOUBLE,
             `Name` VARCHAR(200)
+        )""",
+        'postgresql': """CREATE TABLE iris (
+            "SepalLength" DOUBLE PRECISION,
+            "SepalWidth" DOUBLE PRECISION,
+            "PetalLength" DOUBLE PRECISION,
+            "PetalWidth" DOUBLE PRECISION,
+            "Name" VARCHAR(200)
         )"""
     },
     'insert_iris': {
         'sqlite': """INSERT INTO iris VALUES(?, ?, ?, ?, ?)""",
-        'mysql': """INSERT INTO iris VALUES(%s, %s, %s, %s, "%s");"""
+        'mysql': """INSERT INTO iris VALUES(%s, %s, %s, %s, "%s");""",
+        'postgresql': """INSERT INTO iris VALUES(%s, %s, %s, %s, %s);"""
     },
     'create_test_types': {
         'sqlite': """CREATE TABLE types_test_data (
@@ -62,6 +70,16 @@
             `BoolCol` BOOLEAN,
             `IntColWithNull` INTEGER,
             `BoolColWithNull` BOOLEAN
+        )""",
+        'postgresql': """CREATE TABLE types_test_data (
+            "TextCol" TEXT,
+            "DateCol" TIMESTAMP,
+            "IntDateCol" INTEGER,
+            "FloatCol" DOUBLE PRECISION,
+            "IntCol" INTEGER,
+            "BoolCol" BOOLEAN,
+            "IntColWithNull" INTEGER,
+            "BoolColWithNull" BOOLEAN
        )"""
     },
     'insert_test_types': {
@@ -72,6 +90,10 @@
         'mysql': """
                 INSERT INTO types_test_data
                 VALUES("%s", %s, %s, %s, %s, %s, %s, %s)
+                """,
+        'postgresql': """
+                INSERT INTO types_test_data
+                VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
                 """
     }
 }
@@ -403,29 +425,13 @@ def test_date_and_index(self):
             "IntDateCol loaded with incorrect type")
 
 
-class TestSQLAlchemy(PandasSQLTest):
+class _TestSQLAlchemy(PandasSQLTest):
 
-    '''
-    Test the sqlalchemy backend against an in-memory sqlite database.
+    """
+    Base class for testing the sqlalchemy backend. Subclasses for specific
+    database types are created below.
     Assume that sqlalchemy takes case of the DB specifics
-    '''
-    flavor = 'sqlite'
-
-    def connect(self):
-        return sqlalchemy.create_engine('sqlite:///:memory:')
-
-    def setUp(self):
-        # Skip this test if SQLAlchemy not available
-        if not SQLALCHEMY_INSTALLED:
-            raise nose.SkipTest('SQLAlchemy not installed')
-
-        self.conn = self.connect()
-        self.pandasSQL = sql.PandasSQLAlchemy(self.conn)
-
-        self._load_iris_data()
-        self._load_raw_sql()
-
-        self._load_test1_data()
+    """
 
     def test_read_sql(self):
         self._read_sql_iris()
@@ -491,32 +497,31 @@ def test_read_table_absent(self):
             ValueError, sql.read_table, "this_doesnt_exist", con=self.conn)
 
     def test_default_type_convertion(self):
-        """ Test default type conversion"""
         df = sql.read_table("types_test_data", self.conn)
-        self.assertTrue(
-            issubclass(df.FloatCol.dtype.type, np.floating), "FloatCol loaded with incorrect type")
-        self.assertTrue(
-            issubclass(df.IntCol.dtype.type, np.integer), "IntCol loaded with incorrect type")
-        self.assertTrue(
-            issubclass(df.BoolCol.dtype.type, np.integer), "BoolCol loaded with incorrect type")
+
+        self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating),
+                        "FloatCol loaded with incorrect type")
+        self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer),
+                        "IntCol loaded with incorrect type")
+        self.assertTrue(issubclass(df.BoolCol.dtype.type, np.bool_),
+                        "BoolCol loaded with incorrect type")
 
         # Int column with NA values stays as float
         self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating),
                         "IntColWithNull loaded with incorrect type")
-        # Non-native Bool column with NA values stays as float
-        self.assertTrue(
-            issubclass(df.BoolColWithNull.dtype.type, np.floating), "BoolCol loaded with incorrect type")
+        # Bool column with NA values becomes object
+        self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.object),
+                        "BoolColWithNull loaded with incorrect type")
 
     def test_default_date_load(self):
         df = sql.read_table("types_test_data", self.conn)
 
         # IMPORTANT - sqlite has no native date type, so shouldn't parse, but
         # MySQL SHOULD be converted.
-        self.assertFalse(
+        self.assertTrue(
             issubclass(df.DateCol.dtype.type, np.datetime64),
             "DateCol loaded with incorrect type")
 
     def test_date_parsing(self):
-        """ Test date parsing """
         # No Parsing
         df = sql.read_table("types_test_data", self.conn)
 
@@ -551,6 +556,55 @@ def test_date_parsing(self):
             "IntDateCol loaded with incorrect type")
 
 
+class TestSQLiteAlchemy(_TestSQLAlchemy):
+
+    """
+    Test the sqlalchemy backend against an in-memory sqlite database.
+ """ + flavor = 'sqlite' + + def connect(self): + return sqlalchemy.create_engine('sqlite:///:memory:') + + def setUp(self): + # Skip this test if SQLAlchemy not available + if not SQLALCHEMY_INSTALLED: + raise nose.SkipTest('SQLAlchemy not installed') + + self.conn = self.connect() + self.pandasSQL = sql.PandasSQLAlchemy(self.conn) + + self._load_iris_data() + self._load_raw_sql() + + self._load_test1_data() + + def test_default_type_convertion(self): + df = sql.read_table("types_test_data", self.conn) + + self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating), + "FloatCol loaded with incorrect type") + self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer), + "IntCol loaded with incorrect type") + # sqlite has no boolean type, so integer type is returned + self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer), + "BoolCol loaded with incorrect type") + + # Int column with NA values stays as float + self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), + "IntColWithNull loaded with incorrect type") + # Non-native Bool column with NA values stays as float + self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating), + "BoolColWithNull loaded with incorrect type") + + def test_default_date_load(self): + df = sql.read_table("types_test_data", self.conn) + + # IMPORTANT - sqlite has no native date type, so shouldn't parse, but + self.assertFalse(issubclass(df.DateCol.dtype.type, np.datetime64), + "DateCol loaded with incorrect type") + + # --- Test SQLITE fallback class TestSQLite(PandasSQLTest): @@ -660,7 +714,7 @@ def tearDown(self): self.conn.close() -class TestMySQLAlchemy(TestSQLAlchemy): +class TestMySQLAlchemy(_TestSQLAlchemy): flavor = 'mysql' def connect(self): @@ -691,13 +745,39 @@ def tearDown(self): for table in c.fetchall(): self.conn.execute('DROP TABLE %s' % table[0]) - def test_default_date_load(self): - df = sql.read_table("types_test_data", self.conn) - # IMPORTANT - sqlite has no native date type, so shouldn't parse, - # but MySQL SHOULD be converted. - self.assertTrue( - issubclass(df.DateCol.dtype.type, np.datetime64), "DateCol loaded with incorrect type") +class TestPostgreSQLAlchemy(_TestSQLAlchemy): + flavor = 'postgresql' + + def connect(self): + return sqlalchemy.create_engine( + 'postgresql+{driver}://postgres@localhost/pandas_nosetest'.format(driver=self.driver)) + + def setUp(self): + if not SQLALCHEMY_INSTALLED: + raise nose.SkipTest('SQLAlchemy not installed') + + try: + import psycopg2 + self.driver = 'psycopg2' + + except ImportError: + raise nose.SkipTest + + self.conn = self.connect() + self.pandasSQL = sql.PandasSQLAlchemy(self.conn) + + self._load_iris_data() + self._load_raw_sql() + + self._load_test1_data() + + def tearDown(self): + c = self.conn.execute( + "SELECT table_name FROM information_schema.tables" + " WHERE table_schema = 'public'") + for table in c.fetchall(): + self.conn.execute("DROP TABLE %s" % table[0]) if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],