diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
index b3ac58a9fb84a..999f0cd0be8e7 100644
--- a/doc/source/whatsnew/v0.16.0.txt
+++ b/doc/source/whatsnew/v0.16.0.txt
@@ -202,3 +202,4 @@ Bug Fixes
 - Fixed issue in the ``xlsxwriter`` engine where it added a default 'General' format to cells if no other format wass applied. This prevented other row or column formatting being applied. (:issue:`9167`)
 - Fixes issue with ``index_col=False`` when ``usecols`` is also specified in ``read_csv``. (:issue:`9082`)
 - Bug where ``wide_to_long`` would modify the input stubnames list (:issue:`9204`)
+- Bug in ``to_sql`` not storing float64 values using double precision. (:issue:`9009`)
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index b4318bdc2a3bf..cd1c40b7b075a 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -908,7 +908,7 @@ def _sqlalchemy_type(self, col):
 
         col_type = self._get_notnull_col_dtype(col)
 
-        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
+        from sqlalchemy.types import (BigInteger, Integer, Float, Text, Boolean,
                                       DateTime, Date, Time)
 
         if col_type == 'datetime64' or col_type == 'datetime':
@@ -923,10 +923,16 @@ def _sqlalchemy_type(self, col):
                           "database.", UserWarning)
             return BigInteger
         elif col_type == 'floating':
-            return Float
+            # float32 -> single precision; everything else -> double (GH9009)
+            if col.dtype == 'float32':
+                return Float(precision=23)
+            else:
+                return Float(precision=53)
         elif col_type == 'integer':
-            # TODO: Refine integer size.
-            return BigInteger
+            if col.dtype == 'int32':
+                return Integer
+            else:
+                return BigInteger
         elif col_type == 'boolean':
             return Boolean
         elif col_type == 'date':
@@ -1187,9 +1193,22 @@ def has_table(self, name, schema=None):
     def get_table(self, table_name, schema=None):
         schema = schema or self.meta.schema
         if schema:
-            return self.meta.tables.get('.'.join([schema, table_name]))
+            tbl = self.meta.tables.get('.'.join([schema, table_name]))
         else:
-            return self.meta.tables.get(table_name)
+            tbl = self.meta.tables.get(table_name)
+
+        # No such table: return None, as before this change, instead of
+        # failing on ``tbl.columns`` below.
+        if tbl is None:
+            return None
+
+        # Avoid casting double-precision floats into decimals
+        from sqlalchemy import Numeric
+        for column in tbl.columns:
+            if isinstance(column.type, Numeric):
+                column.type.asdecimal = False
+
+        return tbl
 
     def drop_table(self, table_name, schema=None):
         schema = schema or self.meta.schema
@@ -1198,8 +1217,9 @@ def drop_table(self, table_name, schema=None):
             self.get_table(table_name, schema).drop()
             self.meta.clear()
 
-    def _create_sql_schema(self, frame, table_name, keys=None):
-        table = SQLTable(table_name, self, frame=frame, index=False, keys=keys)
+    def _create_sql_schema(self, frame, table_name, keys=None, dtype=None):
+        table = SQLTable(table_name, self, frame=frame, index=False, keys=keys,
+                         dtype=dtype)
         return str(table.sql_schema())
 
 
@@ -1213,7 +1233,7 @@ def _create_sql_schema(self, frame, table_name, keys=None):
         'sqlite': 'TEXT',
     },
     'floating': {
-        'mysql': 'FLOAT',
+        'mysql': 'DOUBLE',
         'sqlite': 'REAL',
     },
     'integer': {
@@ -1520,13 +1540,13 @@ def drop_table(self, name, schema=None):
         drop_sql = "DROP TABLE %s" % name
         self.execute(drop_sql)
 
-    def _create_sql_schema(self, frame, table_name, keys=None):
+    def _create_sql_schema(self, frame, table_name, keys=None, dtype=None):
         table = SQLiteTable(table_name, self, frame=frame, index=False,
-                            keys=keys)
+                            keys=keys, dtype=dtype)
         return str(table.sql_schema())
 
 
-def get_schema(frame, name, flavor='sqlite', keys=None, con=None):
+def get_schema(frame, name, flavor='sqlite', keys=None, con=None, dtype=None):
     """
     Get the SQL db table schema for the given frame.
 
@@ -1545,11 +1565,14 @@ def get_schema(frame, name, flavor='sqlite', keys=None, con=None):
         Using SQLAlchemy makes it possible to use any DB supported by that
         library.
         If a DBAPI2 object, only sqlite3 is supported.
+    dtype : dict of column name to SQL type, default None
+        Optionally specify the datatype for columns. The SQL type should
+        be a SQLAlchemy type, or a string for sqlite3 fallback connection.
 
     """
 
     pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
-    return pandas_sql._create_sql_schema(frame, name, keys=keys)
+    return pandas_sql._create_sql_schema(frame, name, keys=keys, dtype=dtype)
 
 
 # legacy names, with depreciation warnings and copied docs
diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index b185d530e056c..1d581b00e4b3c 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -651,6 +651,14 @@ def test_get_schema(self):
                                     con=self.conn)
         self.assertTrue('CREATE' in create_sql)
 
+    def test_get_schema_dtypes(self):
+        float_frame = DataFrame({'a': [1.1, 1.2], 'b': [2.1, 2.2]})
+        dtype = sqlalchemy.Integer if self.mode == 'sqlalchemy' else 'INTEGER'
+        create_sql = sql.get_schema(float_frame, 'test', 'sqlite',
+                                    con=self.conn, dtype={'b': dtype})
+        self.assertTrue('CREATE' in create_sql)
+        self.assertTrue('INTEGER' in create_sql)
+
     def test_chunksize_read(self):
         df = DataFrame(np.random.randn(22, 5), columns=list('abcde'))
         df.to_sql('test_chunksize', self.conn, index=False)
@@ -1233,7 +1241,6 @@ def test_dtype(self):
         df.to_sql('dtype_test3', self.conn, dtype={'B': sqlalchemy.String(10)})
         meta.reflect()
         sqltype = meta.tables['dtype_test3'].columns['B'].type
-        print(sqltype)
         self.assertTrue(isinstance(sqltype, sqlalchemy.String))
         self.assertEqual(sqltype.length, 10)
 
@@ -1262,6 +1269,36 @@ def test_notnull_dtype(self):
         self.assertTrue(isinstance(col_dict['Int'].type, sqltypes.Integer))
         self.assertTrue(isinstance(col_dict['Float'].type, sqltypes.Float))
 
+    def test_double_precision(self):
+        V = 1.23456789101112131415
+
+        df = DataFrame({'f32': Series([V], dtype='float32'),
+                        'f64': Series([V], dtype='float64'),
+                        'f64_as_f32': Series([V], dtype='float64'),
+                        'i32': Series([5], dtype='int32'),
+                        'i64': Series([5], dtype='int64'),
+                        })
+
+        df.to_sql('test_dtypes', self.conn, index=False, if_exists='replace',
+                  dtype={'f64_as_f32': sqlalchemy.Float(precision=23)})
+        res = sql.read_sql_table('test_dtypes', self.conn)
+
+        # check precision of float64
+        self.assertEqual(np.round(df['f64'].iloc[0], 14),
+                         np.round(res['f64'].iloc[0], 14))
+
+        # check sql types
+        meta = sqlalchemy.schema.MetaData(bind=self.conn)
+        meta.reflect()
+        col_dict = meta.tables['test_dtypes'].columns
+        self.assertEqual(str(col_dict['f32'].type),
+                         str(col_dict['f64_as_f32'].type))
+        self.assertTrue(isinstance(col_dict['f32'].type, sqltypes.Float))
+        self.assertTrue(isinstance(col_dict['f64'].type, sqltypes.Float))
+        self.assertTrue(isinstance(col_dict['i32'].type, sqltypes.Integer))
+        self.assertTrue(isinstance(col_dict['i64'].type, sqltypes.BigInteger))
+
+
 
 class TestSQLiteAlchemy(_TestSQLAlchemy):
     """