Skip to content

Commit 3fbc1ee

Browse files
Merge pull request #9041 from artemyk/doubleprecisionsql
ENH: Store in SQL using double precision
2 parents 3030bba + bb0c2e8 commit 3fbc1ee

File tree

3 files changed

+69
-14
lines changed

3 files changed

+69
-14
lines changed

doc/source/whatsnew/v0.16.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -202,3 +202,4 @@ Bug Fixes
202202
- Fixed issue in the ``xlsxwriter`` engine where it added a default 'General' format to cells if no other format was applied. This prevented other row or column formatting being applied. (:issue:`9167`)
203203
- Fixes issue with ``index_col=False`` when ``usecols`` is also specified in ``read_csv``. (:issue:`9082`)
204204
- Bug where ``wide_to_long`` would modify the input stubnames list (:issue:`9204`)
205+
- Bug in ``to_sql`` not storing ``float64`` values using double precision. (:issue:`9009`)

pandas/io/sql.py

+30-13
Original file line numberDiff line numberDiff line change
@@ -908,7 +908,7 @@ def _sqlalchemy_type(self, col):
908908

909909
col_type = self._get_notnull_col_dtype(col)
910910

911-
from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
911+
from sqlalchemy.types import (BigInteger, Integer, Float, Text, Boolean,
912912
DateTime, Date, Time)
913913

914914
if col_type == 'datetime64' or col_type == 'datetime':
@@ -923,10 +923,15 @@ def _sqlalchemy_type(self, col):
923923
"database.", UserWarning)
924924
return BigInteger
925925
elif col_type == 'floating':
926-
return Float
926+
if col.dtype == 'float32':
927+
return Float(precision=23)
928+
else:
929+
return Float(precision=53)
927930
elif col_type == 'integer':
928-
# TODO: Refine integer size.
929-
return BigInteger
931+
if col.dtype == 'int32':
932+
return Integer
933+
else:
934+
return BigInteger
930935
elif col_type == 'boolean':
931936
return Boolean
932937
elif col_type == 'date':
@@ -1187,9 +1192,17 @@ def has_table(self, name, schema=None):
11871192
def get_table(self, table_name, schema=None):
11881193
schema = schema or self.meta.schema
11891194
if schema:
1190-
return self.meta.tables.get('.'.join([schema, table_name]))
1195+
tbl = self.meta.tables.get('.'.join([schema, table_name]))
11911196
else:
1192-
return self.meta.tables.get(table_name)
1197+
tbl = self.meta.tables.get(table_name)
1198+
1199+
# Avoid casting double-precision floats into decimals
1200+
from sqlalchemy import Numeric
1201+
for column in tbl.columns:
1202+
if isinstance(column.type, Numeric):
1203+
column.type.asdecimal = False
1204+
1205+
return tbl
11931206

11941207
def drop_table(self, table_name, schema=None):
11951208
schema = schema or self.meta.schema
@@ -1198,8 +1211,9 @@ def drop_table(self, table_name, schema=None):
11981211
self.get_table(table_name, schema).drop()
11991212
self.meta.clear()
12001213

1201-
def _create_sql_schema(self, frame, table_name, keys=None):
1202-
table = SQLTable(table_name, self, frame=frame, index=False, keys=keys)
1214+
def _create_sql_schema(self, frame, table_name, keys=None, dtype=None):
1215+
table = SQLTable(table_name, self, frame=frame, index=False, keys=keys,
1216+
dtype=dtype)
12031217
return str(table.sql_schema())
12041218

12051219

@@ -1213,7 +1227,7 @@ def _create_sql_schema(self, frame, table_name, keys=None):
12131227
'sqlite': 'TEXT',
12141228
},
12151229
'floating': {
1216-
'mysql': 'FLOAT',
1230+
'mysql': 'DOUBLE',
12171231
'sqlite': 'REAL',
12181232
},
12191233
'integer': {
@@ -1520,13 +1534,13 @@ def drop_table(self, name, schema=None):
15201534
drop_sql = "DROP TABLE %s" % name
15211535
self.execute(drop_sql)
15221536

1523-
def _create_sql_schema(self, frame, table_name, keys=None):
1537+
def _create_sql_schema(self, frame, table_name, keys=None, dtype=None):
15241538
table = SQLiteTable(table_name, self, frame=frame, index=False,
1525-
keys=keys)
1539+
keys=keys, dtype=dtype)
15261540
return str(table.sql_schema())
15271541

15281542

1529-
def get_schema(frame, name, flavor='sqlite', keys=None, con=None):
1543+
def get_schema(frame, name, flavor='sqlite', keys=None, con=None, dtype=None):
15301544
"""
15311545
Get the SQL db table schema for the given frame.
15321546
@@ -1545,11 +1559,14 @@ def get_schema(frame, name, flavor='sqlite', keys=None, con=None):
15451559
Using SQLAlchemy makes it possible to use any DB supported by that
15461560
library.
15471561
If a DBAPI2 object, only sqlite3 is supported.
1562+
dtype : dict of column name to SQL type, default None
1563+
Optionally specifies the datatype for columns. The SQL type should
1564+
be a SQLAlchemy type, or a string for sqlite3 fallback connection.
15481565
15491566
"""
15501567

15511568
pandas_sql = pandasSQL_builder(con=con, flavor=flavor)
1552-
return pandas_sql._create_sql_schema(frame, name, keys=keys)
1569+
return pandas_sql._create_sql_schema(frame, name, keys=keys, dtype=dtype)
15531570

15541571

15551572
# legacy names, with deprecation warnings and copied docs

pandas/io/tests/test_sql.py

+38-1
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,14 @@ def test_get_schema(self):
651651
con=self.conn)
652652
self.assertTrue('CREATE' in create_sql)
653653

654+
def test_get_schema_dtypes(self):
655+
float_frame = DataFrame({'a':[1.1,1.2], 'b':[2.1,2.2]})
656+
dtype = sqlalchemy.Integer if self.mode == 'sqlalchemy' else 'INTEGER'
657+
create_sql = sql.get_schema(float_frame, 'test', 'sqlite',
658+
con=self.conn, dtype={'b':dtype})
659+
self.assertTrue('CREATE' in create_sql)
660+
self.assertTrue('INTEGER' in create_sql)
661+
654662
def test_chunksize_read(self):
655663
df = DataFrame(np.random.randn(22, 5), columns=list('abcde'))
656664
df.to_sql('test_chunksize', self.conn, index=False)
@@ -1233,7 +1241,6 @@ def test_dtype(self):
12331241
df.to_sql('dtype_test3', self.conn, dtype={'B': sqlalchemy.String(10)})
12341242
meta.reflect()
12351243
sqltype = meta.tables['dtype_test3'].columns['B'].type
1236-
print(sqltype)
12371244
self.assertTrue(isinstance(sqltype, sqlalchemy.String))
12381245
self.assertEqual(sqltype.length, 10)
12391246

@@ -1262,6 +1269,36 @@ def test_notnull_dtype(self):
12621269
self.assertTrue(isinstance(col_dict['Int'].type, sqltypes.Integer))
12631270
self.assertTrue(isinstance(col_dict['Float'].type, sqltypes.Float))
12641271

1272+
def test_double_precision(self):
1273+
V = 1.23456789101112131415
1274+
1275+
df = DataFrame({'f32':Series([V,], dtype='float32'),
1276+
'f64':Series([V,], dtype='float64'),
1277+
'f64_as_f32':Series([V,], dtype='float64'),
1278+
'i32':Series([5,], dtype='int32'),
1279+
'i64':Series([5,], dtype='int64'),
1280+
})
1281+
1282+
df.to_sql('test_dtypes', self.conn, index=False, if_exists='replace',
1283+
dtype={'f64_as_f32':sqlalchemy.Float(precision=23)})
1284+
res = sql.read_sql_table('test_dtypes', self.conn)
1285+
1286+
# check precision of float64
1287+
self.assertEqual(np.round(df['f64'].iloc[0],14),
1288+
np.round(res['f64'].iloc[0],14))
1289+
1290+
# check sql types
1291+
meta = sqlalchemy.schema.MetaData(bind=self.conn)
1292+
meta.reflect()
1293+
col_dict = meta.tables['test_dtypes'].columns
1294+
self.assertEqual(str(col_dict['f32'].type),
1295+
str(col_dict['f64_as_f32'].type))
1296+
self.assertTrue(isinstance(col_dict['f32'].type, sqltypes.Float))
1297+
self.assertTrue(isinstance(col_dict['f64'].type, sqltypes.Float))
1298+
self.assertTrue(isinstance(col_dict['i32'].type, sqltypes.Integer))
1299+
self.assertTrue(isinstance(col_dict['i64'].type, sqltypes.BigInteger))
1300+
1301+
12651302

12661303
class TestSQLiteAlchemy(_TestSQLAlchemy):
12671304
"""

0 commit comments

Comments
 (0)