Skip to content

Fixed incorrect datatype conversion on multi-indexes #8022

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ There are no experimental changes in 0.15.0

Bug Fixes
~~~~~~~~~
- Bug in the dtypes of ``MultiIndex`` levels getting mixed up when a DataFrame is saved to a SQL table (:issue:`8021`)
- Bug in Series 0-division with a float and integer operand dtypes (:issue:`7785`)
- Bug in ``Series.astype("unicode")`` not calling ``unicode`` on the values correctly (:issue:`7758`)
- Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`)
Expand Down
41 changes: 23 additions & 18 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,20 +664,28 @@ def _index_name(self, index, index_label):
else:
return None

def _get_column_names_and_types(self, dtype_mapper):
    """Return ``(name, type)`` pairs for the index levels and the columns.

    Parameters
    ----------
    dtype_mapper : callable
        Called once with the dtype of every index level and of every
        frame column; its return value becomes the second element of
        the corresponding pair.

    Returns
    -------
    list of tuple
        One ``(label, mapped_type)`` pair per entry of ``self.index``
        (skipped entirely when ``self.index`` is None), followed by one
        ``(str(column), mapped_type)`` pair per column of ``self.frame``.
    """
    column_names_and_types = []
    if self.index is not None:
        # Walk the labels in their natural order so that label ``i`` is
        # paired with the dtype of index level ``i``.
        for i, idx_label in enumerate(self.index):
            idx_type = dtype_mapper(
                self.frame.index.get_level_values(i).dtype)
            column_names_and_types.append((idx_label, idx_type))

    # Column names are stringified; index labels are passed through as-is.
    column_names_and_types.extend(
        (str(name), dtype_mapper(dtype))
        for name, dtype in zip(self.frame.columns, self.frame.dtypes)
    )
    return column_names_and_types

def _create_table_statement(self):
from sqlalchemy import Table, Column

columns = list(map(str, self.frame.columns))
column_types = map(self._sqlalchemy_type, self.frame.dtypes)
column_names_and_types = \
self._get_column_names_and_types(self._sqlalchemy_type)

columns = [Column(name, typ)
for name, typ in zip(columns, column_types)]

if self.index is not None:
for i, idx_label in enumerate(self.index[::-1]):
idx_type = self._sqlalchemy_type(
self.frame.index.get_level_values(i))
columns.insert(0, Column(idx_label, idx_type, index=True))
for name, typ in column_names_and_types]

return Table(self.name, self.pd_sql.meta, *columns)

Expand Down Expand Up @@ -957,16 +965,13 @@ def insert(self):
def _create_table_statement(self):
"Return a CREATE TABLE statement to suit the contents of a DataFrame."

columns = list(map(str, self.frame.columns))
column_names_and_types = \
self._get_column_names_and_types(self._sql_type_name)

pat = re.compile('\s+')
if any(map(pat.search, columns)):
column_names = [col_name for col_name, _ in column_names_and_types]
if any(map(pat.search, column_names)):
warnings.warn(_SAFE_NAMES_WARNING)
column_types = [self._sql_type_name(typ) for typ in self.frame.dtypes]

if self.index is not None:
for i, idx_label in enumerate(self.index[::-1]):
columns.insert(0, idx_label)
column_types.insert(0, self._sql_type_name(self.frame.index.get_level_values(i).dtype))

flv = self.pd_sql.flavor

Expand All @@ -976,7 +981,7 @@ def _create_table_statement(self):
col_template = br_l + '%s' + br_r + ' %s'

columns = ',\n '.join(col_template %
x for x in zip(columns, column_types))
x for x in column_names_and_types)
template = """CREATE TABLE %(name)s (
%(columns)s
)"""
Expand Down
17 changes: 11 additions & 6 deletions pandas/io/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,15 @@ def test_to_sql_index_label_multiindex(self):
'test_index_label', self.conn, if_exists='replace',
index_label='C')

def test_multiindex_roundtrip(self):
    # Columns 'A' (ints) and 'B' (floats) become the two index levels,
    # leaving 'C' as the only data column.
    df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')],
                                columns=['A', 'B', 'C'],
                                index=['A', 'B'])

    df.to_sql('test_multiindex_roundtrip', self.conn)
    result = sql.read_sql_query('SELECT * FROM test_multiindex_roundtrip',
                                self.conn, index_col=['A', 'B'])
    # check_index_type=True makes the comparison cover the index type as
    # well as the values.
    tm.assert_frame_equal(df, result, check_index_type=True)

def test_integer_col_names(self):
df = DataFrame([[1, 2], [3, 4]], columns=[0, 1])
sql.to_sql(df, "test_frame_integer_col_names", self.conn,
Expand Down Expand Up @@ -641,9 +650,7 @@ def test_read_sql_delegate(self):
"SELECT * FROM iris", self.conn)
iris_frame2 = sql.read_sql(
"SELECT * FROM iris", self.conn)
tm.assert_frame_equal(iris_frame1, iris_frame2,
"read_sql and read_sql_query have not the same"
" result with a query")
tm.assert_frame_equal(iris_frame1, iris_frame2)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unrelated, but I changed it because I think it is a bug: `assert_frame_equal` does not take an error message argument.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, indeed, good catch


iris_frame1 = sql.read_sql_table('iris', self.conn)
iris_frame2 = sql.read_sql('iris', self.conn)
Expand Down Expand Up @@ -697,9 +704,7 @@ def test_sql_open_close(self):
def test_read_sql_delegate(self):
iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn)
iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn)
tm.assert_frame_equal(iris_frame1, iris_frame2,
"read_sql and read_sql_query have not the same"
" result with a query")
tm.assert_frame_equal(iris_frame1, iris_frame2)

self.assertRaises(sql.DatabaseError, sql.read_sql, 'iris', self.conn)

Expand Down