Skip to content

Commit 9c0b0e9

Browse files
Merge pull request #11432 from jorisvandenbossche/sql-unicode-column
BUG: fix UnicodeEncodeError with to_sql and unicode column names (GH11431)
2 parents 18ae314 + edd79ef commit 9c0b0e9

File tree

3 files changed

+17
-5
lines changed

3 files changed

+17
-5
lines changed

doc/source/whatsnew/v0.17.1.txt

+7
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,13 @@ Bug Fixes
130130
- Bug in ``squeeze()`` with zero length arrays (:issue:`11230`, :issue:`8999`)
131131
- Bug in ``describe()`` dropping column names for hierarchical indexes (:issue:`11517`)
132132
- Bug in ``DataFrame.pct_change()`` not propagating ``axis`` keyword on ``.fillna`` method (:issue:`11150`)
133+
134+
135+
136+
137+
138+
139+
- Bug in ``to_sql`` with unicode column names raising a ``UnicodeEncodeError`` (:issue:`11431`).
133140
- Fix regression in setting of ``xticks`` in ``plot`` (:issue:`11529`).
134141
- Bug in ``holiday.dates`` where observance rules could not be applied to holiday and doc enhancement (:issue:`11477`, :issue:`11533`)
135142
- Fix plotting issues when having plain ``Axes`` instances instead of ``SubplotAxes`` (:issue:`11520`, :issue:`11556`).

pandas/io/sql.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
import pandas.lib as lib
1616
import pandas.core.common as com
17-
from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
17+
from pandas.compat import lzip, map, zip, raise_with_traceback, string_types, text_type
1818
from pandas.core.api import DataFrame, Series
1919
from pandas.core.common import isnull
2020
from pandas.core.base import PandasObject
@@ -711,7 +711,7 @@ def insert_data(self):
711711
else:
712712
temp = self.frame
713713

714-
column_names = list(map(str, temp.columns))
714+
column_names = list(map(text_type, temp.columns))
715715
ncols = len(column_names)
716716
data_list = [None] * ncols
717717
blocks = temp._data.blocks
@@ -853,7 +853,7 @@ def _get_column_names_and_types(self, dtype_mapper):
853853
column_names_and_types.append((idx_label, idx_type, True))
854854

855855
column_names_and_types += [
856-
(str(self.frame.columns[i]),
856+
(text_type(self.frame.columns[i]),
857857
dtype_mapper(self.frame.iloc[:, i]),
858858
False)
859859
for i in range(len(self.frame.columns))
@@ -1400,7 +1400,7 @@ def _execute_create(self):
14001400
conn.execute(stmt)
14011401

14021402
def insert_statement(self):
1403-
names = list(map(str, self.frame.columns))
1403+
names = list(map(text_type, self.frame.columns))
14041404
flv = self.pd_sql.flavor
14051405
wld = _SQL_WILDCARD[flv] # wildcard char
14061406
escape = _SQL_GET_IDENTIFIER[flv]

pandas/io/tests/test_sql.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,11 @@ def test_categorical(self):
834834

835835
tm.assert_frame_equal(res, df)
836836

837+
def test_unicode_column_name(self):
838+
# GH 11431
839+
df = DataFrame([[1,2],[3,4]], columns = [u'\xe9',u'b'])
840+
df.to_sql('test_unicode', self.conn, index=False)
841+
837842

838843
class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi):
839844
"""
@@ -1992,7 +1997,7 @@ def test_illegal_names(self):
19921997
for ndx, weird_name in enumerate(['test_weird_name]','test_weird_name[',
19931998
'test_weird_name`','test_weird_name"', 'test_weird_name\'',
19941999
'_b.test_weird_name_01-30', '"_b.test_weird_name_01-30"',
1995-
'99beginswithnumber', '12345']):
2000+
'99beginswithnumber', '12345', u'\xe9']):
19962001
df.to_sql(weird_name, self.conn, flavor=self.flavor)
19972002
sql.table_exists(weird_name, self.conn)
19982003

0 commit comments

Comments
 (0)