Merge pull request #11432 from jorisvandenbossche/sql-unicode-column

jorisvandenbossche · jorisvandenbossche · commit 9c0b0e98f47a · 2015-11-16T13:15:32.000+01:00
BUG: fix UnicodeEncodeError with to_sql and unicode column names (GH11431)
diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
@@ -130,6 +130,13 @@ Bug Fixes
 - Bug in ``squeeze()`` with zero length arrays (:issue:`11230`, :issue:`8999`)
 - Bug in ``describe()`` dropping column names for hierarchical indexes (:issue:`11517`)
 - Bug in ``DataFrame.pct_change()`` not propagating ``axis`` keyword on ``.fillna`` method (:issue:`11150`)
+
+
+
+
+
+
+- Bug in ``to_sql`` using unicode column names giving UnicodeEncodeError (:issue:`11431`).
 - Fix regression in setting of ``xticks`` in ``plot`` (:issue:`11529`).
 - Bug in ``holiday.dates`` where observance rules could not be applied to holiday and doc enhancement (:issue:`11477`, :issue:`11533`)
 - Fix plotting issues when having plain ``Axes`` instances instead of ``SubplotAxes`` (:issue:`11520`, :issue:`11556`).
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
@@ -14,7 +14,7 @@
 
 import pandas.lib as lib
 import pandas.core.common as com
-from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
+from pandas.compat import lzip, map, zip, raise_with_traceback, string_types, text_type
 from pandas.core.api import DataFrame, Series
 from pandas.core.common import isnull
 from pandas.core.base import PandasObject
@@ -711,7 +711,7 @@ def insert_data(self):
         else:
             temp = self.frame
 
-        column_names = list(map(str, temp.columns))
+        column_names = list(map(text_type, temp.columns))
         ncols = len(column_names)
         data_list = [None] * ncols
         blocks = temp._data.blocks
@@ -853,7 +853,7 @@ def _get_column_names_and_types(self, dtype_mapper):
                 column_names_and_types.append((idx_label, idx_type, True))
 
         column_names_and_types += [
-            (str(self.frame.columns[i]),
+            (text_type(self.frame.columns[i]),
              dtype_mapper(self.frame.iloc[:, i]),
              False)
             for i in range(len(self.frame.columns))
@@ -1400,7 +1400,7 @@ def _execute_create(self):
                 conn.execute(stmt)
 
     def insert_statement(self):
-        names = list(map(str, self.frame.columns))
+        names = list(map(text_type, self.frame.columns))
         flv = self.pd_sql.flavor
         wld = _SQL_WILDCARD[flv]  # wildcard char
         escape = _SQL_GET_IDENTIFIER[flv]
diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
@@ -834,6 +834,11 @@ def test_categorical(self):
 
         tm.assert_frame_equal(res, df)
 
+    def test_unicode_column_name(self):
+        # GH 11431
+        df = DataFrame([[1,2],[3,4]], columns = [u'\xe9',u'b'])
+        df.to_sql('test_unicode', self.conn, index=False)
+
 
 class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi):
     """
@@ -1992,7 +1997,7 @@ def test_illegal_names(self):
         for ndx, weird_name in enumerate(['test_weird_name]','test_weird_name[',
             'test_weird_name`','test_weird_name"', 'test_weird_name\'',
             '_b.test_weird_name_01-30', '"_b.test_weird_name_01-30"',
-            '99beginswithnumber', '12345']):
+            '99beginswithnumber', '12345', u'\xe9']):
             df.to_sql(weird_name, self.conn, flavor=self.flavor)
             sql.table_exists(weird_name, self.conn)