BUG: better unicode compatibility from DataFrame.to_html, close #891

wesm · wesm · commit 3b8a1924e38a · 2012-03-15T17:09:53.000-04:00
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -222,33 +222,39 @@ def to_html(self):
         """
         Render a DataFrame to a html table.
         """
-        def write(buf, s, indent=0):
-            buf.write(unicode((' ' * indent) + str(s) + '\n'))
+        def _str(x):
+            if not isinstance(x, basestring):
+                return str(x)
+            return x
 
-        def write_th(buf, s, indent=0):
-            write(buf, '<th>%s</th>' % str(s), indent)
+        elements = []
+        def write(s, indent=0):
+            elements.append(' ' * indent + _str(s))
 
-        def write_td(buf, s, indent=0):
-            write(buf, '<td>%s</td>' % str(s), indent)
 
-        def write_tr(buf, l, indent=0, indent_delta=4, header=False):
-            write(buf, '<tr>', indent)
+        def write_th(s, indent=0):
+            write('<th>%s</th>' % _str(s), indent)
+
+        def write_td(s, indent=0):
+            write('<td>%s</td>' % _str(s), indent)
+
+        def write_tr(l, indent=0, indent_delta=4, header=False):
+            write('<tr>', indent)
             indent += indent_delta
             if header:
                 for s in l:
-                    write_th(buf, s, indent)
+                    write_th(s, indent)
             else:
                 for s in l:
-                    write_td(buf, s, indent)
+                    write_td(s, indent)
             indent -= indent_delta
-            write(buf, '</tr>', indent)
+            write('</tr>', indent)
 
         indent = 0
         indent_delta = 2
         frame = self.frame
-        buf = self.buf
 
-        write(buf, '<table border="1">', indent)
+        write('<table border="1">', indent)
 
         def _column_header():
             row = [''] * (frame.index.nlevels - 1)
@@ -263,31 +269,30 @@ def _column_header():
             return row
 
         if len(frame.columns) == 0 or len(frame.index) == 0:
-            write(buf, '<tbody>', indent  + indent_delta)
-            write_tr(buf,
-                     [repr(frame.index),
+            write('<tbody>', indent  + indent_delta)
+            write_tr([repr(frame.index),
                       'Empty %s' % type(self.frame).__name__],
                      indent + (2 * indent_delta),
                      indent_delta)
-            write(buf, '</tbody>', indent  + indent_delta)
+            write('</tbody>', indent  + indent_delta)
         else:
             indent += indent_delta
 
             # header row
             if self.header:
-                write(buf, '<thead>', indent)
+                write('<thead>', indent)
                 row = []
 
                 col_row = _column_header()
                 indent += indent_delta
-                write_tr(buf, col_row, indent, indent_delta, header=True)
+                write_tr(col_row, indent, indent_delta, header=True)
                 if self.has_index_names:
                     row = frame.index.names + [''] * len(self.columns)
-                    write_tr(buf, row, indent, indent_delta, header=True)
+                    write_tr(row, indent, indent_delta, header=True)
 
-                write(buf, '</thead>', indent)
+                write('</thead>', indent)
 
-            write(buf, '<tbody>', indent)
+            write('<tbody>', indent)
 
             _bold_row = self.kwds.get('bold_rows', False)
             def _maybe_bold_row(x):
@@ -311,12 +316,14 @@ def _maybe_bold_row(x):
                     row.append(_maybe_bold_row(frame.index[i]))
                 for col in self.columns:
                     row.append(fmt_values[col][i])
-                write_tr(buf, row, indent, indent_delta)
+                write_tr(row, indent, indent_delta)
             indent -= indent_delta
-            write(buf, '</tbody>', indent)
+            write('</tbody>', indent)
             indent -= indent_delta
 
-        write(buf, '</table>', indent)
+        write('</table>', indent)
+
+        _put_lines(self.buf, elements)
 
     def _get_formatted_column_labels(self):
         from pandas.core.index import _sparsify
@@ -768,6 +775,12 @@ def reset(self):
 print_config = _GlobalPrintConfig()
 
 
+def _put_lines(buf, lines):
+    if any(isinstance(x, unicode) for x in lines):
+        lines = [unicode(x) for x in lines]
+    print >> buf, '\n'.join(lines)
+
+
 if __name__ == '__main__':
     arr = np.array([746.03, 0.00, 5620.00, 1592.36])
     # arr = np.array([11111111.1, 1.55])
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1117,14 +1117,11 @@ def info(self, verbose=True, buf=None):
             If False, don't print column count summary
         buf : writable buffer, defaults to sys.stdout
         """
+        from pandas.core.format import _put_lines
+
         if buf is None:  # pragma: no cover
             buf = sys.stdout
 
-        def _put_lines(buf, lines):
-            if any(isinstance(x, unicode) for x in lines):
-                lines = [unicode(x) for x in lines]
-            print >> buf, '\n'.join(lines)
-
         lines = []
 
         lines.append(str(type(self)))
@@ -3866,7 +3863,7 @@ def plot(self, subplots=False, sharex=True, sharey=False, use_index=True,
             Use index as ticks for x axis
         kind : {'line', 'bar'}
         sort_columns: boolean, default True
-            Sort column names to determine plot ordering 
+            Sort column names to determine plot ordering
         kwds : keywords
             Options to pass to Axis.plot
 
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -128,6 +128,13 @@ def test_to_string_buffer_all_unicode(self):
         # this should work
         buf.getvalue()
 
+    def test_to_html_unicode(self):
+        # it works!
+        df = DataFrame({u'\u03c3' : np.arange(10.)})
+        df.to_html()
+        df = DataFrame({'A' : [u'\u03c3']})
+        df.to_html()
+
     def test_unicode_problem_decoding_as_ascii(self):
         dm = DataFrame({u'c/\u03c3': Series({'test':np.NaN})})
         unicode(dm.to_string())