Skip to content

Commit 3b8a192

Browse files
committed
BUG: better unicode compatibility from DataFrame.to_html, close #891
1 parent 0fb5725 commit 3b8a192

File tree

3 files changed

+48
-31
lines changed

3 files changed

+48
-31
lines changed

pandas/core/format.py

+38 -25
Original file line number | Diff line number | Diff line change
@@ -222,33 +222,39 @@ def to_html(self):
222222
"""
223223
Render a DataFrame to a html table.
224224
"""
225-
def write(buf, s, indent=0):
226-
buf.write(unicode((' ' * indent) + str(s) + '\n'))
225+
def _str(x):
226+
if not isinstance(x, basestring):
227+
return str(x)
228+
return x
227229

228-
def write_th(buf, s, indent=0):
229-
write(buf, '<th>%s</th>' % str(s), indent)
230+
elements = []
231+
def write(s, indent=0):
232+
elements.append(' ' * indent + _str(s))
230233

231-
def write_td(buf, s, indent=0):
232-
write(buf, '<td>%s</td>' % str(s), indent)
233234

234-
def write_tr(buf, l, indent=0, indent_delta=4, header=False):
235-
write(buf, '<tr>', indent)
235+
def write_th(s, indent=0):
236+
write('<th>%s</th>' % _str(s), indent)
237+
238+
def write_td(s, indent=0):
239+
write('<td>%s</td>' % _str(s), indent)
240+
241+
def write_tr(l, indent=0, indent_delta=4, header=False):
242+
write('<tr>', indent)
236243
indent += indent_delta
237244
if header:
238245
for s in l:
239-
write_th(buf, s, indent)
246+
write_th(s, indent)
240247
else:
241248
for s in l:
242-
write_td(buf, s, indent)
249+
write_td(s, indent)
243250
indent -= indent_delta
244-
write(buf, '</tr>', indent)
251+
write('</tr>', indent)
245252

246253
indent = 0
247254
indent_delta = 2
248255
frame = self.frame
249-
buf = self.buf
250256

251-
write(buf, '<table border="1">', indent)
257+
write('<table border="1">', indent)
252258

253259
def _column_header():
254260
row = [''] * (frame.index.nlevels - 1)
@@ -263,31 +269,30 @@ def _column_header():
263269
return row
264270

265271
if len(frame.columns) == 0 or len(frame.index) == 0:
266-
write(buf, '<tbody>', indent + indent_delta)
267-
write_tr(buf,
268-
[repr(frame.index),
272+
write('<tbody>', indent + indent_delta)
273+
write_tr([repr(frame.index),
269274
'Empty %s' % type(self.frame).__name__],
270275
indent + (2 * indent_delta),
271276
indent_delta)
272-
write(buf, '</tbody>', indent + indent_delta)
277+
write('</tbody>', indent + indent_delta)
273278
else:
274279
indent += indent_delta
275280

276281
# header row
277282
if self.header:
278-
write(buf, '<thead>', indent)
283+
write('<thead>', indent)
279284
row = []
280285

281286
col_row = _column_header()
282287
indent += indent_delta
283-
write_tr(buf, col_row, indent, indent_delta, header=True)
288+
write_tr(col_row, indent, indent_delta, header=True)
284289
if self.has_index_names:
285290
row = frame.index.names + [''] * len(self.columns)
286-
write_tr(buf, row, indent, indent_delta, header=True)
291+
write_tr(row, indent, indent_delta, header=True)
287292

288-
write(buf, '</thead>', indent)
293+
write('</thead>', indent)
289294

290-
write(buf, '<tbody>', indent)
295+
write('<tbody>', indent)
291296

292297
_bold_row = self.kwds.get('bold_rows', False)
293298
def _maybe_bold_row(x):
@@ -311,12 +316,14 @@ def _maybe_bold_row(x):
311316
row.append(_maybe_bold_row(frame.index[i]))
312317
for col in self.columns:
313318
row.append(fmt_values[col][i])
314-
write_tr(buf, row, indent, indent_delta)
319+
write_tr(row, indent, indent_delta)
315320
indent -= indent_delta
316-
write(buf, '</tbody>', indent)
321+
write('</tbody>', indent)
317322
indent -= indent_delta
318323

319-
write(buf, '</table>', indent)
324+
write('</table>', indent)
325+
326+
_put_lines(self.buf, elements)
320327

321328
def _get_formatted_column_labels(self):
322329
from pandas.core.index import _sparsify
@@ -768,6 +775,12 @@ def reset(self):
768775
print_config = _GlobalPrintConfig()
769776

770777

778+
def _put_lines(buf, lines):
779+
if any(isinstance(x, unicode) for x in lines):
780+
lines = [unicode(x) for x in lines]
781+
print >> buf, '\n'.join(lines)
782+
783+
771784
if __name__ == '__main__':
772785
arr = np.array([746.03, 0.00, 5620.00, 1592.36])
773786
# arr = np.array([11111111.1, 1.55])

pandas/core/frame.py

+3 -6
Original file line number | Diff line number | Diff line change
@@ -1117,14 +1117,11 @@ def info(self, verbose=True, buf=None):
11171117
If False, don't print column count summary
11181118
buf : writable buffer, defaults to sys.stdout
11191119
"""
1120+
from pandas.core.format import _put_lines
1121+
11201122
if buf is None: # pragma: no cover
11211123
buf = sys.stdout
11221124

1123-
def _put_lines(buf, lines):
1124-
if any(isinstance(x, unicode) for x in lines):
1125-
lines = [unicode(x) for x in lines]
1126-
print >> buf, '\n'.join(lines)
1127-
11281125
lines = []
11291126

11301127
lines.append(str(type(self)))
@@ -3866,7 +3863,7 @@ def plot(self, subplots=False, sharex=True, sharey=False, use_index=True,
38663863
Use index as ticks for x axis
38673864
kind : {'line', 'bar'}
38683865
sort_columns: boolean, default True
3869-
Sort column names to determine plot ordering
3866+
Sort column names to determine plot ordering
38703867
kwds : keywords
38713868
Options to pass to Axis.plot
38723869

pandas/tests/test_format.py

+7
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,13 @@ def test_to_string_buffer_all_unicode(self):
128128
# this should work
129129
buf.getvalue()
130130

131+
def test_to_html_unicode(self):
132+
# it works!
133+
df = DataFrame({u'\u03c3' : np.arange(10.)})
134+
df.to_html()
135+
df = DataFrame({'A' : [u'\u03c3']})
136+
df.to_html()
137+
131138
def test_unicode_problem_decoding_as_ascii(self):
132139
dm = DataFrame({u'c/\u03c3': Series({'test':np.NaN})})
133140
unicode(dm.to_string())

0 commit comments

Comments
 (0)