Skip to content

Commit 742d7fb

Browse files
committed
Merge remote branch 'blais/master' into to-latex
* blais/master:
  BUG: Converted more unsafe characters for LaTeX.
  DataFrameFormatter: Implemented really basic (but working) LaTeX serialization support in DataFrame.to_latex().
  DataFormatter: Refactored to_string() in order to let the future to_latex() take advantage of it.
  DataFormatter: Added support for formatting the index in to_html() via a special '__index__' key to the formatters dict.
  Formatting: Added classes argument to DataFrame.to_html().

Conflicts:
  pandas/core/format.py
  pandas/core/frame.py
  pandas/tests/test_format.py
2 parents 24c5b8f + bfee97f commit 742d7fb

File tree

3 files changed

+155
-50
lines changed

3 files changed

+155
-50
lines changed

pandas/core/format.py

Lines changed: 99 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ class DataFrameFormatter(object):
152152
153153
self.to_string() : console-friendly tabular output
154154
self.to_html() : html table
155+
self.to_latex() : LaTeX tabular environment table
155156
156157
"""
157158

@@ -190,63 +191,103 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
190191
else:
191192
self.columns = frame.columns
192193

193-
def to_string(self, force_unicode=False):
194+
def _to_str_columns(self, force_unicode=False):
194195
"""
195-
Render a DataFrame to a console-friendly tabular output.
196+
Render a DataFrame to a list of columns (as lists of strings).
196197
"""
197198
frame = self.frame
198199

199-
to_write = []
200+
# may include levels names also
201+
str_index = self._get_formatted_index()
202+
str_columns = self._get_formatted_column_labels()
200203

201-
if len(frame.columns) == 0 or len(frame.index) == 0:
202-
info_line = (u'Empty %s\nColumns: %s\nIndex: %s'
203-
% (type(self.frame).__name__,
204-
frame.columns, frame.index))
205-
to_write.append(info_line)
206-
else:
207-
# may include levels names also
208-
str_index = self._get_formatted_index()
209-
str_columns = self._get_formatted_column_labels()
210-
211-
stringified = []
212-
213-
for i, c in enumerate(self.columns):
214-
if self.header:
215-
fmt_values = self._format_col(i)
216-
cheader = str_columns[i]
217-
max_len = max(max(_strlen(x) for x in fmt_values),
218-
max(len(x) for x in cheader))
219-
if self.justify == 'left':
220-
cheader = [x.ljust(max_len) for x in cheader]
221-
else:
222-
cheader = [x.rjust(max_len) for x in cheader]
223-
fmt_values = cheader + fmt_values
224-
stringified.append(_make_fixed_width(fmt_values,
225-
self.justify))
226-
else:
227-
stringified = [_make_fixed_width(self._format_col(i),
228-
self.justify)
229-
for i, c in enumerate(self.columns)]
204+
stringified = []
230205

231-
if self.index:
232-
to_write.append(adjoin(1, str_index, *stringified))
206+
for i, c in enumerate(self.columns):
207+
if self.header:
208+
fmt_values = self._format_col(i)
209+
cheader = str_columns[i]
210+
max_len = max(max(_strlen(x) for x in fmt_values),
211+
max(len(x) for x in cheader))
212+
if self.justify == 'left':
213+
cheader = [x.ljust(max_len) for x in cheader]
214+
else:
215+
cheader = [x.rjust(max_len) for x in cheader]
216+
fmt_values = cheader + fmt_values
217+
stringified.append(_make_fixed_width(fmt_values,
218+
self.justify))
233219
else:
234-
to_write.append(adjoin(1, *stringified))
220+
stringified = [_make_fixed_width(self._format_col(i),
221+
self.justify)
222+
for i, c in enumerate(self.columns)]
223+
224+
strcols = stringified
225+
if self.index:
226+
strcols.insert(0, str_index)
235227

236228
if not py3compat.PY3:
237229
if force_unicode:
238-
to_write = [unicode(s) for s in to_write]
230+
strcols = map(lambda col: map(unicode, col), strcols)
239231
else:
240232
# generally everything is plain strings, which has ascii
241233
# encoding. problem is when there is a char with value over 127
242234
# - everything then gets converted to unicode.
243235
try:
244-
for s in to_write:
245-
str(s)
236+
map(lambda col: map(str, col), strcols)
246237
except UnicodeError:
247-
to_write = [unicode(s) for s in to_write]
238+
strcols = map(lambda col: map(unicode, col), strcols)
239+
240+
return strcols
241+
242+
def to_string(self, force_unicode=False):
243+
"""
244+
Render a DataFrame to a console-friendly tabular output.
245+
"""
246+
frame = self.frame
247+
248+
if len(frame.columns) == 0 or len(frame.index) == 0:
249+
info_line = (u'Empty %s\nColumns: %s\nIndex: %s'
250+
% (type(self.frame).__name__,
251+
frame.columns, frame.index))
252+
text = info_line
253+
else:
254+
strcols = self._to_str_columns(force_unicode)
255+
text = adjoin(1, *strcols)
256+
257+
self.buf.writelines(text)
258+
259+
def to_latex(self, force_unicode=False, column_format=None):
260+
"""
261+
Render a DataFrame to a LaTeX tabular environment output.
262+
"""
263+
frame = self.frame
264+
265+
if len(frame.columns) == 0 or len(frame.index) == 0:
266+
info_line = (u'Empty %s\nColumns: %s\nIndex: %s'
267+
% (type(self.frame).__name__,
268+
frame.columns, frame.index))
269+
strcols = [[info_line]]
270+
else:
271+
strcols = self._to_str_columns(force_unicode)
272+
273+
if column_format is None:
274+
column_format = '|l|%s|' % '|'.join('c' for _ in strcols)
275+
else:
276+
assert isinstance(column_format, str)
248277

249-
self.buf.writelines(to_write)
278+
self.buf.write('\\begin{tabular}{%s}\n' % column_format)
279+
self.buf.write('\\hline\n')
280+
281+
nlevels = frame.index.nlevels
282+
for i, row in enumerate(izip(*strcols)):
283+
if i == nlevels:
284+
self.buf.write('\\hline\n') # End of header
285+
crow = [(x.replace('_', '\\_').replace('%', '\\%').replace('&', '\\&') if x else '{}') for x in row]
286+
self.buf.write(' & '.join(crow))
287+
self.buf.write(' \\\\\n')
288+
289+
self.buf.write('\\hline\n')
290+
self.buf.write('\\end{tabular}\n')
250291

251292
def _format_col(self, i):
252293
col = self.columns[i]
@@ -256,7 +297,7 @@ def _format_col(self, i):
256297
na_rep=self.na_rep,
257298
space=self.col_space)
258299

259-
def to_html(self):
300+
def to_html(self, classes=None):
260301
"""
261302
Render a DataFrame to a html table.
262303
"""
@@ -295,7 +336,13 @@ def write_tr(l, indent=0, indent_delta=4, header=False, align=None):
295336
indent_delta = 2
296337
frame = self.frame
297338

298-
write('<table border="1">', indent)
339+
_classes = ['dataframe'] # Default class.
340+
if classes is not None:
341+
if isinstance(classes, str):
342+
classes = classes.split()
343+
assert isinstance(classes, (list, tuple))
344+
_classes.extend(classes)
345+
write('<table border="1" class="%s">' % ' '.join(_classes), indent)
299346

300347
def _column_header():
301348
if self.index:
@@ -364,13 +411,20 @@ def _maybe_bold_row(x):
364411
fmt_values[i] = self._format_col(i)
365412

366413
# write values
414+
index_formatter = self.formatters.get('__index__', None)
367415
for i in range(len(frame)):
368416
row = []
417+
369418
if self.index:
419+
index_value = frame.index[i]
420+
if index_formatter:
421+
index_value = index_formatter(index_value)
422+
370423
if isinstance(frame.index, MultiIndex):
371-
row.extend(_maybe_bold_row(frame.index[i]))
424+
row.extend(_maybe_bold_row(index_value))
372425
else:
373-
row.append(_maybe_bold_row(frame.index[i]))
426+
row.append(_maybe_bold_row(index_value))
427+
374428
for j in range(len(self.columns)):
375429
row.append(fmt_values[j][i])
376430
write_tr(row, indent, indent_delta)
@@ -425,6 +479,7 @@ def has_column_names(self):
425479
return _has_names(self.frame.columns)
426480

427481
def _get_formatted_index(self):
482+
# Note: this is used by to_string() and to_latex() (both via _to_str_columns()), but not by to_html().
428483
index = self.frame.index
429484
columns = self.frame.columns
430485

pandas/core/frame.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,7 +1242,8 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
12421242
def to_html(self, buf=None, columns=None, col_space=None, colSpace=None,
12431243
header=True, index=True, na_rep='NaN', formatters=None,
12441244
float_format=None, sparsify=None, index_names=True,
1245-
justify=None, force_unicode=False, bold_rows=True):
1245+
justify=None, force_unicode=False, bold_rows=True,
1246+
classes=None):
12461247
"""
12471248
to_html-specific options
12481249
bold_rows : boolean, default True
@@ -1266,7 +1267,33 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None,
12661267
index_names=index_names,
12671268
header=header, index=index,
12681269
bold_rows=bold_rows)
1269-
formatter.to_html()
1270+
formatter.to_html(classes=classes)
1271+
1272+
if buf is None:
1273+
return formatter.buf.getvalue()
1274+
1275+
@Appender(fmt.docstring_to_string, indents=1)
1276+
def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None,
1277+
header=True, index=True, na_rep='NaN', formatters=None,
1278+
float_format=None, sparsify=None, index_names=True,
1279+
bold_rows=True):
1280+
"""
1281+
to_latex-specific options
1282+
bold_rows : boolean, default True
1283+
Make the row labels bold in the output
1284+
1285+
Render a DataFrame to a tabular environment table.
1286+
You can splice this into a LaTeX document.
1287+
"""
1288+
formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns,
1289+
col_space=col_space, na_rep=na_rep,
1290+
header=header, index=index,
1291+
formatters=formatters,
1292+
float_format=float_format,
1293+
bold_rows=bold_rows,
1294+
sparsify=sparsify,
1295+
index_names=index_names)
1296+
formatter.to_latex()
12701297

12711298
if buf is None:
12721299
return formatter.buf.getvalue()

pandas/tests/test_format.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import os
77
import sys
88
import unittest
9+
from textwrap import dedent
910

1011
from numpy import nan
1112
from numpy.random import randn
@@ -413,7 +414,7 @@ def test_to_html_multiindex(self):
413414
names=['CL0', 'CL1'])
414415
df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns)
415416
result = df.to_html(justify='left')
416-
expected = ('<table border="1">\n'
417+
expected = ('<table border="1" class="dataframe">\n'
417418
' <thead>\n'
418419
' <tr>\n'
419420
' <th><table><tbody><tr><td>CL0</td></tr><tr>'
@@ -455,8 +456,9 @@ def test_to_html_multiindex(self):
455456
columns = pandas.MultiIndex.from_tuples(zip(range(4),
456457
np.mod(range(4), 2)))
457458
df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns)
459+
458460
result = df.to_html(justify='right')
459-
expected = ('<table border="1">\n'
461+
expected = ('<table border="1" class="dataframe">\n'
460462
' <thead>\n'
461463
' <tr>\n'
462464
' <th></th>\n'
@@ -500,7 +502,7 @@ def test_to_html_justify(self):
500502
'C': [223442, 0, 1]},
501503
columns=['A', 'B', 'C'])
502504
result = df.to_html(justify='left')
503-
expected = ('<table border="1">\n'
505+
expected = ('<table border="1" class="dataframe">\n'
504506
' <thead>\n'
505507
' <tr style="text-align: left;">\n'
506508
' <th></th>\n'
@@ -534,7 +536,7 @@ def test_to_html_justify(self):
534536
self.assertEqual(result, expected)
535537

536538
result = df.to_html(justify='right')
537-
expected = ('<table border="1">\n'
539+
expected = ('<table border="1" class="dataframe">\n'
538540
' <thead>\n'
539541
' <tr style="text-align: right;">\n'
540542
' <th></th>\n'
@@ -594,6 +596,27 @@ def test_repr_html(self):
594596

595597
fmt.reset_printoptions()
596598

599+
def test_to_html_with_classes(self):
600+
df = pandas.DataFrame()
601+
result = df.to_html(classes="sortable draggable")
602+
expected = dedent("""
603+
604+
<table border="1" class="dataframe sortable draggable">
605+
<tbody>
606+
<tr>
607+
<td>Index([], dtype=object)</td>
608+
<td>Empty DataFrame</td>
609+
</tr>
610+
</tbody>
611+
</table>
612+
613+
""").strip()
614+
self.assertEqual(result, expected)
615+
616+
result = df.to_html(classes=["sortable", "draggable"])
617+
self.assertEqual(result, expected)
618+
619+
597620
class TestSeriesFormatting(unittest.TestCase):
598621

599622
def setUp(self):

0 commit comments

Comments
 (0)