Skip to content

Commit 3af4e33

Browse files
committed
BUG: to_html now handles column/row names and non-MultiIndex correctly, create format.py module
1 parent 8818b31 commit 3af4e33

File tree

4 files changed

+273
-269
lines changed

4 files changed

+273
-269
lines changed

RELEASE.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ pandas 0.6.0
7272
- DataFrame constructor will use Series name if no columns passed (GH #373)
7373
- Support regular expressions and longer delimiters in read_table/read_csv,
7474
but does not handle quoted strings yet (GH #364)
75-
- Add `DataFrame.to_html` (PR #387)
75+
- Add `DataFrame.to_html` for formatting DataFrame to HTML (PR #387)
76+
- MaskedArray can be passed to DataFrame constructor and masked values will be
77+
converted to NaN (PR #396)
7678
7779
**Improvements to existing features**
7880

pandas/core/format.py

+250
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
from StringIO import StringIO
2+
from pandas.core.common import adjoin, _pfixed
3+
from pandas.core.index import MultiIndex, _ensure_index
4+
5+
6+
class DataFrameFormatter(object):
    """
    Render a DataFrame

    self.to_string() : console-friendly tabular output
    self.to_html() : html table

    Both methods write their output to ``self.buf`` and return None.

    Parameters
    ----------
    frame : DataFrame
        Object to render.
    buf : writable file-like object, optional
        Output target; a new StringIO is created when omitted.
    columns : sequence, optional
        Column labels to render, passed through ``_ensure_index``.
        Defaults to ``frame.columns``.
    col_space : optional
        When not None, values are formatted with
        ``_pfixed(value, col_space, ...)`` instead of ``_format``.
    na_rep : string, default 'NaN'
        Forwarded to the value formatter as ``na_rep``.
    formatters : dict, optional
        Maps a column label to a one-argument callable that formats a
        single value of that column; other columns use the default
        formatter.
    float_format : optional
        Forwarded to the value formatter as ``float_format``.
    sparsify : bool, default True
        Forwarded to ``MultiIndex.format`` for the row index and used to
        decide whether MultiIndex column labels go through ``_sparsify``.
    index_names : bool, default True
        Whether ``to_string`` shows index / column names (only when the
        frame actually has such names).
    """
    def __init__(self, frame, buf=None, columns=None, col_space=None,
                 na_rep='NaN', formatters=None, float_format=None,
                 sparsify=True, index_names=True):

        self.frame = frame
        self.buf = buf if buf is not None else StringIO()
        self.show_index_names = index_names
        self.sparsify = sparsify
        self.float_format = float_format
        self.formatters = formatters
        self.na_rep = na_rep
        self.col_space = col_space

        if columns is not None:
            self.columns = _ensure_index(columns)
        else:
            self.columns = frame.columns

    def to_string(self):
        """
        Render a DataFrame to a console-friendly tabular output.

        An empty frame (no columns or no rows) is rendered as a short
        summary containing the reprs of its columns and index.
        """
        frame = self.frame
        format_col = self._get_column_formatter()

        to_write = []

        if len(frame.columns) == 0 or len(frame.index) == 0:
            info_line = 'Empty %s\nColumns: %s\nIndex: %s'
            to_write.append(info_line % (type(self.frame).__name__,
                                         repr(frame.columns),
                                         repr(frame.index)))
        else:
            # the formatted index may also include the level names
            str_index = self._get_formatted_index()
            str_columns = self._get_formatted_column_labels()

            # each column is its header lines followed by its values
            stringified = [str_columns[i] + format_col(c)
                           for i, c in enumerate(self.columns)]

            to_write.append(adjoin(1, str_index, *stringified))

        # if any chunk is unicode, convert all of them so the buffer
        # receives a single string type
        if any(isinstance(s, unicode) for s in to_write):
            to_write = [unicode(s) for s in to_write]

        self.buf.writelines(to_write)

    def to_html(self):
        """
        Render a DataFrame to a html table.

        The columns rendered are ``self.columns``, so a ``columns``
        argument given to the constructor is honoured here exactly as in
        ``to_string``. Cell contents are written as-is (no HTML escaping).
        """
        def write(buf, s, indent=0):
            buf.write(unicode((' ' * indent) + str(s) + '\n'))

        def write_th(buf, s, indent=0):
            write(buf, '<th>%s</th>' % str(s), indent)

        def write_td(buf, s, indent=0):
            write(buf, '<td>%s</td>' % str(s), indent)

        def write_tr(buf, l, indent=0, indent_delta=4, header=False):
            write(buf, '<tr>', indent)
            write_cell = write_th if header else write_td
            for s in l:
                write_cell(buf, s, indent + indent_delta)
            write(buf, '</tr>', indent)

        def single_column_table(column):
            # nested one-column table used to stack the levels of a
            # MultiIndex column label inside a single header cell
            cells = ''.join(['<tr><td>%s</td></tr>' % str(i)
                             for i in column])
            return '<table><tbody>' + cells + '</tbody></table>'

        indent = 0
        indent_delta = 2
        frame = self.frame
        columns = self.columns
        buf = self.buf
        format_col = self._get_column_formatter()

        write(buf, '<table border="1">', indent)

        def _column_header():
            # Blank cells above every index level but the last; the cell
            # above the last index level carries the column name(s).
            row = [''] * (frame.index.nlevels - 1)

            if isinstance(frame.columns, MultiIndex):
                if self.has_column_names:
                    # None names are shown as '' rather than 'None'
                    names = self._get_column_name_list()
                    row.append(single_column_table(names))
                else:
                    # keep the header aligned with the data rows, which
                    # always have one cell per index level
                    row.append('')
                row.extend([single_column_table(c) for c in columns])
            else:
                row.append(frame.columns.name or '')
                row.extend(columns)
            return row

        if len(frame.columns) == 0 or len(frame.index) == 0:
            write(buf, '<tbody>', indent + indent_delta)
            write_tr(buf,
                     [repr(frame.index),
                      'Empty %s' % type(self.frame).__name__],
                     indent + (2 * indent_delta),
                     indent_delta)
            write(buf, '</tbody>', indent + indent_delta)
        else:
            indent += indent_delta
            write(buf, '<thead>', indent)

            # header row
            col_row = _column_header()
            indent += indent_delta
            write_tr(buf, col_row, indent, indent_delta, header=True)
            if self.has_index_names:
                index_names = ['' if name is None else name
                               for name in frame.index.names]
                row = index_names + [''] * len(columns)
                write_tr(buf, row, indent, indent_delta, header=True)
            write(buf, '</thead>', indent)
            write(buf, '<tbody>', indent)

            # write values
            multi_index = isinstance(frame.index, MultiIndex)
            for i in range(len(frame)):
                row = []
                if multi_index:
                    # one cell per index level
                    row.extend(frame.index[i])
                else:
                    # a plain label is a single cell; extending would
                    # split a string label into one cell per character
                    row.append(frame.index[i])
                for column in columns:
                    row.append(format_col(column, i))
                write_tr(buf, row, indent, indent_delta)
            indent -= indent_delta
            write(buf, '</tbody>', indent)
            indent -= indent_delta

        write(buf, '</table>', indent)

    def _get_column_formatter(self):
        """
        Build the function used to format the values of one column.

        Returns
        -------
        callable ``f(col, i=None)``
            With ``i`` omitted, returns the list of formatted values of
            column ``col``; with an integer ``i``, returns the formatted
            value at row position ``i``. A per-column entry in
            ``self.formatters`` takes precedence over the default
            formatter.
        """
        from pandas.core.common import _format

        col_space = self.col_space

        if col_space is None:
            def _myformat(v):
                return _format(v, na_rep=self.na_rep,
                               float_format=self.float_format)
        else:
            def _myformat(v):
                return _pfixed(v, col_space, na_rep=self.na_rep,
                               float_format=self.float_format)

        formatters = {} if self.formatters is None else self.formatters

        def _format_col(col, i=None):
            formatter = formatters.get(col, _myformat)
            if i is None:
                return [formatter(x) for x in self.frame[col]]
            # ``i`` is a row position; go through the underlying array so
            # it cannot be interpreted as a label on an integer index
            return formatter(self.frame[col].values[i])

        return _format_col

    def _get_formatted_column_labels(self):
        """
        Return, for each column in ``self.columns``, the list of header
        strings (one per column level, each prefixed with a space).

        When index names are shown, an empty string is appended to every
        column's list.
        """
        from pandas.core.index import _sparsify

        if isinstance(self.columns, MultiIndex):
            fmt_columns = self.columns.format(sparsify=False, adjoin=False)
            str_columns = zip(*[[' %s' % y for y in x]
                                for x in zip(*fmt_columns)])
            if self.sparsify:
                str_columns = _sparsify(str_columns)

            str_columns = [list(x) for x in zip(*str_columns)]
        else:
            str_columns = [[' %s' % x] for x in self.columns.format()]

        if self.show_index_names and self.has_index_names:
            for x in str_columns:
                x.append('')

        return str_columns

    @property
    def has_index_names(self):
        """Whether the frame's index has at least one non-None name."""
        return _has_names(self.frame.index)

    @property
    def has_column_names(self):
        """Whether the frame's columns have at least one non-None name."""
        return _has_names(self.frame.columns)

    def _get_formatted_index(self):
        """
        Return the lines of the left-hand (index) part of ``to_string``.

        The result is the column-header lines (column names, or one blank
        line per column level) followed by the adjoined formatted index.
        """
        index = self.frame.index
        columns = self.frame.columns

        # NOTE(review): column names are gated on the same
        # ``index_names`` flag as index names -- presumably intentional,
        # since the constructor exposes only that one switch; confirm.
        show_index_names = self.show_index_names and self.has_index_names
        show_col_names = self.show_index_names and self.has_column_names

        if isinstance(index, MultiIndex):
            fmt_index = index.format(sparsify=self.sparsify, adjoin=False,
                                     names=show_index_names)
        else:
            fmt_index = [index.format(name=show_index_names)]

        adjoined = adjoin(1, *fmt_index).split('\n')

        # empty space for columns
        if show_col_names:
            col_header = [' %s' % x for x in self._get_column_name_list()]
        else:
            col_header = [''] * columns.nlevels

        return col_header + adjoined

    def _get_column_name_list(self):
        """
        Return the names of the frame's column levels as a list, with
        None replaced by ''. A non-MultiIndex yields a one-element list.
        """
        names = []
        columns = self.frame.columns
        if isinstance(columns, MultiIndex):
            names.extend('' if name is None else name
                         for name in columns.names)
        else:
            names.append('' if columns.name is None else columns.name)
        return names
245+
246+
def _has_names(index):
247+
if isinstance(index, MultiIndex):
248+
return any([x is not None for x in index.names])
249+
else:
250+
return index.name is not None

0 commit comments

Comments
 (0)