From 191bfe8bcb15e1a5fb05641ff1926fcaace44969 Mon Sep 17 00:00:00 2001
From: Wouter Overmeire
Date: Fri, 18 Nov 2011 08:16:15 +0100
Subject: [PATCH] Add DataFrame.to_html()

---
Review notes (text between the "---" separator and the diff is commentary,
not part of the commit message):

* This copy of the patch was rebuilt from a whitespace-collapsed,
  HTML-stripped extraction. Line breaks and indentation are inferred from
  the hunk headers and the Python structure; if a context line fails to
  match, apply with `git apply --ignore-whitespace`.
* The tag literals in _DataFrameFormatter.to_html() were empty strings in
  the extraction. They are restored here from the helper names and call
  positions (th/td/tr, thead/tbody, table) -- presumably what the author
  wrote; any tag attributes that were lost are NOT reconstructed. Verify
  against the upstream commit.
* Two added lines intentionally differ from the extracted text:
  `if i == None` is now `if i is None`, and the header row for a
  non-MultiIndex column axis now does `row.extend(frame.columns)` instead
  of `row.append('')`, which emitted no column labels.
* The author e-mail is missing from the From: header and the `index` blob
  ids describe the original post-image; fix the header before `git am`.
* Follow-up (not done here): cell values are written without HTML escaping.

 pandas/core/frame.py       | 146 +++++++++++++++++++++++++++++++++++--
 pandas/tests/test_frame.py |  28 +++++++
 2 files changed, 166 insertions(+), 8 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0e22965d13e21..2868cb5927270 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -678,8 +678,9 @@ def to_csv(self, path, sep=",", na_rep='', cols=None, header=True,
     def to_string(self, buf=None, columns=None, colSpace=None,
                   na_rep='NaN', formatters=None, float_format=None,
                   sparsify=True, nanRep=None, index_names=True):
-
-
+        """
+        Render a DataFrame to a console-friendly tabular output.
+        """
         if nanRep is not None:  # pragma: no cover
             import warnings
             warnings.warn("nanRep is deprecated, use na_rep",
@@ -693,6 +694,24 @@ def to_string(self, buf=None, columns=None, colSpace=None,
                                         float_format=float_format,
                                         sparsify=sparsify,
                                         index_names=index_names)
+        formatter.to_string()
+
+        if buf is None:
+            return formatter.buf.getvalue()
+
+    def to_html(self, buf=None, columns=None, colSpace=None,
+                na_rep='NaN', formatters=None, float_format=None,
+                sparsify=True, index_names=True):
+        """
+        Render a DataFrame to a html table.
+        """
+        formatter = _DataFrameFormatter(self, buf=buf, columns=columns,
+                                        col_space=colSpace, na_rep=na_rep,
+                                        formatters=formatters,
+                                        float_format=float_format,
+                                        sparsify=sparsify,
+                                        index_names=index_names)
+        formatter.to_html()
 
         if buf is None:
             return formatter.buf.getvalue()
@@ -3230,7 +3249,10 @@ def combineMult(self, other):
 
 class _DataFrameFormatter(object):
     """
-    Render a console-friendly tabular output of a DataFrame
+    Render a DataFrame
+
+    self.to_string() : console-friendly tabular output
+    self.to_html()   : html table
     """
     def __init__(self, frame, buf=None, columns=None, col_space=None,
                  na_rep='NaN', formatters=None, float_format=None,
@@ -3250,9 +3272,10 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
         else:
             self.columns = frame.columns
 
-        self._write_to_buffer()
-
-    def _write_to_buffer(self):
+    def to_string(self):
+        """
+        Render a DataFrame to a console-friendly tabular output.
+        """
         frame = self.frame
         format_col = self._get_column_formatter()
 
@@ -3280,6 +3303,110 @@ def _write_to_buffer(self):
 
         self.buf.writelines(to_write)
 
+    def to_html(self):
+        """
+        Render a DataFrame to a html table.
+        """
+        def write(buf, s, indent=0):
+            buf.write(unicode((' ' * indent) + str(s) + '\n'))
+
+        def write_th(buf, s, indent=0):
+            write(buf, '<th>%s</th>' % str(s), indent)
+
+        def write_td(buf, s, indent=0):
+            write(buf, '<td>%s</td>' % str(s), indent)
+
+        def write_tr(buf, l, indent=0, indent_delta=4, header=False):
+            write(buf, '<tr>', indent)
+            indent += indent_delta
+            if header:
+                for s in l:
+                    write_th(buf, s, indent)
+            else:
+                for s in l:
+                    write_td(buf, s, indent)
+            indent -= indent_delta
+            write(buf, '</tr>', indent)
+
+        def single_column_table(column):
+            table = '<table><tbody>'
+            for i in column:
+                table += ('<tr><td>%s</td></tr>' % str(i))
+            table += '</tbody></table>'
+            return table
+
+        def single_row_table(row):
+            table = '<table><tbody><tr>'
+            for i in row:
+                table += ('<td>%s</td>' % str(i))
+            table += '</tr></tbody></table>'
+            return table
+
+        indent = 0
+        indent_delta = 2
+        frame = self.frame
+        buf = self.buf
+        format_col = self._get_column_formatter()
+
+        write(buf, '<table>', indent)
+
+        if len(frame.columns) == 0 or len(frame.index) == 0:
+            write(buf, '<tbody>', indent + indent_delta)
+            write_tr(buf,
+                     [repr(frame.index),
+                      'Empty %s' % type(self.frame).__name__],
+                     indent + (2 * indent_delta),
+                     indent_delta)
+            write(buf, '</tbody>', indent + indent_delta)
+        else:
+            indent += indent_delta
+            write(buf, '<thead>', indent)
+            row = []
+
+            if isinstance(frame.index, MultiIndex):
+                if self.has_index_names:
+                    row.extend(frame.index.names)
+                else:
+                    row.extend([''] * frame.index.nlevels)
+            else:
+                row.append(' ')
+
+            if isinstance(frame.columns, MultiIndex):
+                row.extend([single_column_table(c) for c in frame.columns])
+                if self.has_column_names:
+                    names = single_column_table(frame.columns.names)
+                    idx = len(frame.columns)
+                    row[-idx] = single_row_table([names, row[-idx]])
+            else:
+                row.extend(frame.columns)
+
+            indent += indent_delta
+            write_tr(buf,
+                     row,
+                     indent,
+                     indent_delta,
+                     header=True)
+            write(buf, '</thead>', indent)
+
+            write(buf, '<tbody>', indent)
+            for i in range(len(frame)):
+                row = []
+                try:
+                    row.extend(frame.index[i])
+                except TypeError:
+                    row.append(frame.index[i])
+                for column in frame.columns:
+                    row.append(format_col(column, i))
+                write_tr(buf,
+                         row,
+                         indent,
+                         indent_delta)
+            indent -= indent_delta
+            write(buf, '</tbody>', indent)
+            indent -= indent_delta
+
+        write(buf, '</table>', indent)
+
     def _get_column_formatter(self):
         from pandas.core.common import _format
 
@@ -3296,9 +3423,12 @@ def _myformat(v):
 
         formatters = {} if self.formatters is None else self.formatters
 
-        def _format_col(col):
+        def _format_col(col, i=None):
             formatter = formatters.get(col, _myformat)
-            return [formatter(x) for x in self.frame[col]]
+            if i is None:
+                return [formatter(x) for x in self.frame[col]]
+            else:
+                return formatter(self.frame[col][i])
 
         return _format_col
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index ab54610572586..e89a3315e44e5 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -1415,6 +1415,34 @@ def test_to_string(self):
         frame = DataFrame(index=np.arange(1000))
         frame.to_string()
 
+    def test_to_html(self):
+        # big mixed
+        biggie = DataFrame({'A' : randn(1000),
+                            'B' : tm.makeStringIndex(1000)},
+                            index=range(1000))
+
+        biggie['A'][:20] = nan
+        biggie['B'][:20] = nan
+        s = biggie.to_html()
+
+        buf = StringIO()
+        retval = biggie.to_html(buf=buf)
+        self.assert_(retval is None)
+        self.assertEqual(buf.getvalue(), s)
+
+        self.assert_(isinstance(s, basestring))
+
+        biggie.to_html(columns=['B', 'A'], colSpace=17)
+        biggie.to_html(columns=['B', 'A'],
+                       formatters={'A' : lambda x: '%.1f' % x})
+
+        biggie.to_html(columns=['B', 'A'], float_format=str)
+        biggie.to_html(columns=['B', 'A'], colSpace=12,
+                       float_format=str)
+
+        frame = DataFrame(index=np.arange(1000))
+        frame.to_html()
+
     def test_insert(self):
         df = DataFrame(np.random.randn(5, 3), index=np.arange(5),
                        columns=['c', 'b', 'a'])