Skip to content

ENH: IO header formatting #15548

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 9, 2017
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ Other enhancements
- ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`)
- ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs <categorical.union>` for more information.
- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`)
- ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases (:issue:`15536`)

.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations

Expand Down
5 changes: 5 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1529,6 +1529,8 @@ def to_feather(self, fname):
from pandas.io.feather_format import to_feather
to_feather(self, fname)

@Substitution(header='Write out column names. If a list of strings is given, \
it is assumed to be aliases for the column names.')
@Appender(fmt.docstring_to_string, indents=1)
def to_string(self, buf=None, columns=None, col_space=None, header=True,
index=True, na_rep='NaN', formatters=None, float_format=None,
Expand Down Expand Up @@ -1556,6 +1558,7 @@ def to_string(self, buf=None, columns=None, col_space=None, header=True,
result = formatter.buf.getvalue()
return result

@Substitution(header='whether to print column labels, default True')
@Appender(fmt.docstring_to_string, indents=1)
def to_html(self, buf=None, columns=None, col_space=None, header=True,
index=True, na_rep='NaN', formatters=None, float_format=None,
Expand Down Expand Up @@ -1609,6 +1612,8 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True,
if buf is None:
return formatter.buf.getvalue()

@Substitution(header='Write out column names. If a list of strings is given, \
it is assumed to be aliases for the column names.')
@Appender(fmt.common_docstring + fmt.return_docstring, indents=1)
def to_latex(self, buf=None, columns=None, col_space=None, header=True,
index=True, na_rep='NaN', formatters=None, float_format=None,
Expand Down
38 changes: 22 additions & 16 deletions pandas/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
is_float,
is_numeric_dtype,
is_datetime64_dtype,
is_timedelta64_dtype)
is_timedelta64_dtype,
is_list_like)
from pandas.types.generic import ABCSparseArray

from pandas.core.base import PandasObject
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas import compat
Expand Down Expand Up @@ -53,7 +53,7 @@
col_space : int, optional
the minimum width of each column
header : bool, optional
whether to print column labels, default True
%(header)s
index : bool, optional
whether to print index (row) labels, default True
na_rep : string, optional
Expand Down Expand Up @@ -487,32 +487,38 @@ def _to_str_columns(self):
# may include levels names also

str_index = self._get_formatted_index(frame)
str_columns = self._get_formatted_column_labels(frame)

if self.header:
if not is_list_like(self.header) and not self.header:
stringified = []
for i, c in enumerate(frame):
cheader = str_columns[i]
max_colwidth = max(self.col_space or 0, *(self.adj.len(x)
for x in cheader))
fmt_values = self._format_col(i)
fmt_values = _make_fixed_width(fmt_values, self.justify,
minimum=max_colwidth,
minimum=(self.col_space or 0),
adj=self.adj)

max_len = max(np.max([self.adj.len(x) for x in fmt_values]),
max_colwidth)
cheader = self.adj.justify(cheader, max_len, mode=self.justify)
stringified.append(cheader + fmt_values)
stringified.append(fmt_values)
else:
if is_list_like(self.header):
if len(self.header) != len(self.columns):
raise ValueError(('Writing %d cols but got %d aliases'
% (len(self.columns), len(self.header))))
str_columns = [[label] for label in self.header]
else:
str_columns = self._get_formatted_column_labels(frame)

stringified = []
for i, c in enumerate(frame):
cheader = str_columns[i]
header_colwidth = max(self.col_space or 0,
*(self.adj.len(x) for x in cheader))
fmt_values = self._format_col(i)
fmt_values = _make_fixed_width(fmt_values, self.justify,
minimum=(self.col_space or 0),
minimum=header_colwidth,
adj=self.adj)

stringified.append(fmt_values)
max_len = max(np.max([self.adj.len(x) for x in fmt_values]),
header_colwidth)
cheader = self.adj.justify(cheader, max_len, mode=self.justify)
stringified.append(cheader + fmt_values)

strcols = stringified
if self.index:
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,17 @@ def test_to_string_no_header(self):

self.assertEqual(df_s, expected)

def test_to_string_specified_header(self):
df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

df_s = df.to_string(header=['X', 'Y'])
expected = ' X Y\n0 1 4\n1 2 5\n2 3 6'

self.assertEqual(df_s, expected)

with tm.assertRaises(ValueError):
df.to_string(header=['X'])

def test_to_string_no_index(self):
df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

Expand Down
45 changes: 45 additions & 0 deletions pandas/tests/formats/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,51 @@ def test_to_latex_no_header(self):

assert withoutindex_result == withoutindex_expected

def test_to_latex_specified_header(self):
# GH 7124
df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']})
withindex_result = df.to_latex(header=['AA', 'BB'])
withindex_expected = r"""\begin{tabular}{lrl}
\toprule
{} & AA & BB \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""

assert withindex_result == withindex_expected

withoutindex_result = df.to_latex(header=['AA', 'BB'], index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
AA & BB \\
\midrule
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""

assert withoutindex_result == withoutindex_expected

withoutescape_result = df.to_latex(header=['$A$', '$B$'], escape=False)
withoutescape_expected = r"""\begin{tabular}{lrl}
\toprule
{} & $A$ & $B$ \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""

assert withoutescape_result == withoutescape_expected

with tm.assertRaises(ValueError):
df.to_latex(header=['A'])

def test_to_latex_decimal(self, frame):
# GH 12031
frame.to_latex()
Expand Down