diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 725dc7fc52ed0..e45484ed17251 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -228,6 +228,7 @@ Other enhancements - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs <categorical.union>` for more information. - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) +- ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases (:issue:`15536`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b3e43edc3eb55..a857e18d96272 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1529,6 +1529,8 @@ def to_feather(self, fname): from pandas.io.feather_format import to_feather to_feather(self, fname) + @Substitution(header='Write out column names. 
If a list of string is given, \ +it is assumed to be aliases for the column names') @Appender(fmt.docstring_to_string, indents=1) def to_string(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, @@ -1556,6 +1558,7 @@ def to_string(self, buf=None, columns=None, col_space=None, header=True, result = formatter.buf.getvalue() return result + @Substitution(header='whether to print column labels, default True') @Appender(fmt.docstring_to_string, indents=1) def to_html(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, @@ -1609,6 +1612,8 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, if buf is None: return formatter.buf.getvalue() + @Substitution(header='Write out column names. If a list of string is given, \ +it is assumed to be aliases for the column names.') @Appender(fmt.common_docstring + fmt.return_docstring, indents=1) def to_latex(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 9dde3b0001c31..d90edbe7b7e7c 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -20,9 +20,9 @@ is_float, is_numeric_dtype, is_datetime64_dtype, - is_timedelta64_dtype) + is_timedelta64_dtype, + is_list_like) from pandas.types.generic import ABCSparseArray - from pandas.core.base import PandasObject from pandas.core.index import Index, MultiIndex, _ensure_index from pandas import compat @@ -53,7 +53,7 @@ col_space : int, optional the minimum width of each column header : bool, optional - whether to print column labels, default True + %(header)s index : bool, optional whether to print index (row) labels, default True na_rep : string, optional @@ -487,32 +487,38 @@ def _to_str_columns(self): # may include levels names also str_index = self._get_formatted_index(frame) 
- str_columns = self._get_formatted_column_labels(frame) - if self.header: + if not is_list_like(self.header) and not self.header: stringified = [] for i, c in enumerate(frame): - cheader = str_columns[i] - max_colwidth = max(self.col_space or 0, *(self.adj.len(x) - for x in cheader)) fmt_values = self._format_col(i) fmt_values = _make_fixed_width(fmt_values, self.justify, - minimum=max_colwidth, + minimum=(self.col_space or 0), adj=self.adj) - - max_len = max(np.max([self.adj.len(x) for x in fmt_values]), - max_colwidth) - cheader = self.adj.justify(cheader, max_len, mode=self.justify) - stringified.append(cheader + fmt_values) + stringified.append(fmt_values) else: + if is_list_like(self.header): + if len(self.header) != len(self.columns): + raise ValueError(('Writing %d cols but got %d aliases' + % (len(self.columns), len(self.header)))) + str_columns = [[label] for label in self.header] + else: + str_columns = self._get_formatted_column_labels(frame) + stringified = [] for i, c in enumerate(frame): + cheader = str_columns[i] + header_colwidth = max(self.col_space or 0, + *(self.adj.len(x) for x in cheader)) fmt_values = self._format_col(i) fmt_values = _make_fixed_width(fmt_values, self.justify, - minimum=(self.col_space or 0), + minimum=header_colwidth, adj=self.adj) - stringified.append(fmt_values) + max_len = max(np.max([self.adj.len(x) for x in fmt_values]), + header_colwidth) + cheader = self.adj.justify(cheader, max_len, mode=self.justify) + stringified.append(cheader + fmt_values) strcols = stringified if self.index: diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index ddf9d35841ce7..b1f163ccf9429 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -1125,6 +1125,17 @@ def test_to_string_no_header(self): self.assertEqual(df_s, expected) + def test_to_string_specified_header(self): + df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) + + df_s = df.to_string(header=['X', 'Y']) + 
expected = ' X Y\n0 1 4\n1 2 5\n2 3 6' + + self.assertEqual(df_s, expected) + + with tm.assertRaises(ValueError): + df.to_string(header=['X']) + def test_to_string_no_index(self): df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) diff --git a/pandas/tests/formats/test_to_latex.py b/pandas/tests/formats/test_to_latex.py index 17e1e18f03dd6..29ead83f3bcd9 100644 --- a/pandas/tests/formats/test_to_latex.py +++ b/pandas/tests/formats/test_to_latex.py @@ -428,6 +428,51 @@ def test_to_latex_no_header(self): assert withoutindex_result == withoutindex_expected + def test_to_latex_specified_header(self): + # GH 7124 + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(header=['AA', 'BB']) + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & AA & BB \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(header=['AA', 'BB'], index=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule +AA & BB \\ +\midrule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + withoutescape_result = df.to_latex(header=['$A$', '$B$'], escape=False) + withoutescape_expected = r"""\begin{tabular}{lrl} +\toprule +{} & $A$ & $B$ \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutescape_result == withoutescape_expected + + with tm.assertRaises(ValueError): + df.to_latex(header=['A']) + def test_to_latex_decimal(self, frame): # GH 12031 frame.to_latex()