ENH: to_string/to_latex now accept list-like header arg for overwriting column names (pandas-dev#15548)

mcocdawc · jorisvandenbossche · commit ae0a92a68b98 · 2017-03-09T11:58:48.000+01:00
closes pandas-dev#15536
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -227,6 +227,7 @@ Other enhancements
 - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`)
 - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs <categorical.union>` for more information.
 - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`)
+- ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now accept a list-like ``header`` argument whose entries are used as aliases for the column names (:issue:`15536`)
 
 .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1516,6 +1516,8 @@ def to_feather(self, fname):
         from pandas.io.feather_format import to_feather
         to_feather(self, fname)
 
+    @Substitution(header='Write out column names. If a list of string is given, \
+it is assumed to be aliases for the column names')
     @Appender(fmt.docstring_to_string, indents=1)
     def to_string(self, buf=None, columns=None, col_space=None, header=True,
                   index=True, na_rep='NaN', formatters=None, float_format=None,
@@ -1543,6 +1545,7 @@ def to_string(self, buf=None, columns=None, col_space=None, header=True,
             result = formatter.buf.getvalue()
             return result
 
+    @Substitution(header='whether to print column labels, default True')
     @Appender(fmt.docstring_to_string, indents=1)
     def to_html(self, buf=None, columns=None, col_space=None, header=True,
                 index=True, na_rep='NaN', formatters=None, float_format=None,
@@ -1596,6 +1599,8 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True,
         if buf is None:
             return formatter.buf.getvalue()
 
+    @Substitution(header='Write out column names. If a list of string is given, \
+it is assumed to be aliases for the column names.')
     @Appender(fmt.common_docstring + fmt.return_docstring, indents=1)
     def to_latex(self, buf=None, columns=None, col_space=None, header=True,
                  index=True, na_rep='NaN', formatters=None, float_format=None,
diff --git a/pandas/formats/format.py b/pandas/formats/format.py
@@ -20,9 +20,9 @@
                                  is_float,
                                  is_numeric_dtype,
                                  is_datetime64_dtype,
-                                 is_timedelta64_dtype)
+                                 is_timedelta64_dtype,
+                                 is_list_like)
 from pandas.types.generic import ABCSparseArray
-
 from pandas.core.base import PandasObject
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas import compat
@@ -54,7 +54,7 @@
     col_space : int, optional
         the minimum width of each column
     header : bool, optional
-        whether to print column labels, default True
+        %(header)s
     index : bool, optional
         whether to print index (row) labels, default True
     na_rep : string, optional
@@ -488,32 +488,38 @@ def _to_str_columns(self):
         # may include levels names also
 
         str_index = self._get_formatted_index(frame)
-        str_columns = self._get_formatted_column_labels(frame)
 
-        if self.header:
+        if not is_list_like(self.header) and not self.header:
             stringified = []
             for i, c in enumerate(frame):
-                cheader = str_columns[i]
-                max_colwidth = max(self.col_space or 0, *(self.adj.len(x)
-                                                          for x in cheader))
                 fmt_values = self._format_col(i)
                 fmt_values = _make_fixed_width(fmt_values, self.justify,
-                                               minimum=max_colwidth,
+                                               minimum=(self.col_space or 0),
                                                adj=self.adj)
-
-                max_len = max(np.max([self.adj.len(x) for x in fmt_values]),
-                              max_colwidth)
-                cheader = self.adj.justify(cheader, max_len, mode=self.justify)
-                stringified.append(cheader + fmt_values)
+                stringified.append(fmt_values)
         else:
+            if is_list_like(self.header):
+                if len(self.header) != len(self.columns):
+                    raise ValueError(('Writing %d cols but got %d aliases'
+                                      % (len(self.columns), len(self.header))))
+                str_columns = [[label] for label in self.header]
+            else:
+                str_columns = self._get_formatted_column_labels(frame)
+
             stringified = []
             for i, c in enumerate(frame):
+                cheader = str_columns[i]
+                header_colwidth = max(self.col_space or 0,
+                                      *(self.adj.len(x) for x in cheader))
                 fmt_values = self._format_col(i)
                 fmt_values = _make_fixed_width(fmt_values, self.justify,
-                                               minimum=(self.col_space or 0),
+                                               minimum=header_colwidth,
                                                adj=self.adj)
 
-                stringified.append(fmt_values)
+                max_len = max(np.max([self.adj.len(x) for x in fmt_values]),
+                              header_colwidth)
+                cheader = self.adj.justify(cheader, max_len, mode=self.justify)
+                stringified.append(cheader + fmt_values)
 
         strcols = stringified
         if self.index:
diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py
@@ -1125,6 +1125,17 @@ def test_to_string_no_header(self):
 
         self.assertEqual(df_s, expected)
 
+    def test_to_string_specified_header(self):
+        df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
+
+        df_s = df.to_string(header=['X', 'Y'])
+        expected = '   X  Y\n0  1  4\n1  2  5\n2  3  6'
+
+        self.assertEqual(df_s, expected)
+
+        with tm.assertRaises(ValueError):
+            df.to_string(header=['X'])
+
     def test_to_string_no_index(self):
         df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
 
diff --git a/pandas/tests/formats/test_to_latex.py b/pandas/tests/formats/test_to_latex.py
@@ -428,6 +428,51 @@ def test_to_latex_no_header(self):
 
         assert withoutindex_result == withoutindex_expected
 
+    def test_to_latex_specified_header(self):
+        # GH 15536
+        df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']})
+        withindex_result = df.to_latex(header=['AA', 'BB'])
+        withindex_expected = r"""\begin{tabular}{lrl}
+\toprule
+{} & AA &  BB \\
+\midrule
+0 &  1 &  b1 \\
+1 &  2 &  b2 \\
+\bottomrule
+\end{tabular}
+"""
+
+        assert withindex_result == withindex_expected
+
+        withoutindex_result = df.to_latex(header=['AA', 'BB'], index=False)
+        withoutindex_expected = r"""\begin{tabular}{rl}
+\toprule
+AA &  BB \\
+\midrule
+ 1 &  b1 \\
+ 2 &  b2 \\
+\bottomrule
+\end{tabular}
+"""
+
+        assert withoutindex_result == withoutindex_expected
+
+        withoutescape_result = df.to_latex(header=['$A$', '$B$'], escape=False)
+        withoutescape_expected = r"""\begin{tabular}{lrl}
+\toprule
+{} & $A$ & $B$ \\
+\midrule
+0 &   1 &  b1 \\
+1 &   2 &  b2 \\
+\bottomrule
+\end{tabular}
+"""
+
+        assert withoutescape_result == withoutescape_expected
+
+        with tm.assertRaises(ValueError):
+            df.to_latex(header=['A'])
+
     def test_to_latex_decimal(self, frame):
         # GH 12031
         frame.to_latex()