diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index cd0714838a3f1..4090a07fee5a5 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -20,8 +20,7 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ - -- +- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) - .. _whatsnew_1000.enhancements.other: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1a5b36b07e93c..b427b1f0ac858 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2925,15 +2925,21 @@ def to_latex( multicolumn=None, multicolumn_format=None, multirow=None, + caption=None, + label=None, ): r""" - Render an object to a LaTeX tabular environment table. + Render object to a LaTeX tabular, longtable, or nested table/tabular. - Render an object to a tabular environment table. You can splice - this into a LaTeX document. Requires \usepackage{booktabs}. + Requires ``\usepackage{booktabs}``. The output can be copy/pasted + into a main LaTeX document or read from an external file + with ``\input{table.tex}``. .. versionchanged:: 0.20.2 - Added to Series + Added to Series. + + .. versionchanged:: 1.0.0 + Added caption and label arguments. Parameters ---------- @@ -3002,6 +3008,17 @@ def to_latex( from the pandas config module. .. versionadded:: 0.20.0 + + caption : str, optional + The LaTeX caption to be placed inside ``\caption{}`` in the output. + + .. versionadded:: 1.0.0 + + label : str, optional + The LaTeX label to be placed inside ``\label{}`` in the output. + This is used with ``\ref{}`` in the main ``.tex`` file. + + .. versionadded:: 1.0.0 %(returns)s See Also -------- @@ -3014,7 +3031,7 @@ def to_latex( >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'], ... 'mask': ['red', 'purple'], ... 'weapon': ['sai', 'bo staff']}) - >>> print(df.to_latex(index=False)) # doctest: +NORMALIZE_WHITESPACE + >>> print(df.to_latex(index=False)) # doctest: +NORMALIZE_WHITESPACE \begin{tabular}{lll} \toprule name & mask & weapon \\ @@ -3061,6 +3078,8 @@ def to_latex( multicolumn=multicolumn, multicolumn_format=multicolumn_format, multirow=multirow, + caption=caption, + label=label, ) def to_csv( diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 8ff4b9bda0430..f8db1b19dadfa 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -888,6 +888,8 @@ def to_latex( multicolumn: bool = False, multicolumn_format: Optional[str] = None, multirow: bool = False, + caption: Optional[str] = None, + label: Optional[str] = None, ) -> Optional[str]: """ Render a DataFrame to a LaTeX tabular/longtable environment output. @@ -902,6 +904,8 @@ def to_latex( multicolumn=multicolumn, multicolumn_format=multicolumn_format, multirow=multirow, + caption=caption, + label=label, ).get_result(buf=buf, encoding=encoding) def _format_col(self, i: int) -> List[str]: diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 4c4d5ec73269a..ca9db88ae7be4 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -36,6 +36,8 @@ def __init__( multicolumn: bool = False, multicolumn_format: Optional[str] = None, multirow: bool = False, + caption: Optional[str] = None, + label: Optional[str] = None, ): self.fmt = formatter self.frame = self.fmt.frame @@ -45,11 +47,14 @@ def __init__( self.multicolumn = multicolumn self.multicolumn_format = multicolumn_format self.multirow = multirow + self.caption = caption + self.label = label self.escape = self.fmt.escape def write_result(self, buf: IO[str]) -> None: """ - Render a DataFrame to a LaTeX tabular/longtable environment output. + Render a DataFrame to a LaTeX tabular, longtable, or table/tabular + environment output. """ # string representation of the columns @@ -114,12 +119,12 @@ def pad_empties(x): "not {typ}".format(typ=type(column_format)) ) - if not self.longtable: - buf.write("\\begin{{tabular}}{{{fmt}}}\n".format(fmt=column_format)) - buf.write("\\toprule\n") + if self.longtable: + self._write_longtable_begin(buf, column_format) else: - buf.write("\\begin{{longtable}}{{{fmt}}}\n".format(fmt=column_format)) - buf.write("\\toprule\n") + self._write_tabular_begin(buf, column_format) + + buf.write("\\toprule\n") ilevels = self.frame.index.nlevels clevels = self.frame.columns.nlevels @@ -183,11 +188,10 @@ def pad_empties(x): if self.multirow and i < len(strrows) - 1: self._print_cline(buf, i, len(strcols)) - if not self.longtable: - buf.write("\\bottomrule\n") - buf.write("\\end{tabular}\n") + if self.longtable: + self._write_longtable_end(buf) else: - buf.write("\\end{longtable}\n") + self._write_tabular_end(buf) def _format_multicolumn(self, row: List[str], ilevels: int) -> List[str]: r""" @@ -268,3 +272,107 @@ def _print_cline(self, buf: IO[str], i: int, icol: int) -> None: buf.write("\\cline{{{cl:d}-{icol:d}}}\n".format(cl=cl[1], icol=icol)) # remove entries that have been written to buffer self.clinebuf = [x for x in self.clinebuf if x[0] != i] + + def _write_tabular_begin(self, buf, column_format): + """ + Write the beginning of a tabular environment or + nested table/tabular environments including caption and label. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. + column_format : str, default None + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' + for 3 columns + + """ + if self.caption is not None or self.label is not None: + # then write output in a nested table/tabular environment + if self.caption is None: + caption_ = "" + else: + caption_ = "\n\\caption{{{}}}".format(self.caption) + + if self.label is None: + label_ = "" + else: + label_ = "\n\\label{{{}}}".format(self.label) + + buf.write("\\begin{{table}}\n\\centering{}{}\n".format(caption_, label_)) + else: + # then write output only in a tabular environment + pass + + buf.write("\\begin{{tabular}}{{{fmt}}}\n".format(fmt=column_format)) + + def _write_tabular_end(self, buf): + """ + Write the end of a tabular environment or nested table/tabular + environment. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. + + """ + buf.write("\\bottomrule\n") + buf.write("\\end{tabular}\n") + if self.caption is not None or self.label is not None: + buf.write("\\end{table}\n") + else: + pass + + def _write_longtable_begin(self, buf, column_format): + """ + Write the beginning of a longtable environment including caption and + label if provided by user. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. + column_format : str, default None + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' + for 3 columns + + """ + buf.write("\\begin{{longtable}}{{{fmt}}}\n".format(fmt=column_format)) + + if self.caption is not None or self.label is not None: + if self.caption is None: + pass + else: + buf.write("\\caption{{{}}}".format(self.caption)) + + if self.label is None: + pass + else: + buf.write("\\label{{{}}}".format(self.label)) + + # a double-backslash is required at the end of the line + # as discussed here: + # https://tex.stackexchange.com/questions/219138 + buf.write("\\\\\n") + else: + pass + + @staticmethod + def _write_longtable_end(buf): + """ + Write the end of a longtable environment. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. + + """ + buf.write("\\end{longtable}\n") diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 924b2a19e8504..9ffb54d23e37e 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -388,8 +388,7 @@ def test_to_latex_special_escape(self): """ assert escaped_result == escaped_expected - def test_to_latex_longtable(self, float_frame): - float_frame.to_latex(longtable=True) + def test_to_latex_longtable(self): df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) withindex_result = df.to_latex(longtable=True) @@ -439,6 +438,141 @@ def test_to_latex_longtable(self, float_frame): with3columns_result = df.to_latex(index=False, longtable=True) assert r"\multicolumn{3}" in with3columns_result + def test_to_latex_caption_label(self): + # GH 25436 + the_caption = "a table in a \\texttt{table/tabular} environment" + the_label = "tab:table_tabular" + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + # test when only the caption is provided + result_c = df.to_latex(caption=the_caption) + + expected_c = r"""\begin{table} +\centering +\caption{a table in a \texttt{table/tabular} environment} +\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +\end{table} +""" + assert result_c == expected_c + + # test when only the label is provided + result_l = df.to_latex(label=the_label) + + expected_l = r"""\begin{table} +\centering +\label{tab:table_tabular} +\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +\end{table} +""" + assert result_l == expected_l + + # test when the caption and the label are provided + result_cl = df.to_latex(caption=the_caption, label=the_label) + + expected_cl = r"""\begin{table} +\centering +\caption{a table in a \texttt{table/tabular} environment} +\label{tab:table_tabular} +\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +\end{table} +""" + assert result_cl == expected_cl + + def test_to_latex_longtable_caption_label(self): + # GH 25436 + the_caption = "a table in a \\texttt{longtable} environment" + the_label = "tab:longtable" + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + # test when only the caption is provided + result_c = df.to_latex(longtable=True, caption=the_caption) + + expected_c = r"""\begin{longtable}{lrl} +\caption{a table in a \texttt{longtable} environment}\\ +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + assert result_c == expected_c + + # test when only the label is provided + result_l = df.to_latex(longtable=True, label=the_label) + + expected_l = r"""\begin{longtable}{lrl} +\label{tab:longtable}\\ +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + assert result_l == expected_l + + # test when the caption and the label are provided + result_cl = df.to_latex(longtable=True, caption=the_caption, label=the_label) + + expected_cl = r"""\begin{longtable}{lrl} +\caption{a table in a \texttt{longtable} environment}\label{tab:longtable}\\ +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + assert result_cl == expected_cl + def test_to_latex_escape_special_chars(self): special_characters = ["&", "%", "$", "#", "_", "{", "}", "~", "^", "\\"] df = DataFrame(data=special_characters)