Skip to content

Backport PR #40731 on branch 1.3.x (ENH: Styler.to_latex conversion from CSS) #42040

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ which has been revised and improved (:issue:`39720`, :issue:`39317`, :issue:`404
- Many features of the :class:`.Styler` class are now either partially or fully usable on a DataFrame with a non-unique indexes or columns (:issue:`41143`)
- One has greater control of the display through separate sparsification of the index or columns using the :ref:`new styler options <options.available>`, which are also usable via :func:`option_context` (:issue:`41142`)
- Added the option ``styler.render.max_elements`` to avoid browser overload when styling large DataFrames (:issue:`40712`)
- Added the method :meth:`.Styler.to_latex` (:issue:`21673`)
- Added the method :meth:`.Styler.to_latex` (:issue:`21673`), which also allows some limited CSS conversion (:issue:`40731`)
- Added the method :meth:`.Styler.to_html` (:issue:`13379`)

.. _whatsnew_130.enhancements.dataframe_honors_copy_with_dict:
Expand Down
45 changes: 45 additions & 0 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ def to_latex(
multicol_align: str = "r",
siunitx: bool = False,
encoding: str | None = None,
convert_css: bool = False,
):
r"""
Write Styler to a file, buffer or string in LaTeX format.
Expand Down Expand Up @@ -482,6 +483,10 @@ def to_latex(
Set to ``True`` to structure LaTeX compatible with the {siunitx} package.
encoding : str, default "utf-8"
Character encoding setting.
convert_css : bool, default False
Convert simple cell-styles from CSS to LaTeX format. Any CSS not found in
conversion table is dropped. A style can be forced by adding option
`--latex`. See notes.

Returns
-------
Expand Down Expand Up @@ -661,6 +666,45 @@ def to_latex(
& ix2 & \$3 & 4.400 & CATS \\
L1 & ix3 & \$2 & 6.600 & COWS \\
\end{tabular}

**CSS Conversion**

This method can convert a Styler constructured with HTML-CSS to LaTeX using
the following limited conversions.

================== ==================== ============= ==========================
CSS Attribute CSS value LaTeX Command LaTeX Options
================== ==================== ============= ==========================
font-weight | bold | bfseries
| bolder | bfseries
font-style | italic | itshape
| oblique | slshape
background-color | red cellcolor | {red}--lwrap
| #fe01ea | [HTML]{FE01EA}--lwrap
| #f0e | [HTML]{FF00EE}--lwrap
| rgb(128,255,0) | [rgb]{0.5,1,0}--lwrap
| rgba(128,0,0,0.5) | [rgb]{0.5,0,0}--lwrap
| rgb(25%,255,50%) | [rgb]{0.25,1,0.5}--lwrap
color | red color | {red}
| #fe01ea | [HTML]{FE01EA}
| #f0e | [HTML]{FF00EE}
| rgb(128,255,0) | [rgb]{0.5,1,0}
| rgba(128,0,0,0.5) | [rgb]{0.5,0,0}
| rgb(25%,255,50%) | [rgb]{0.25,1,0.5}
================== ==================== ============= ==========================

It is also possible to add user-defined LaTeX only styles to a HTML-CSS Styler
using the ``--latex`` flag, and to add LaTeX parsing options that the
converter will detect within a CSS-comment.

>>> df = pd.DataFrame([[1]])
>>> df.style.set_properties(
... **{"font-weight": "bold /* --dwrap */", "Huge": "--latex--rwrap"}
... ).to_latex(css_convert=True)
\begin{tabular}{lr}
{} & {0} \\
0 & {\bfseries}{\Huge{1}} \\
\end{tabular}
"""
table_selectors = (
[style["selector"] for style in self.table_styles]
Expand Down Expand Up @@ -740,6 +784,7 @@ def to_latex(
sparse_columns=sparse_columns,
multirow_align=multirow_align,
multicol_align=multicol_align,
convert_css=convert_css,
)

return save_to_buffer(latex, buf=buf, encoding=encoding)
Expand Down
83 changes: 82 additions & 1 deletion pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from collections import defaultdict
from functools import partial
import re
from typing import (
Any,
Callable,
Expand Down Expand Up @@ -1253,7 +1254,9 @@ def _parse_latex_table_styles(table_styles: CSSStyles, selector: str) -> str | N
return None


def _parse_latex_cell_styles(latex_styles: CSSList, display_value: str) -> str:
def _parse_latex_cell_styles(
latex_styles: CSSList, display_value: str, convert_css: bool = False
) -> str:
r"""
Mutate the ``display_value`` string including LaTeX commands from ``latex_styles``.

Expand All @@ -1279,6 +1282,8 @@ def _parse_latex_cell_styles(latex_styles: CSSList, display_value: str) -> str:
For example for styles:
`[('c1', 'o1--wrap'), ('c2', 'o2')]` this returns: `{\c1o1 \c2o2{display_value}}
"""
if convert_css:
latex_styles = _parse_latex_css_conversion(latex_styles)
for (command, options) in latex_styles[::-1]: # in reverse for most recent style
formatter = {
"--wrap": f"{{\\{command}--to_parse {display_value}}}",
Expand Down Expand Up @@ -1351,6 +1356,82 @@ def _parse_latex_options_strip(value: str | int | float, arg: str) -> str:
return str(value).replace(arg, "").replace("/*", "").replace("*/", "").strip()


def _parse_latex_css_conversion(styles: CSSList) -> CSSList:
"""
Convert CSS (attribute,value) pairs to equivalent LaTeX (command,options) pairs.

Ignore conversion if tagged with `--latex` option, skipped if no conversion found.
"""

def font_weight(value, arg):
if value == "bold" or value == "bolder":
return "bfseries", f"{arg}"
return None

def font_style(value, arg):
if value == "italic":
return "itshape", f"{arg}"
elif value == "oblique":
return "slshape", f"{arg}"
return None

def color(value, user_arg, command, comm_arg):
"""
CSS colors have 5 formats to process:

- 6 digit hex code: "#ff23ee" --> [HTML]{FF23EE}
- 3 digit hex code: "#f0e" --> [HTML]{FF00EE}
- rgba: rgba(128, 255, 0, 0.5) --> [rgb]{0.502, 1.000, 0.000}
- rgb: rgb(128, 255, 0,) --> [rbg]{0.502, 1.000, 0.000}
- string: red --> {red}

Additionally rgb or rgba can be expressed in % which is also parsed.
"""
arg = user_arg if user_arg != "" else comm_arg

if value[0] == "#" and len(value) == 7: # color is hex code
return command, f"[HTML]{{{value[1:].upper()}}}{arg}"
if value[0] == "#" and len(value) == 4: # color is short hex code
val = f"{value[1].upper()*2}{value[2].upper()*2}{value[3].upper()*2}"
return command, f"[HTML]{{{val}}}{arg}"
elif value[:3] == "rgb": # color is rgb or rgba
r = re.findall("(?<=\\()[0-9\\s%]+(?=,)", value)[0].strip()
r = float(r[:-1]) / 100 if "%" in r else int(r) / 255
g = re.findall("(?<=,)[0-9\\s%]+(?=,)", value)[0].strip()
g = float(g[:-1]) / 100 if "%" in g else int(g) / 255
if value[3] == "a": # color is rgba
b = re.findall("(?<=,)[0-9\\s%]+(?=,)", value)[1].strip()
else: # color is rgb
b = re.findall("(?<=,)[0-9\\s%]+(?=\\))", value)[0].strip()
b = float(b[:-1]) / 100 if "%" in b else int(b) / 255
return command, f"[rgb]{{{r:.3f}, {g:.3f}, {b:.3f}}}{arg}"
else:
return command, f"{{{value}}}{arg}" # color is likely string-named

CONVERTED_ATTRIBUTES: dict[str, Callable] = {
"font-weight": font_weight,
"background-color": partial(color, command="cellcolor", comm_arg="--lwrap"),
"color": partial(color, command="color", comm_arg=""),
"font-style": font_style,
}

latex_styles: CSSList = []
for (attribute, value) in styles:
if isinstance(value, str) and "--latex" in value:
# return the style without conversion but drop '--latex'
latex_styles.append((attribute, value.replace("--latex", "")))
if attribute in CONVERTED_ATTRIBUTES.keys():
arg = ""
for x in ["--wrap", "--nowrap", "--lwrap", "--dwrap", "--rwrap"]:
if x in str(value):
arg, value = x, _parse_latex_options_strip(value, x)
break
latex_style = CONVERTED_ATTRIBUTES[attribute](value, arg)
if latex_style is not None:
latex_styles.extend([latex_style])
return latex_styles


def _escape_latex(s):
r"""
Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``,
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/templates/latex.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
{% endif %}
{% for row in body %}
{% for c in row %}{% if not loop.first %} & {% endif %}
{%- if c.type == 'th' %}{{parse_header(c, multirow_align, multicol_align)}}{% else %}{{parse_cell(c.cellstyle, c.display_value)}}{% endif %}
{%- if c.type == 'th' %}{{parse_header(c, multirow_align, multicol_align)}}{% else %}{{parse_cell(c.cellstyle, c.display_value, convert_css)}}{% endif %}
{%- endfor %} \\
{% endfor %}
{% set bottomrule = parse_table(table_styles, 'bottomrule') %}
Expand Down
46 changes: 46 additions & 0 deletions pandas/tests/io/formats/style/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas.io.formats.style import Styler
from pandas.io.formats.style_render import (
_parse_latex_cell_styles,
_parse_latex_css_conversion,
_parse_latex_header_span,
_parse_latex_table_styles,
_parse_latex_table_wrapping,
Expand Down Expand Up @@ -443,3 +444,48 @@ def test_parse_latex_table_wrapping(styler):
def test_short_caption(styler):
result = styler.to_latex(caption=("full cap", "short cap"))
assert "\\caption[short cap]{full cap}" in result


@pytest.mark.parametrize(
"css, expected",
[
([("color", "red")], [("color", "{red}")]), # test color and input format types
(
[("color", "rgb(128, 128, 128 )")],
[("color", "[rgb]{0.502, 0.502, 0.502}")],
),
(
[("color", "rgb(128, 50%, 25% )")],
[("color", "[rgb]{0.502, 0.500, 0.250}")],
),
(
[("color", "rgba(128,128,128,1)")],
[("color", "[rgb]{0.502, 0.502, 0.502}")],
),
([("color", "#FF00FF")], [("color", "[HTML]{FF00FF}")]),
([("color", "#F0F")], [("color", "[HTML]{FF00FF}")]),
([("font-weight", "bold")], [("bfseries", "")]), # test font-weight and types
([("font-weight", "bolder")], [("bfseries", "")]),
([("font-weight", "normal")], []),
([("background-color", "red")], [("cellcolor", "{red}--lwrap")]),
(
[("background-color", "#FF00FF")], # test background-color command and wrap
[("cellcolor", "[HTML]{FF00FF}--lwrap")],
),
([("font-style", "italic")], [("itshape", "")]), # test font-style and types
([("font-style", "oblique")], [("slshape", "")]),
([("font-style", "normal")], []),
([("color", "red /*--dwrap*/")], [("color", "{red}--dwrap")]), # css comments
([("background-color", "red /* --dwrap */")], [("cellcolor", "{red}--dwrap")]),
],
)
def test_parse_latex_css_conversion(css, expected):
result = _parse_latex_css_conversion(css)
assert result == expected


def test_parse_latex_css_conversion_option():
css = [("command", "option--latex--wrap")]
expected = [("command", "option--wrap")]
result = _parse_latex_css_conversion(css)
assert result == expected