Skip to content

Commit 3946a96

Browse files
attack68 authored and TLouf committed
REF: type change escape in Styler.format to str to allow "html" and "latex" (pandas-dev#41619)
1 parent 94a39df commit 3946a96

File tree

4 files changed

+119
-31
lines changed

4 files changed

+119
-31
lines changed

doc/source/user_guide/style.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -1462,7 +1462,7 @@
14621462
"metadata": {},
14631463
"outputs": [],
14641464
"source": [
1465-
"df4.style.format(escape=True)"
1465+
"df4.style.format(escape=\"html\")"
14661466
]
14671467
},
14681468
{
@@ -1471,7 +1471,7 @@
14711471
"metadata": {},
14721472
"outputs": [],
14731473
"source": [
1474-
"df4.style.format('<a href=\"https://pandas.pydata.org\" target=\"_blank\">{}</a>', escape=True)"
1474+
"df4.style.format('<a href=\"https://pandas.pydata.org\" target=\"_blank\">{}</a>', escape=\"html\")"
14751475
]
14761476
},
14771477
{

pandas/io/formats/style.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def _mpl(func: Callable):
7474

7575

7676
class Styler(StylerRenderer):
77-
"""
77+
r"""
7878
Helps style a DataFrame or Series according to the data with HTML and CSS.
7979
8080
Parameters
@@ -119,9 +119,12 @@ class Styler(StylerRenderer):
119119
120120
.. versionadded:: 1.3.0
121121
122-
escape : bool, default False
123-
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
124-
strings with HTML-safe sequences.
122+
escape : str, optional
123+
Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
124+
in the cell display string with HTML-safe sequences.
125+
Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
126+
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
127+
LaTeX-safe sequences.
125128
126129
.. versionadded:: 1.3.0
127130
@@ -179,7 +182,7 @@ def __init__(
179182
uuid_len: int = 5,
180183
decimal: str = ".",
181184
thousands: str | None = None,
182-
escape: bool = False,
185+
escape: str | None = None,
183186
):
184187
super().__init__(
185188
data=data,

pandas/io/formats/style_render.py

+73-14
Original file line numberDiff line numberDiff line change
@@ -457,9 +457,9 @@ def format(
457457
precision: int | None = None,
458458
decimal: str = ".",
459459
thousands: str | None = None,
460-
escape: bool = False,
460+
escape: str | None = None,
461461
) -> StylerRenderer:
462-
"""
462+
r"""
463463
Format the text display value of cells.
464464
465465
Parameters
@@ -492,9 +492,13 @@ def format(
492492
493493
.. versionadded:: 1.3.0
494494
495-
escape : bool, default False
496-
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
497-
string with HTML-safe sequences. Escaping is done before ``formatter``.
495+
escape : str, optional
496+
Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
497+
in the cell display string with HTML-safe sequences.
498+
Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
499+
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
500+
LaTeX-safe sequences.
501+
Escaping is done before ``formatter``.
498502
499503
.. versionadded:: 1.3.0
500504
@@ -571,13 +575,26 @@ def format(
571575
Using a ``formatter`` with HTML ``escape`` and ``na_rep``.
572576
573577
>>> df = pd.DataFrame([['<div></div>', '"A&B"', None]])
574-
>>> s = df.style.format('<a href="a.com/{0}">{0}</a>', escape=True, na_rep="NA")
578+
>>> s = df.style.format(
579+
... '<a href="a.com/{0}">{0}</a>', escape="html", na_rep="NA"
580+
... )
575581
>>> s.render()
576582
...
577583
<td .. ><a href="a.com/&lt;div&gt;&lt;/div&gt;">&lt;div&gt;&lt;/div&gt;</a></td>
578584
<td .. ><a href="a.com/&#34;A&amp;B&#34;">&#34;A&amp;B&#34;</a></td>
579585
<td .. >NA</td>
580586
...
587+
588+
Using a ``formatter`` with LaTeX ``escape``.
589+
590+
>>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]])
591+
>>> s = df.style.format("\\textbf{{{}}}", escape="latex").to_latex()
592+
\begin{tabular}{ll}
593+
{} & {0} \\
594+
0 & \textbf{123} \\
595+
1 & \textbf{\textasciitilde \space \textasciicircum } \\
596+
2 & \textbf{\$\%\#} \\
597+
\end{tabular}
581598
"""
582599
if all(
583600
(
@@ -587,7 +604,7 @@ def format(
587604
decimal == ".",
588605
thousands is None,
589606
na_rep is None,
590-
escape is False,
607+
escape is None,
591608
)
592609
):
593610
self._display_funcs.clear()
@@ -771,10 +788,17 @@ def wrapper(x):
771788
return wrapper
772789

773790

774-
def _str_escape_html(x):
775-
"""if escaping html: only use on str, else return input"""
791+
def _str_escape(x, escape):
792+
"""if escaping: only use on str, else return input"""
776793
if isinstance(x, str):
777-
return escape_html(x)
794+
if escape == "html":
795+
return escape_html(x)
796+
elif escape == "latex":
797+
return _escape_latex(x)
798+
else:
799+
raise ValueError(
800+
f"`escape` only permitted in {{'html', 'latex'}}, got {escape}"
801+
)
778802
return x
779803

780804

@@ -784,7 +808,7 @@ def _maybe_wrap_formatter(
784808
precision: int | None = None,
785809
decimal: str = ".",
786810
thousands: str | None = None,
787-
escape: bool = False,
811+
escape: str | None = None,
788812
) -> Callable:
789813
"""
790814
Allows formatters to be expressed as str, callable or None, where None returns
@@ -804,9 +828,9 @@ def _maybe_wrap_formatter(
804828
else:
805829
raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}")
806830

807-
# Replace HTML chars if escaping
808-
if escape:
809-
func_1 = lambda x: func_0(_str_escape_html(x))
831+
# Replace chars if escaping
832+
if escape is not None:
833+
func_1 = lambda x: func_0(_str_escape(x, escape=escape))
810834
else:
811835
func_1 = func_0
812836

@@ -1187,3 +1211,38 @@ def _parse_latex_options_strip(value: str | int | float, arg: str) -> str:
11871211
For example: 'red /* --wrap */ ' --> 'red'
11881212
"""
11891213
return str(value).replace(arg, "").replace("/*", "").replace("*/", "").strip()
1214+
1215+
1216+
def _escape_latex(s):
1217+
r"""
1218+
Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``,
1219+
``~``, ``^``, and ``\`` in the string with LaTeX-safe sequences.
1220+
1221+
Use this if you need to display text that might contain such characters in LaTeX.
1222+
1223+
Parameters
1224+
----------
1225+
s : str
1226+
Input to be escaped
1227+
1228+
Returns
1229+
-------
1230+
str :
1231+
Escaped string
1232+
"""
1233+
return (
1234+
s.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash
1235+
.replace("ab2§=§8yz ", "ab2§=§8yz\\space ") # since \backslash gobbles spaces
1236+
.replace("&", "\\&")
1237+
.replace("%", "\\%")
1238+
.replace("$", "\\$")
1239+
.replace("#", "\\#")
1240+
.replace("_", "\\_")
1241+
.replace("{", "\\{")
1242+
.replace("}", "\\}")
1243+
.replace("~ ", "~\\space ") # since \textasciitilde gobbles spaces
1244+
.replace("~", "\\textasciitilde ")
1245+
.replace("^ ", "^\\space ") # since \textasciicircum gobbles spaces
1246+
.replace("^", "\\textasciicircum ")
1247+
.replace("ab2§=§8yz", "\\textbackslash ")
1248+
)

pandas/tests/io/formats/style/test_format.py

+36-10
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
pytest.importorskip("jinja2")
1313
from pandas.io.formats.style import Styler
14+
from pandas.io.formats.style_render import _str_escape
1415

1516

1617
@pytest.fixture
@@ -106,22 +107,36 @@ def test_format_clear(styler):
106107
assert (0, 0) not in styler._display_funcs # formatter cleared to default
107108

108109

109-
def test_format_escape():
110-
df = DataFrame([['<>&"']])
111-
s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False)
112-
expected = '<td id="T__row0_col0" class="data row0 col0" >X&<>&">X</td>'
110+
@pytest.mark.parametrize(
111+
"escape, exp",
112+
[
113+
("html", "&lt;&gt;&amp;&#34;%$#_{}~^\\~ ^ \\ "),
114+
(
115+
"latex",
116+
'<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
117+
"\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
118+
"\\textbackslash \\space ",
119+
),
120+
],
121+
)
122+
def test_format_escape_html(escape, exp):
123+
chars = '<>&"%$#_{}~^\\~ ^ \\ '
124+
df = DataFrame([[chars]])
125+
126+
s = Styler(df, uuid_len=0).format("&{0}&", escape=None)
127+
expected = f'<td id="T__row0_col0" class="data row0 col0" >&{chars}&</td>'
113128
assert expected in s.render()
114129

115130
# only the value should be escaped before passing to the formatter
116-
s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True)
117-
ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
118-
assert ex in s.render()
131+
s = Styler(df, uuid_len=0).format("&{0}&", escape=escape)
132+
expected = f'<td id="T__row0_col0" class="data row0 col0" >&{exp}&</td>'
133+
assert expected in s.render()
119134

120135

121136
def test_format_escape_na_rep():
122137
# tests the na_rep is not escaped
123138
df = DataFrame([['<>&"', None]])
124-
s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True, na_rep="&")
139+
s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&")
125140
ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
126141
expected2 = '<td id="T__row0_col1" class="data row0 col1" >&</td>'
127142
assert ex in s.render()
@@ -130,11 +145,11 @@ def test_format_escape_na_rep():
130145

131146
def test_format_escape_floats(styler):
132147
# test given formatter for number format is not impacted by escape
133-
s = styler.format("{:.1f}", escape=True)
148+
s = styler.format("{:.1f}", escape="html")
134149
for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]:
135150
assert expected in s.render()
136151
# tests precision of floats is not impacted by escape
137-
s = styler.format(precision=1, escape=True)
152+
s = styler.format(precision=1, escape="html")
138153
for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]:
139154
assert expected in s.render()
140155

@@ -239,3 +254,14 @@ def test_format_decimal(formatter, thousands, precision):
239254
decimal="_", formatter=formatter, thousands=thousands, precision=precision
240255
)._translate(True, True)
241256
assert "000_123" in result["body"][0][1]["display_value"]
257+
258+
259+
def test_str_escape_error():
260+
msg = "`escape` only permitted in {'html', 'latex'}, got "
261+
with pytest.raises(ValueError, match=msg):
262+
_str_escape("text", "bad_escape")
263+
264+
with pytest.raises(ValueError, match=msg):
265+
_str_escape("text", [])
266+
267+
_str_escape(2.00, "bad_escape") # OK since dtype is float

0 commit comments

Comments
 (0)