Skip to content

Commit 3f2599f

Browse files
authored
ENH: escape html argument in Styler.format (#40437)
1 parent 2cea420 commit 3f2599f

File tree

3 files changed

+83
-8
lines changed

3 files changed

+83
-8
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ Other enhancements
136136
- :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`)
137137
- :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`)
138138
- :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`)
139+
- :meth:`.Styler.format` adds keyword argument ``escape`` for optional HTML escaping (:issue:`40437`)
139140
- Builtin highlighting methods in :class:`Styler` have a more consistent signature and css customisability (:issue:`40242`)
140141
- :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`)
141142
- :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files.

pandas/io/formats/style.py

+52-8
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from pandas.core.indexes.api import Index
4949

5050
jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.")
51+
from markupsafe import escape as escape_func # markupsafe is jinja2 dependency
5152

5253
BaseFormatter = Union[str, Callable]
5354
ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]]
@@ -113,6 +114,12 @@ class Styler:
113114
114115
.. versionadded:: 1.2.0
115116
117+
escape : bool, default False
118+
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
119+
strings with HTML-safe sequences.
120+
121+
.. versionadded:: 1.3.0
122+
116123
Attributes
117124
----------
118125
env : Jinja2 jinja2.Environment
@@ -169,6 +176,7 @@ def __init__(
169176
cell_ids: bool = True,
170177
na_rep: Optional[str] = None,
171178
uuid_len: int = 5,
179+
escape: bool = False,
172180
):
173181
# validate ordered args
174182
if isinstance(data, pd.Series):
@@ -202,7 +210,7 @@ def __init__(
202210
] = defaultdict(lambda: partial(_default_formatter, precision=def_precision))
203211
self.precision = precision # can be removed on set_precision depr cycle
204212
self.na_rep = na_rep # can be removed on set_na_rep depr cycle
205-
self.format(formatter=None, precision=precision, na_rep=na_rep)
213+
self.format(formatter=None, precision=precision, na_rep=na_rep, escape=escape)
206214

207215
def _repr_html_(self) -> str:
208216
"""
@@ -544,6 +552,7 @@ def format(
544552
subset: Optional[Union[slice, Sequence[Any]]] = None,
545553
na_rep: Optional[str] = None,
546554
precision: Optional[int] = None,
555+
escape: bool = False,
547556
) -> Styler:
548557
"""
549558
Format the text display value of cells.
@@ -567,6 +576,12 @@ def format(
567576
568577
.. versionadded:: 1.3.0
569578
579+
escape : bool, default False
580+
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
581+
strings with HTML-safe sequences. Escaping is done before ``formatter``.
582+
583+
.. versionadded:: 1.3.0
584+
570585
Returns
571586
-------
572587
self : Styler
@@ -606,7 +621,7 @@ def format(
606621
0 MISS 1.000 A
607622
1 2.000 MISS 3.000
608623
609-
Using a format specification on consistent column dtypes
624+
Using a ``formatter`` specification on consistent column dtypes
610625
611626
>>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1])
612627
0 1 2
@@ -629,15 +644,34 @@ def format(
629644
0 MISS 1.00 A
630645
1 2.0 PASS 3.00
631646
632-
Using a callable formatting function
647+
Using a callable ``formatter`` function.
633648
634649
>>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT'
635650
>>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS')
636651
0 1 2
637652
0 MISS 1.0000 STRING
638653
1 2.0 MISS FLOAT
639-
"""
640-
if all((formatter is None, subset is None, precision is None, na_rep is None)):
654+
655+
Using a ``formatter`` with HTML ``escape`` and ``na_rep``.
656+
657+
>>> df = pd.DataFrame([['<div></div>', '"A&B"', None]])
658+
>>> s = df.style.format('<a href="a.com/{0}">{0}</a>', escape=True, na_rep="NA")
659+
>>> s.render()
660+
...
661+
<td .. ><a href="a.com/&lt;div&gt;&lt;/div&gt;">&lt;div&gt;&lt;/div&gt;</a></td>
662+
<td .. ><a href="a.com/&#34;A&amp;B&#34;">&#34;A&amp;B&#34;</a></td>
663+
<td .. >NA</td>
664+
...
665+
"""
666+
if all(
667+
(
668+
formatter is None,
669+
subset is None,
670+
precision is None,
671+
na_rep is None,
672+
escape is False,
673+
)
674+
):
641675
self._display_funcs.clear()
642676
return self # clear the formatter / revert to default and avoid looping
643677

@@ -655,7 +689,7 @@ def format(
655689
except KeyError:
656690
format_func = None
657691
format_func = _maybe_wrap_formatter(
658-
format_func, na_rep=na_rep, precision=precision
692+
format_func, na_rep=na_rep, precision=precision, escape=escape
659693
)
660694

661695
for row, value in data[[col]].itertuples():
@@ -2192,6 +2226,7 @@ def _maybe_wrap_formatter(
21922226
formatter: Optional[BaseFormatter] = None,
21932227
na_rep: Optional[str] = None,
21942228
precision: Optional[int] = None,
2229+
escape: bool = False,
21952230
) -> Callable:
21962231
"""
21972232
Allows formatters to be expressed as str, callable or None, where None returns
@@ -2208,10 +2243,19 @@ def _maybe_wrap_formatter(
22082243
else:
22092244
raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}")
22102245

2246+
def _str_escape(x, escape: bool):
2247+
"""if escaping: only use on str, else return input"""
2248+
if escape and isinstance(x, str):
2249+
return escape_func(x)
2250+
else:
2251+
return x
2252+
2253+
display_func = lambda x: formatter_func(partial(_str_escape, escape=escape)(x))
2254+
22112255
if na_rep is None:
2212-
return formatter_func
2256+
return display_func
22132257
else:
2214-
return lambda x: na_rep if pd.isna(x) else formatter_func(x)
2258+
return lambda x: na_rep if pd.isna(x) else display_func(x)
22152259

22162260

22172261
def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList:

pandas/tests/io/formats/style/test_style.py

+30
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,36 @@ def test_format_clear(self):
648648
self.styler.format()
649649
assert (0, 0) not in self.styler._display_funcs # formatter cleared to default
650650

651+
def test_format_escape(self):
652+
df = DataFrame([['<>&"']])
653+
s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False)
654+
expected = '<td id="T__row0_col0" class="data row0 col0" >X&<>&">X</td>'
655+
assert expected in s.render()
656+
657+
# only the value should be escaped before passing to the formatter
658+
s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True)
659+
ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
660+
assert ex in s.render()
661+
662+
def test_format_escape_na_rep(self):
663+
# tests the na_rep is not escaped
664+
df = DataFrame([['<>&"', None]])
665+
s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True, na_rep="&")
666+
ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
667+
expected2 = '<td id="T__row0_col1" class="data row0 col1" >&</td>'
668+
assert ex in s.render()
669+
assert expected2 in s.render()
670+
671+
def test_format_escape_floats(self):
672+
# test given formatter for number format is not impacted by escape
673+
s = self.df.style.format("{:.1f}", escape=True)
674+
for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]:
675+
assert expected in s.render()
676+
# tests precision of floats is not impacted by escape
677+
s = self.df.style.format(precision=1, escape=True)
678+
for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]:
679+
assert expected in s.render()
680+
651681
def test_nonunique_raises(self):
652682
df = DataFrame([[1, 2]], columns=["A", "A"])
653683
msg = "style is not supported for non-unique indices."

0 commit comments

Comments
 (0)