pandas-dev · jreback · Mar 23, 2021 · Mar 14, 2021 · Mar 14, 2021 · Mar 14, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -135,6 +135,7 @@ Other enhancements
 - :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`)
 - :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`)
 - :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`)
+- :meth:`.Styler.format` adds keyword argument ``escape`` for optional HTML escaping (:issue:`40437`)
 - Builtin highlighting methods in :class:`Styler` have a more consistent signature and css customisability (:issue:`40242`)
 - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`)
 - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files.

diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -48,6 +48,7 @@
 from pandas.core.indexes.api import Index
 
 jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.")
+from markupsafe import escape as escape_func  # markupsafe is a jinja2 dependency
 
 BaseFormatter = Union[str, Callable]
 ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]]
@@ -113,6 +114,12 @@ class Styler:
 
         .. versionadded:: 1.2.0
 
+    escape : bool, default False
+        Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
+        strings with HTML-safe sequences.
+
+        .. versionadded:: 1.3.0
+
     Attributes
     ----------
     env : Jinja2 jinja2.Environment
@@ -169,6 +176,7 @@ def __init__(
         cell_ids: bool = True,
         na_rep: Optional[str] = None,
         uuid_len: int = 5,
+        escape: bool = False,
     ):
         # validate ordered args
         if isinstance(data, pd.Series):
@@ -202,7 +210,7 @@ def __init__(
         ] = defaultdict(lambda: partial(_default_formatter, precision=def_precision))
         self.precision = precision  # can be removed on set_precision depr cycle
         self.na_rep = na_rep  # can be removed on set_na_rep depr cycle
-        self.format(formatter=None, precision=precision, na_rep=na_rep)
+        self.format(formatter=None, precision=precision, na_rep=na_rep, escape=escape)
 
     def _repr_html_(self) -> str:
         """
@@ -549,6 +557,7 @@ def format(
         subset: Optional[Union[slice, Sequence[Any]]] = None,
         na_rep: Optional[str] = None,
         precision: Optional[int] = None,
+        escape: bool = False,
     ) -> Styler:
         """
         Format the text display value of cells.
@@ -572,6 +581,12 @@ def format(
 
             .. versionadded:: 1.3.0
 
+        escape : bool, default False
+            Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
+            strings with HTML-safe sequences. Escaping is done before ``formatter``.
+
+            .. versionadded:: 1.3.0
+
         Returns
         -------
         self : Styler
@@ -611,7 +626,7 @@ def format(
         0    MISS   1.000       A
         1   2.000    MISS   3.000
 
-        Using a format specification on consistent column dtypes
+        Using a ``formatter`` specification on consistent column dtypes
 
         >>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1])
                 0      1          2
@@ -634,15 +649,34 @@ def format(
         0    MISS   1.00      A
         1     2.0   PASS   3.00
 
-        Using a callable formatting function
+        Using a callable ``formatter`` function.
 
         >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT'
         >>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS')
                 0        1        2
         0    MISS   1.0000   STRING
         1     2.0     MISS    FLOAT
-        """
-        if all((formatter is None, subset is None, precision is None, na_rep is None)):
+
+        Using a ``formatter`` with HTML ``escape`` and ``na_rep``.
+
+        >>> df = pd.DataFrame([['<div></div>', '"A&B"', None]])
+        >>> s = df.style.format('<a href="a.com/{0}">{0}</a>', escape=True, na_rep="NA")
+        >>> s.render()
+        ...
+        <td .. ><a href="a.com/&lt;div&gt;&lt;/div&gt;">&lt;div&gt;&lt;/div&gt;</a></td>
+        <td .. ><a href="a.com/&#34;A&amp;B&#34;">&#34;A&amp;B&#34;</a></td>
+        <td .. >NA</td>
+        ...
+        """
+        if all(
+            (
+                formatter is None,
+                subset is None,
+                precision is None,
+                na_rep is None,
+                escape is False,
+            )
+        ):
             self._display_funcs.clear()
             return self  # clear the formatter / revert to default and avoid looping
 
@@ -660,7 +694,7 @@ def format(
             except KeyError:
                 format_func = None
             format_func = _maybe_wrap_formatter(
-                format_func, na_rep=na_rep, precision=precision
+                format_func, na_rep=na_rep, precision=precision, escape=escape
             )
 
             for row, value in data[[col]].itertuples():
@@ -2154,6 +2188,7 @@ def _maybe_wrap_formatter(
     formatter: Optional[BaseFormatter] = None,
     na_rep: Optional[str] = None,
     precision: Optional[int] = None,
+    escape: bool = False,
 ) -> Callable:
     """
     Allows formatters to be expressed as str, callable or None, where None returns
@@ -2171,9 +2206,12 @@ def _maybe_wrap_formatter(
         raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}")
 
     if na_rep is None:
-        return formatter_func
+        return (lambda x: formatter_func(escape_func(x))) if escape else formatter_func
     else:
-        return lambda x: na_rep if pd.isna(x) else formatter_func(x)
+        if escape:
+            return lambda x: na_rep if pd.isna(x) else formatter_func(escape_func(x))
+        else:
+            return lambda x: na_rep if pd.isna(x) else formatter_func(x)
 
 
 def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList:

diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py
@@ -647,6 +647,26 @@ def test_format_clear(self):
         self.styler.format()
         assert (0, 0) not in self.styler._display_funcs  # formatter cleared to default
 
+    def test_format_escape(self):
+        df = DataFrame([['<>&"']])
+        s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False)
+        expected = '<td id="T__row0_col0" class="data row0 col0" >X&<>&">X</td>'
+        assert expected in s.render()
+
+        # only the value should be escaped before passing to the formatter
+        s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True)
+        ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
+        assert ex in s.render()
+
+    def test_format_escape_na_rep(self):
+        # tests that na_rep is not escaped
+        df = DataFrame([['<>&"', None]])
+        s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True, na_rep="&")
+        ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
+        expected2 = '<td id="T__row0_col1" class="data row0 col1" >&</td>'
+        assert ex in s.render()
+        assert expected2 in s.render()
+
     def test_nonunique_raises(self):
         df = DataFrame([[1, 2]], columns=["A", "A"])
         msg = "style is not supported for non-unique indices."