diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index a9a5041e4a410..87f80ee29a95f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -135,6 +135,7 @@ Other enhancements - :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`) - :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`) - :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`) +- :meth:`.Styler.format` adds keyword argument ``escape`` for optional HTML escaping (:issue:`40437`) - Builtin highlighting methods in :class:`Styler` have a more consistent signature and css customisability (:issue:`40242`) - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`) - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files. diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3abb39d2194c0..60fb854928f72 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -48,6 +48,7 @@ from pandas.core.indexes.api import Index jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") +from markupsafe import escape as escape_func # markupsafe is jinja2 dependency BaseFormatter = Union[str, Callable] ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]] @@ -113,6 +114,12 @@ class Styler: .. versionadded:: 1.2.0 + escape : bool, default False + Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display + strings with HTML-safe sequences. + + ... versionadded:: 1.3.0 + Attributes ---------- env : Jinja2 jinja2.Environment @@ -169,6 +176,7 @@ def __init__( cell_ids: bool = True, na_rep: Optional[str] = None, uuid_len: int = 5, + escape: bool = False, ): # validate ordered args if isinstance(data, pd.Series): @@ -202,7 +210,7 @@ def __init__( ] = defaultdict(lambda: partial(_default_formatter, precision=def_precision)) self.precision = precision # can be removed on set_precision depr cycle self.na_rep = na_rep # can be removed on set_na_rep depr cycle - self.format(formatter=None, precision=precision, na_rep=na_rep) + self.format(formatter=None, precision=precision, na_rep=na_rep, escape=escape) def _repr_html_(self) -> str: """ @@ -549,6 +557,7 @@ def format( subset: Optional[Union[slice, Sequence[Any]]] = None, na_rep: Optional[str] = None, precision: Optional[int] = None, + escape: bool = False, ) -> Styler: """ Format the text display value of cells. @@ -572,6 +581,12 @@ def format( .. versionadded:: 1.3.0 + escape : bool, default False + Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display + string with HTML-safe sequences. Escaping is done before ``formatter``. + + .. versionadded:: 1.3.0 + Returns ------- self : Styler @@ -611,7 +626,7 @@ def format( 0 MISS 1.000 A 1 2.000 MISS 3.000 - Using a format specification on consistent column dtypes + Using a ``formatter`` specification on consistent column dtypes >>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1]) 0 1 2 @@ -634,15 +649,34 @@ def format( 0 MISS 1.00 A 1 2.0 PASS 3.00 - Using a callable formatting function + Using a callable ``formatter`` function. >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT' >>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS') 0 1 2 0 MISS 1.0000 STRING 1 2.0 MISS FLOAT - """ - if all((formatter is None, subset is None, precision is None, na_rep is None)): + + Using a ``formatter`` with HTML ``escape`` and ``na_rep``. + + >>> df = pd.DataFrame([['
', '"A&B"', None]]) + >>> s = df.style.format('{0}', escape=True, na_rep="NA") + >>> s.render() + ... + <div></div> + "A&B" + NA + ... + """ + if all( + ( + formatter is None, + subset is None, + precision is None, + na_rep is None, + escape is False, + ) + ): self._display_funcs.clear() return self # clear the formatter / revert to default and avoid looping @@ -660,7 +694,7 @@ def format( except KeyError: format_func = None format_func = _maybe_wrap_formatter( - format_func, na_rep=na_rep, precision=precision + format_func, na_rep=na_rep, precision=precision, escape=escape ) for row, value in data[[col]].itertuples(): @@ -2154,6 +2188,7 @@ def _maybe_wrap_formatter( formatter: Optional[BaseFormatter] = None, na_rep: Optional[str] = None, precision: Optional[int] = None, + escape: bool = False, ) -> Callable: """ Allows formatters to be expressed as str, callable or None, where None returns @@ -2170,10 +2205,19 @@ def _maybe_wrap_formatter( else: raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") + def _str_escape(x, escape: bool): + """if escaping: only use on str, else return input""" + if escape and isinstance(x, str): + return escape_func(x) + else: + return x + + display_func = lambda x: formatter_func(partial(_str_escape, escape=escape)(x)) + if na_rep is None: - return formatter_func + return display_func else: - return lambda x: na_rep if pd.isna(x) else formatter_func(x) + return lambda x: na_rep if pd.isna(x) else display_func(x) def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 977b92e217868..cb1db27099cd1 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -647,6 +647,36 @@ def test_format_clear(self): self.styler.format() assert (0, 0) not in self.styler._display_funcs # formatter cleared to default + def test_format_escape(self): + df = DataFrame([['<>&"']]) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False) + expected = 'X&<>&">X' + assert expected in s.render() + + # only the value should be escaped before passing to the formatter + s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True) + ex = 'X&<>&">X' + assert ex in s.render() + + def test_format_escape_na_rep(self): + # tests the na_rep is not escaped + df = DataFrame([['<>&"', None]]) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True, na_rep="&") + ex = 'X&<>&">X' + expected2 = '&' + assert ex in s.render() + assert expected2 in s.render() + + def test_format_escape_floats(self): + # test given formatter for number format is not impacted by escape + s = self.df.style.format("{:.1f}", escape=True) + for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]: + assert expected in s.render() + # tests precision of floats is not impacted by escape + s = self.df.style.format(precision=1, escape=True) + for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]: + assert expected in s.render() + def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) msg = "style is not supported for non-unique indices."