From a82f4f57eb4b874130f44781d9218c0d4699f8bf Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 14 Mar 2021 23:19:44 +0100 Subject: [PATCH 1/7] update escaping for html --- pandas/io/formats/style.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index cc5f3164385cb..959ed940fcf77 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -48,6 +48,7 @@ from pandas.core.indexes.api import Index jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") +from jinja2.filters import escape as escape_func BaseFormatter = Union[str, Callable] ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]] @@ -113,6 +114,10 @@ class Styler: .. versionadded:: 1.2.0 + escape : bool, default False + Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display + strings with HTML-safe sequences. + Attributes ---------- env : Jinja2 jinja2.Environment @@ -169,6 +174,7 @@ def __init__( cell_ids: bool = True, na_rep: Optional[str] = None, uuid_len: int = 5, + escape: bool = False, ): # validate ordered args if isinstance(data, pd.Series): @@ -201,7 +207,7 @@ def __init__( ] = defaultdict(lambda: partial(_default_formatter, precision=None)) self.precision = precision # can be removed on set_precision depr cycle self.na_rep = na_rep # can be removed on set_na_rep depr cycle - self.format(formatter=None, precision=precision, na_rep=na_rep) + self.format(formatter=None, precision=precision, na_rep=na_rep, escape=escape) def _repr_html_(self) -> str: """ @@ -547,6 +553,7 @@ def format( subset: Optional[Union[slice, Sequence[Any]]] = None, na_rep: Optional[str] = None, precision: Optional[int] = None, + escape: bool = False, ) -> Styler: """ Format the text display value of cells. @@ -570,6 +577,12 @@ def format( .. 
versionadded:: 1.3.0 + escape : bool, default False + Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display + string with HTML-safe sequences. Escaping is done before ``formatter``. + + .. versionadded:: 1.3.0 + Returns ------- self : Styler @@ -640,7 +653,15 @@ def format( 0 MISS 1.0000 STRING 1 2.0 MISS FLOAT """ - if all((formatter is None, subset is None, precision is None, na_rep is None)): + if all( + ( + formatter is None, + subset is None, + precision is None, + na_rep is None, + escape is False, + ) + ): self._display_funcs.clear() return self # clear the formatter / revert to default and avoid looping @@ -658,7 +679,7 @@ def format( except KeyError: format_func = None format_func = _maybe_wrap_formatter( - format_func, na_rep=na_rep, precision=precision + format_func, na_rep=na_rep, precision=precision, escape=escape ) for row, value in data[[col]].itertuples(): @@ -2154,6 +2175,7 @@ def _maybe_wrap_formatter( formatter: Optional[BaseFormatter] = None, na_rep: Optional[str] = None, precision: Optional[int] = None, + escape: bool = False, ) -> Callable: """ Allows formatters to be expressed as str, callable or None, where None returns @@ -2170,9 +2192,11 @@ def _maybe_wrap_formatter( raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") if na_rep is None: - return formatter_func + na_func = formatter_func else: - return lambda x: na_rep if pd.isna(x) else formatter_func(x) + na_func = lambda x: na_rep if pd.isna(x) else formatter_func(x) + + return lambda x: na_func(escape_func(x)) if escape else na_func def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: From 51bda2cddef54411a738f6098144970800734335 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 14 Mar 2021 23:04:42 +0100 Subject: [PATCH 2/7] update escaping for html --- pandas/io/formats/style.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 
959ed940fcf77..1705feff41d94 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -652,6 +652,17 @@ def format( 0 1 2 0 MISS 1.0000 STRING 1 2.0 MISS FLOAT + + Using a formatter with HTML ``escape``. + + >>> df = pd.DataFrame([['
', '"A&B"']]) + >>> s = df.style.format('{0}', escape=True) + >>> s.render() + ... + <div></div> + "A&B" + ... + """ if all( ( From a323f93d17f0229350ac9f2f519ff18d5c9b1e93 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 14 Mar 2021 23:34:27 +0100 Subject: [PATCH 3/7] tests for format_escape --- pandas/io/formats/style.py | 2 +- pandas/tests/io/formats/style/test_style.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 1705feff41d94..b39ffb1cb81c3 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2207,7 +2207,7 @@ def _maybe_wrap_formatter( else: na_func = lambda x: na_rep if pd.isna(x) else formatter_func(x) - return lambda x: na_func(escape_func(x)) if escape else na_func + return (lambda x: na_func(escape_func(x))) if escape else na_func def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 977b92e217868..d0718e9e7f334 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -647,6 +647,17 @@ def test_format_clear(self): self.styler.format() assert (0, 0) not in self.styler._display_funcs # formatter cleared to default + def test_format_escape(self): + df = DataFrame([['<>&"']]) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False) + ex = 'X&<>&">X' + assert ex in s.render() + + # only the value should be escaped before passing to the formatter + s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True) + ex = 'X&<>&">X' + assert ex in s.render() + def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) msg = "style is not supported for non-unique indices." 
From e01eeb51a33ada34235185ccb482413d3c081b80 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 15 Mar 2021 07:34:22 +0100 Subject: [PATCH 4/7] check and docs fix --- pandas/io/formats/style.py | 23 +++++++++++---------- pandas/tests/io/formats/style/test_style.py | 13 ++++++++++-- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index c5eb8306c0b70..607d2088a31ce 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -48,7 +48,7 @@ from pandas.core.indexes.api import Index jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") -from jinja2.filters import escape as escape_func +from markupsafe import escape as escape_func # markupsafe is jinja2 dependency BaseFormatter = Union[str, Callable] ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]] @@ -624,7 +624,7 @@ def format( 0 MISS 1.000 A 1 2.000 MISS 3.000 - Using a format specification on consistent column dtypes + Using a ``formatter`` specification on consistent column dtypes >>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1]) 0 1 2 @@ -647,7 +647,7 @@ def format( 0 MISS 1.00 A 1 2.0 PASS 3.00 - Using a callable formatting function + Using a callable ``formatter`` function. >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT' >>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS') @@ -655,16 +655,16 @@ def format( 0 MISS 1.0000 STRING 1 2.0 MISS FLOAT - Using a formatter with HTML ``escape``. + Using a ``formatter`` with HTML ``escape`` and ``na_rep``. - >>> df = pd.DataFrame([['
', '"A&B"']]) - >>> s = df.style.format('{0}', escape=True) + >>> df = pd.DataFrame([['
', '"A&B"', None]]) + >>> s = df.style.format('{0}', escape=True, na_rep="NA") >>> s.render() ... <div></div> "A&B" + NA ... - """ if all( ( @@ -2204,11 +2204,12 @@ def _maybe_wrap_formatter( raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") if na_rep is None: - na_func = formatter_func + return (lambda x: formatter_func(escape_func(x))) if escape else formatter_func else: - na_func = lambda x: na_rep if pd.isna(x) else formatter_func(x) - - return (lambda x: na_func(escape_func(x))) if escape else na_func + if escape: + return lambda x: na_rep if pd.isna(x) else formatter_func(escape_func(x)) + else: + return lambda x: na_rep if pd.isna(x) else formatter_func(x) def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index d0718e9e7f334..272afb34ca3cf 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -650,14 +650,23 @@ def test_format_clear(self): def test_format_escape(self): df = DataFrame([['<>&"']]) s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False) - ex = 'X&<>&">X' - assert ex in s.render() + expected = 'X&<>&">X' + assert expected in s.render() # only the value should be escaped before passing to the formatter s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True) ex = 'X&<>&">X' assert ex in s.render() + def test_format_escape_na_rep(self): + # tests the na_rep is not escaped + df = DataFrame([['<>&"', None]]) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True, na_rep="&") + ex = 'X&<>&">X' + expected2 = '&' + assert ex in s.render() + assert expected2 in s.render() + def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) msg = "style is not supported for non-unique indices." 
From 2348165f891a821fc6bd9c705614d7055d11ebc6 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 15 Mar 2021 08:36:24 +0100 Subject: [PATCH 5/7] versionadded and whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/io/formats/style.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index a9a5041e4a410..87f80ee29a95f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -135,6 +135,7 @@ Other enhancements - :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`) - :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`) - :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`) +- :meth:`.Styler.format` adds keyword argument ``escape`` for optional HTML escaping (:issue:`40437`) - Builtin highlighting methods in :class:`Styler` have a more consistent signature and css customisability (:issue:`40242`) - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`) - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files. diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 607d2088a31ce..3b4bf07741145 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -118,6 +118,8 @@ class Styler: Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display strings with HTML-safe sequences. + .. 
versionadded:: 1.3.0 + Attributes ---------- env : Jinja2 jinja2.Environment From 27f39ebdb72ed020094d2e4113319faba64fb122 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 15 Mar 2021 14:40:16 +0100 Subject: [PATCH 6/7] only use escape on str --- pandas/io/formats/style.py | 8 ++++++-- pandas/tests/io/formats/style/test_style.py | 10 ++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3b4bf07741145..78257fd72c3f9 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2205,11 +2205,15 @@ def _maybe_wrap_formatter( else: raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") + def _str_escape(x): + """only use escape_func on str, else return input""" + return escape_func(x) if isinstance(x, str) else x + if na_rep is None: - return (lambda x: formatter_func(escape_func(x))) if escape else formatter_func + return (lambda x: formatter_func(_str_escape(x))) if escape else formatter_func else: if escape: - return lambda x: na_rep if pd.isna(x) else formatter_func(escape_func(x)) + return lambda x: na_rep if pd.isna(x) else formatter_func(_str_escape(x)) else: return lambda x: na_rep if pd.isna(x) else formatter_func(x) diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 272afb34ca3cf..cb1db27099cd1 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -667,6 +667,16 @@ def test_format_escape_na_rep(self): assert ex in s.render() assert expected2 in s.render() + def test_format_escape_floats(self): + # test given formatter for number format is not impacted by escape + s = self.df.style.format("{:.1f}", escape=True) + for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]: + assert expected in s.render() + # tests precision of floats is not impacted by escape + s = self.df.style.format(precision=1, escape=True) + for 
expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]: + assert expected in s.render() + def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) msg = "style is not supported for non-unique indices." From c1b29c3c5edc43b17a60931995677de5324ef983 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 15 Mar 2021 17:41:23 +0100 Subject: [PATCH 7/7] refactor --- pandas/io/formats/style.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 78257fd72c3f9..60fb854928f72 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2205,17 +2205,19 @@ def _maybe_wrap_formatter( else: raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") - def _str_escape(x): - """only use escape_func on str, else return input""" - return escape_func(x) if isinstance(x, str) else x + def _str_escape(x, escape: bool): + """if escaping: only use on str, else return input""" + if escape and isinstance(x, str): + return escape_func(x) + else: + return x + + display_func = lambda x: formatter_func(partial(_str_escape, escape=escape)(x)) if na_rep is None: - return (lambda x: formatter_func(_str_escape(x))) if escape else formatter_func + return display_func else: - if escape: - return lambda x: na_rep if pd.isna(x) else formatter_func(_str_escape(x)) - else: - return lambda x: na_rep if pd.isna(x) else formatter_func(x) + return lambda x: na_rep if pd.isna(x) else display_func(x) def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: