From daee4c623627da66f875ea9959e733307ac5ddd0 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 22 May 2021 19:33:22 +0200 Subject: [PATCH 01/12] rename escape: escape_html --- pandas/io/formats/style.py | 6 +++--- pandas/io/formats/style_render.py | 20 +++++++++++--------- pandas/tests/io/formats/style/test_format.py | 10 +++++----- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 56e34d9500f31..ac34495423eac 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -115,7 +115,7 @@ class Styler(StylerRenderer): .. versionadded:: 1.3.0 - escape : bool, default False + escape_html : bool, default False Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display strings with HTML-safe sequences. @@ -175,7 +175,7 @@ def __init__( uuid_len: int = 5, decimal: str = ".", thousands: str | None = None, - escape: bool = False, + escape_html: bool = False, ): super().__init__( data=data, @@ -194,7 +194,7 @@ def __init__( formatter=None, precision=precision, na_rep=na_rep, - escape=escape, + escape_html=escape_html, decimal=decimal, thousands=thousands, ) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 9d149008dcb88..c33492e4cf75e 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -40,7 +40,7 @@ import pandas.core.common as com jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") -from markupsafe import escape as escape_html # markupsafe is jinja2 dependency +from markupsafe import escape as escape_html_f # markupsafe is jinja2 dependency BaseFormatter = Union[str, Callable] ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]] @@ -403,7 +403,7 @@ def format( precision: int | None = None, decimal: str = ".", thousands: str | None = None, - escape: bool = False, + escape_html: bool = False, ) -> StylerRenderer: """ Format the text display value of cells. @@ -438,7 +438,7 @@ def format( .. versionadded:: 1.3.0 - escape : bool, default False + escape_html : bool, default False Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display string with HTML-safe sequences. Escaping is done before ``formatter``. @@ -517,7 +517,9 @@ def format( Using a ``formatter`` with HTML ``escape`` and ``na_rep``. >>> df = pd.DataFrame([['
', '"A&B"', None]]) - >>> s = df.style.format('{0}', escape=True, na_rep="NA") + >>> s = df.style.format( + ... '{0}', escape_html=True, na_rep="NA" + ... ) >>> s.render() ... <div></div> @@ -533,7 +535,7 @@ def format( decimal == ".", thousands is None, na_rep is None, - escape is False, + escape_html is False, ) ): self._display_funcs.clear() @@ -555,7 +557,7 @@ def format( precision=precision, decimal=decimal, thousands=thousands, - escape=escape, + escape_html=escape_html, ) for ri in ris: self._display_funcs[(ri, ci)] = format_func @@ -720,7 +722,7 @@ def wrapper(x): def _str_escape_html(x): """if escaping html: only use on str, else return input""" if isinstance(x, str): - return escape_html(x) + return escape_html_f(x) return x @@ -730,7 +732,7 @@ def _maybe_wrap_formatter( precision: int | None = None, decimal: str = ".", thousands: str | None = None, - escape: bool = False, + escape_html: bool = False, ) -> Callable: """ Allows formatters to be expressed as str, callable or None, where None returns @@ -751,7 +753,7 @@ def _maybe_wrap_formatter( raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") # Replace HTML chars if escaping - if escape: + if escape_html: func_1 = lambda x: func_0(_str_escape_html(x)) else: func_1 = func_0 diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 9db27689a53f5..dad9d3aec1cdd 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -108,12 +108,12 @@ def test_format_clear(styler): def test_format_escape(): df = DataFrame([['<>&"']]) - s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape_html=False) expected = 'X&<>&">X' assert expected in s.render() # only the value should be escaped before passing to the formatter - s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape_html=True) ex = 'X&<>&">X' assert ex in s.render() @@ -121,7 +121,7 @@ def test_format_escape(): def test_format_escape_na_rep(): # tests the na_rep is not escaped df = DataFrame([['<>&"', None]]) - s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True, na_rep="&") + s = Styler(df, uuid_len=0).format("X&{0}>X", escape_html=True, na_rep="&") ex = 'X&<>&">X' expected2 = '&' assert ex in s.render() @@ -130,11 +130,11 @@ def test_format_escape_na_rep(): def test_format_escape_floats(styler): # test given formatter for number format is not impacted by escape - s = styler.format("{:.1f}", escape=True) + s = styler.format("{:.1f}", escape_html=True) for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]: assert expected in s.render() # tests precision of floats is not impacted by escape - s = styler.format(precision=1, escape=True) + s = styler.format(precision=1, escape_html=True) for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]: assert expected in s.render() From 23907785bc73ff412a2036d13ab5ecf0991f2409 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 23 May 2021 09:14:36 +0200 Subject: [PATCH 02/12] user_guide update --- doc/source/user_guide/style.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index 86696cc909764..2fdadc400961f 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -152,7 +152,7 @@ "\n", "Before adding styles it is useful to show that the [Styler][styler] can distinguish the *display* value from the *actual* value. To control the display value, the text is printed in each cell, and we can use the [.format()][formatfunc] method to manipulate this according to a [format spec string][format] or a callable that takes a single value and returns a string. It is possible to define this for the whole table or for individual columns. \n", "\n", - "Additionally, the format function has a **precision** argument to specifically help formatting floats, an **na_rep** argument to display missing data, and an **escape** argument to help displaying safe-HTML. The default formatter is configured to adopt pandas' regular `display.precision` option, controllable using `with pd.option_context('display.precision', 2):`\n", + "Additionally, the format function has a **precision** argument to specifically help formatting floats, an **na_rep** argument to display missing data, and an **escape_html** argument to help displaying safe-HTML. The default formatter is configured to adopt pandas' regular `display.precision` option, controllable using `with pd.option_context('display.precision', 2):`\n", "\n", "Here is an example of using the multiple options to control the formatting generally and with specific column formatters.\n", "\n", @@ -1448,7 +1448,7 @@ "metadata": {}, "outputs": [], "source": [ - "df4.style.format(escape=True)" + "df4.style.format(escape_html=True)" ] }, { @@ -1457,7 +1457,7 @@ "metadata": {}, "outputs": [], "source": [ - "df4.style.format('{}', escape=True)" + "df4.style.format('{}', escape_html=True)" ] }, { From 4c39dc9de83a27eee1117e3732b0087c071b1b0c Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 23 May 2021 09:39:42 +0200 Subject: [PATCH 03/12] whats new --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index d357e4a633347..c6823a613649a 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -211,7 +211,7 @@ Other enhancements - :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`) - :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`) - :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`) -- :meth:`.Styler.format` adds keyword argument ``escape`` for optional HTML escaping (:issue:`40437`) +- :meth:`.Styler.format` adds keyword argument ``escape_html`` for optional HTML escaping (:issue:`40437` :issue:`41619`) - :meth:`.Styler.background_gradient` now allows the ability to supply a specific gradient map (:issue:`22727`) - :meth:`.Styler.clear` now clears :attr:`Styler.hidden_index` and :attr:`Styler.hidden_columns` as well (:issue:`40484`) - Builtin highlighting methods in :class:`Styler` have a more consistent signature and css customisability (:issue:`40242`) From d7e51a4c0d7f8d71e2fa4cd7b78d3f7f86ad32ab Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 25 May 2021 16:11:54 +0200 Subject: [PATCH 04/12] change arg signature to escape="html" --- pandas/io/formats/style.py | 10 ++++----- pandas/io/formats/style_render.py | 23 ++++++++++---------- pandas/tests/io/formats/style/test_format.py | 10 ++++----- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 4d9d1164a80f9..ef6ee321a2165 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -119,9 +119,9 @@ class Styler(StylerRenderer): .. versionadded:: 1.3.0 - escape_html : bool, default False - Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display - strings with HTML-safe sequences. + escape : str, optional + Use "html" to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` + in cell display strings with HTML-safe sequences. ... versionadded:: 1.3.0 @@ -179,7 +179,7 @@ def __init__( uuid_len: int = 5, decimal: str = ".", thousands: str | None = None, - escape_html: bool = False, + escape: str | None = None, ): super().__init__( data=data, @@ -198,7 +198,7 @@ def __init__( formatter=None, precision=precision, na_rep=na_rep, - escape_html=escape_html, + escape=escape, decimal=decimal, thousands=thousands, ) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 1114a4c3d553b..b5e81be248c1c 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -40,7 +40,7 @@ import pandas.core.common as com jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") -from markupsafe import escape as escape_html_f # markupsafe is jinja2 dependency +from markupsafe import escape as escape_html # markupsafe is jinja2 dependency BaseFormatter = Union[str, Callable] ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]] @@ -457,7 +457,7 @@ def format( precision: int | None = None, decimal: str = ".", thousands: str | None = None, - escape_html: bool = False, + escape: str | None = None, ) -> StylerRenderer: """ Format the text display value of cells. @@ -492,9 +492,10 @@ def format( .. versionadded:: 1.3.0 - escape_html : bool, default False - Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display - string with HTML-safe sequences. Escaping is done before ``formatter``. + escape : str, optional + Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` + in cell display string with HTML-safe sequences. Escaping is done + before ``formatter``. .. versionadded:: 1.3.0 @@ -572,7 +573,7 @@ def format( >>> df = pd.DataFrame([['
', '"A&B"', None]]) >>> s = df.style.format( - ... '{0}', escape_html=True, na_rep="NA" + ... '{0}', escape='html', na_rep="NA" ... ) >>> s.render() ... @@ -589,7 +590,7 @@ def format( decimal == ".", thousands is None, na_rep is None, - escape_html is False, + escape is None, ) ): self._display_funcs.clear() @@ -611,7 +612,7 @@ def format( precision=precision, decimal=decimal, thousands=thousands, - escape_html=escape_html, + escape=escape, ) for ri in ris: self._display_funcs[(ri, ci)] = format_func @@ -776,7 +777,7 @@ def wrapper(x): def _str_escape_html(x): """if escaping html: only use on str, else return input""" if isinstance(x, str): - return escape_html_f(x) + return escape_html(x) return x @@ -786,7 +787,7 @@ def _maybe_wrap_formatter( precision: int | None = None, decimal: str = ".", thousands: str | None = None, - escape_html: bool = False, + escape: str | None = None, ) -> Callable: """ Allows formatters to be expressed as str, callable or None, where None returns @@ -807,7 +808,7 @@ def _maybe_wrap_formatter( raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") # Replace HTML chars if escaping - if escape_html: + if escape is not None and "html" in escape: func_1 = lambda x: func_0(_str_escape_html(x)) else: func_1 = func_0 diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index dad9d3aec1cdd..b30b56fa430a7 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -108,12 +108,12 @@ def test_format_clear(styler): def test_format_escape(): df = DataFrame([['<>&"']]) - s = Styler(df, uuid_len=0).format("X&{0}>X", escape_html=False) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape=None) expected = 'X&<>&">X' assert expected in s.render() # only the value should be escaped before passing to the formatter - s = Styler(df, uuid_len=0).format("X&{0}>X", escape_html=True) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html") ex = 'X&<>&">X' assert ex in s.render() @@ -121,7 +121,7 @@ def test_format_escape(): def test_format_escape_na_rep(): # tests the na_rep is not escaped df = DataFrame([['<>&"', None]]) - s = Styler(df, uuid_len=0).format("X&{0}>X", escape_html=True, na_rep="&") + s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&") ex = 'X&<>&">X' expected2 = '&' assert ex in s.render() @@ -130,11 +130,11 @@ def test_format_escape_na_rep(): def test_format_escape_floats(styler): # test given formatter for number format is not impacted by escape - s = styler.format("{:.1f}", escape_html=True) + s = styler.format("{:.1f}", escape="html") for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]: assert expected in s.render() # tests precision of floats is not impacted by escape - s = styler.format(precision=1, escape_html=True) + s = styler.format(precision=1, escape="html") for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]: assert expected in s.render() From e75cd8389f9ddd4d33327d2197372f638f587171 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 25 May 2021 16:15:41 +0200 Subject: [PATCH 05/12] change arg signature to escape="html" --- pandas/io/formats/style_render.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index b5e81be248c1c..94ef537e8b3b9 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -573,7 +573,7 @@ def format( >>> df = pd.DataFrame([['
', '"A&B"', None]]) >>> s = df.style.format( - ... '{0}', escape='html', na_rep="NA" + ... '{0}', escape="html", na_rep="NA" ... ) >>> s.render() ... From 9c2b6f13bcb62d2f0ed0ed081ed55777ff48551a Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 25 May 2021 16:17:25 +0200 Subject: [PATCH 06/12] remove whats new --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index f37bff747bf66..7c209298c920a 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -214,7 +214,7 @@ Other enhancements - :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`) - :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`) - :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`) -- :meth:`.Styler.format` adds keyword argument ``escape_html`` for optional HTML escaping (:issue:`40437` :issue:`41619`) +- :meth:`.Styler.format` adds keyword argument ``escape`` for optional HTML escaping (:issue:`40437`) - :meth:`.Styler.background_gradient` now allows the ability to supply a specific gradient map (:issue:`22727`) - :meth:`.Styler.clear` now clears :attr:`Styler.hidden_index` and :attr:`Styler.hidden_columns` as well (:issue:`40484`) - Builtin highlighting methods in :class:`Styler` have a more consistent signature and css customisability (:issue:`40242`) From c1a6f6c63681c7802a2364539d4be5cd7a6303b4 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 25 May 2021 16:21:07 +0200 Subject: [PATCH 07/12] update user guide --- doc/source/user_guide/style.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index ff85fdde86147..7d8d8e90dfbda 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -152,7 +152,7 @@ "\n", "Before adding styles it is useful to show that the [Styler][styler] can distinguish the *display* value from the *actual* value. To control the display value, the text is printed in each cell, and we can use the [.format()][formatfunc] method to manipulate this according to a [format spec string][format] or a callable that takes a single value and returns a string. It is possible to define this for the whole table or for individual columns. \n", "\n", - "Additionally, the format function has a **precision** argument to specifically help formatting floats, an **na_rep** argument to display missing data, and an **escape_html** argument to help displaying safe-HTML. The default formatter is configured to adopt pandas' regular `display.precision` option, controllable using `with pd.option_context('display.precision', 2):`\n", + "Additionally, the format function has a **precision** argument to specifically help formatting floats, an **na_rep** argument to display missing data, and an **escape** argument to help displaying safe-HTML. The default formatter is configured to adopt pandas' regular `display.precision` option, controllable using `with pd.option_context('display.precision', 2):`\n", "\n", "Here is an example of using the multiple options to control the formatting generally and with specific column formatters.\n", "\n", @@ -1462,7 +1462,7 @@ "metadata": {}, "outputs": [], "source": [ - "df4.style.format(escape_html=True)" + "df4.style.format(escape=\"html\")" ] }, { @@ -1471,7 +1471,7 @@ "metadata": {}, "outputs": [], "source": [ - "df4.style.format('{}', escape_html=True)" + "df4.style.format('{}', escape=\"html\")" ] }, { From dcbc7e398f5d1c6e130a566cda06d7c7f0beb8b0 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 26 May 2021 10:25:03 +0200 Subject: [PATCH 08/12] include latex escape function and tests --- pandas/io/formats/style_render.py | 57 +++++++++++++++++--- pandas/tests/io/formats/style/test_format.py | 18 ++++++- 2 files changed, 66 insertions(+), 9 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 94ef537e8b3b9..f7afaa19e07d7 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -459,7 +459,7 @@ def format( thousands: str | None = None, escape: str | None = None, ) -> StylerRenderer: - """ + r""" Format the text display value of cells. Parameters @@ -494,8 +494,11 @@ def format( escape : str, optional Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` - in cell display string with HTML-safe sequences. Escaping is done - before ``formatter``. + in cell display string with HTML-safe sequences. + Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, + ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with + LaTeX-safe sequences. + Escaping is done before ``formatter``. .. versionadded:: 1.3.0 @@ -774,10 +777,13 @@ def wrapper(x): return wrapper -def _str_escape_html(x): - """if escaping html: only use on str, else return input""" +def _str_escape(x, escape): + """if escaping: only use on str, else return input""" if isinstance(x, str): - return escape_html(x) + if escape == "html": + return escape_html(x) + elif escape == "latex": + return _escape_latex(x) return x @@ -808,8 +814,8 @@ def _maybe_wrap_formatter( raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") # Replace HTML chars if escaping - if escape is not None and "html" in escape: - func_1 = lambda x: func_0(_str_escape_html(x)) + if escape == "html" or escape == "latex": + func_1 = lambda x: func_0(_str_escape(x, escape=escape)) else: func_1 = func_0 @@ -1190,3 +1196,38 @@ def _parse_latex_options_strip(value: str | int | float, arg: str) -> str: For example: 'red /* --wrap */ ' --> 'red' """ return str(value).replace(arg, "").replace("/*", "").replace("*/", "").strip() + + +def _escape_latex(s): + r""" + Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``, + ``~``, ``^``, and ``\`` in the string with LaTeX-safe sequences. + + Use this if you need to display text that might contain such characters in LaTeX. + + Parameters + ---------- + s : str + Input to be escaped + + Return + ------ + str : + Escaped string + """ + return ( + s.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash + .replace("ab2§=§8yz ", "ab2§=§8yz\\space ") # since \backslash gobbles spaces + .replace("&", "\\&") + .replace("%", "\\%") + .replace("$", "\\$") + .replace("#", "\\#") + .replace("_", "\\_") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("~ ", "~\\space ") # since \textasciitilde gobbles spaces + .replace("~", "\\textasciitilde ") + .replace("^ ", "^\\space ") # since \textasciicircum gobbles spaces + .replace("^", "\\textasciicircum ") + .replace("ab2§=§8yz", "\\textbackslash ") + ) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index b30b56fa430a7..9d127f31475c1 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -106,7 +106,7 @@ def test_format_clear(styler): assert (0, 0) not in styler._display_funcs # formatter cleared to default -def test_format_escape(): +def test_format_escape_html(): df = DataFrame([['<>&"']]) s = Styler(df, uuid_len=0).format("X&{0}>X", escape=None) expected = 'X&<>&">X' @@ -118,6 +118,22 @@ def test_format_escape(): assert ex in s.render() +def test_format_escape_latex(): + df = DataFrame([["&%$#_{}~^\\~ ^ \\ "]]) + s = Styler(df, uuid_len=0).format("_{0}_", escape=None) + expected = '_&%$#_{}~^\\~ ^ \\ _' + assert expected in s.render() + + # only the value should be escaped before passing to the formatter + s = Styler(df, uuid_len=0).format("_{0}_", escape="latex") + ex = ( + '_\\&\\%\\$\\#\\_\\{\\}' + "\\textasciitilde \\textasciicircum \\textbackslash \\textasciitilde \\space " + "\\textasciicircum \\space \\textbackslash \\space _" + ) + assert ex in s.render() + + def test_format_escape_na_rep(): # tests the na_rep is not escaped df = DataFrame([['<>&"', None]]) From e0cdf384f69a6bf9288b5864bd2339115058fe48 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 26 May 2021 10:42:07 +0200 Subject: [PATCH 09/12] better tests --- pandas/tests/io/formats/style/test_format.py | 42 ++++++++++---------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 9d127f31475c1..0dae40d32e16e 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -106,32 +106,30 @@ def test_format_clear(styler): assert (0, 0) not in styler._display_funcs # formatter cleared to default -def test_format_escape_html(): - df = DataFrame([['<>&"']]) - s = Styler(df, uuid_len=0).format("X&{0}>X", escape=None) - expected = 'X&<>&">X' - assert expected in s.render() - - # only the value should be escaped before passing to the formatter - s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html") - ex = 'X&<>&">X' - assert ex in s.render() - +@pytest.mark.parametrize( + "escape, exp", + [ + ("html", "<>&"%$#_{}~^\\~ ^ \\ "), + ( + "latex", + '<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum ' + "\\textbackslash \\textasciitilde \\space \\textasciicircum \\space " + "\\textbackslash \\space ", + ), + ], +) +def test_format_escape_html(escape, exp): + chars = '<>&"%$#_{}~^\\~ ^ \\ ' + df = DataFrame([[chars]]) -def test_format_escape_latex(): - df = DataFrame([["&%$#_{}~^\\~ ^ \\ "]]) - s = Styler(df, uuid_len=0).format("_{0}_", escape=None) - expected = '_&%$#_{}~^\\~ ^ \\ _' + s = Styler(df, uuid_len=0).format("&{0}&", escape=None) + expected = f'&{chars}&' assert expected in s.render() # only the value should be escaped before passing to the formatter - s = Styler(df, uuid_len=0).format("_{0}_", escape="latex") - ex = ( - '_\\&\\%\\$\\#\\_\\{\\}' - "\\textasciitilde \\textasciicircum \\textbackslash \\textasciitilde \\space " - "\\textasciicircum \\space \\textbackslash \\space _" - ) - assert ex in s.render() + s = Styler(df, uuid_len=0).format("&{0}&", escape=escape) + expected = f'&{exp}&' + assert expected in s.render() def test_format_escape_na_rep(): From 4a7e4e3d187d727223cc01fe5d5f50fd88e1eafb Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 26 May 2021 10:53:16 +0200 Subject: [PATCH 10/12] docs --- pandas/io/formats/style_render.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index f7afaa19e07d7..c8eb11345cf8c 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -584,6 +584,17 @@ def format( "A&B" NA ... + + Using a ``formatter`` with LaTeX ``escape``. + + >>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]]) + >>> s = df.style.format("\\textbf{{{}}}", escape="latex").to_latex() + \begin{tabular}{ll} + {} & {0} \\ + 0 & \textbf{123} \\ + 1 & \textbf{\textasciitilde \space \textasciicircum } \\ + 2 & \textbf{\$\%\#} \\ + \end{tabular} """ if all( ( From bcc41b9eec2d10c8818651ee691a1f48d3b3b92c Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 26 May 2021 10:55:07 +0200 Subject: [PATCH 11/12] docs --- pandas/io/formats/style.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 09a55a8e1a764..73924631aea5c 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -74,7 +74,7 @@ def _mpl(func: Callable): class Styler(StylerRenderer): - """ + r""" Helps style a DataFrame or Series according to the data with HTML and CSS. Parameters @@ -120,8 +120,11 @@ class Styler(StylerRenderer): .. versionadded:: 1.3.0 escape : str, optional - Use "html" to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` - in cell display strings with HTML-safe sequences. + Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` + in cell display string with HTML-safe sequences. + Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, + ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with + LaTeX-safe sequences. ... versionadded:: 1.3.0 From 23bb0daba014f90d917509b867da23862e80e8e9 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Thu, 27 May 2021 09:06:24 +0200 Subject: [PATCH 12/12] validation of escape arg --- pandas/io/formats/style_render.py | 8 ++++++-- pandas/tests/io/formats/style/test_format.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index c8eb11345cf8c..41733b77cbbd3 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -795,6 +795,10 @@ def _str_escape(x, escape): return escape_html(x) elif escape == "latex": return _escape_latex(x) + else: + raise ValueError( + f"`escape` only permitted in {{'html', 'latex'}}, got {escape}" + ) return x @@ -824,8 +828,8 @@ def _maybe_wrap_formatter( else: raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") - # Replace HTML chars if escaping - if escape == "html" or escape == "latex": + # Replace chars if escaping + if escape is not None: func_1 = lambda x: func_0(_str_escape(x, escape=escape)) else: func_1 = func_0 diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 0dae40d32e16e..77a547098036c 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -11,6 +11,7 @@ pytest.importorskip("jinja2") from pandas.io.formats.style import Styler +from pandas.io.formats.style_render import _str_escape @pytest.fixture @@ -253,3 +254,14 @@ def test_format_decimal(formatter, thousands, precision): decimal="_", formatter=formatter, thousands=thousands, precision=precision )._translate(True, True) assert "000_123" in result["body"][0][1]["display_value"] + + +def test_str_escape_error(): + msg = "`escape` only permitted in {'html', 'latex'}, got " + with pytest.raises(ValueError, match=msg): + _str_escape("text", "bad_escape") + + with pytest.raises(ValueError, match=msg): + _str_escape("text", []) + + _str_escape(2.00, "bad_escape") # OK since dtype is float