diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 3a6d3cc2141be..e09f3e2753ee6 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -281,6 +281,7 @@ Other enhancements - Added new argument ``engine`` to :func:`read_json` to support parsing JSON with pyarrow by specifying ``engine="pyarrow"`` (:issue:`48893`) - Added support for SQLAlchemy 2.0 (:issue:`40686`) - :class:`Index` set operations :meth:`Index.union`, :meth:`Index.intersection`, :meth:`Index.difference`, and :meth:`Index.symmetric_difference` now support ``sort=True``, which will always return a sorted result, unlike the default ``sort=None`` which does not sort in some cases (:issue:`25151`) +- Added new escape mode "latex-math" to avoid escaping "$" in formatter (:issue:`50040`) .. --------------------------------------------------------------------------- .. _whatsnew_200.notable_bug_fixes: diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index dd751075647d8..3f0366f33a94b 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -873,7 +873,7 @@ def register_converter_cb(key) -> None: "format.escape", None, styler_escape, - validator=is_one_of_factory([None, "html", "latex"]), + validator=is_one_of_factory([None, "html", "latex", "latex-math"]), ) cf.register_option( diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index c5262b9f52fc7..69cc1e9f1f7ae 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -985,6 +985,8 @@ def format( Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with LaTeX-safe sequences. + Use 'latex-math' to replace the characters the same way as in 'latex' mode, + except for math substrings, which start and end with ``$``. Escaping is done before ``formatter``. .. 
def _escape_latex_math(s):
    r"""
    Escape a string for LaTeX while leaving inline math substrings untouched.

    A substring that starts with the character ``$`` and ends with the next
    ``$`` is treated as LaTeX math mode and copied verbatim. Everything
    outside such substrings is passed through ``_escape_latex()``.
    An already escaped dollar sign (``\$``) is masked before the search, so it
    never opens or closes a math substring, and is restored afterwards.

    Parameters
    ----------
    s : str
        Input to be escaped

    Returns
    -------
    str :
        Escaped string
    """
    placeholder = r"rt8§=§7wz"
    # Hide escaped dollars so they cannot be mistaken for math delimiters.
    masked = s.replace(r"\$", placeholder)

    pieces = []
    cursor = 0
    # The pattern always consumes at least two characters, so iterating the
    # non-overlapping matches is the same as searching again from each end.
    for match in re.finditer(r"\$.*?\$", masked):
        start, end = match.span()
        pieces.append(_escape_latex(masked[cursor:start]))  # text before math
        pieces.append(match.group())  # math substring, kept as is
        cursor = end
    # Trailing text after the last math substring (or the whole string).
    pieces.append(_escape_latex(masked[cursor:]))

    return "".join(pieces).replace(placeholder, r"\$")
"\\&\\textasciitilde " # test option: formatter with option_context("styler.format.formatter", {"int": "{:,.2f}"}):