diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3a749708fb526..c65f346f85dff 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -38,6 +38,7 @@ Other enhancements - Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`) - :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`) - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`) +- The "latex-math" escape mode of the formatter now also preserves, without escaping, all characters between ``\(`` and ``\)`` (:issue:`51903`) - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 573e2b72e81f9..26405aac7d1c0 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -175,7 +175,11 @@ class Styler(StylerRenderer): in cell display string with HTML-safe sequences. Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with - LaTeX-safe sequences. If not given uses ``pandas.options.styler.format.escape``. + LaTeX-safe sequences. Use 'latex-math' to replace the characters + the same way as in 'latex' mode, except for math substrings, + which either are surrounded by two characters ``$`` or start with + the character ``\(`` and end with ``\)``. + If not given uses ``pandas.options.styler.format.escape``. ..
versionadded:: 1.3.0 formatter : str, callable, dict, optional diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 317ce93cf3da6..3a3b784c9d4d7 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -988,8 +988,9 @@ def format( ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with LaTeX-safe sequences. Use 'latex-math' to replace the characters the same way as in 'latex' mode, - except for math substrings, which start and end with ``$``. - Escaping is done before ``formatter``. + except for math substrings, which either are surrounded + by two characters ``$`` or start with the character ``\(`` and + end with ``\)``. Escaping is done before ``formatter``. .. versionadded:: 1.3.0 @@ -1117,7 +1118,8 @@ def format( 2 & \textbf{\$\%\#} \\ \end{tabular} - Using ``escape`` in 'latex-math' mode. + Applying ``escape`` in 'latex-math' mode. In the example below + we enter math mode using the character ``$``. >>> df = pd.DataFrame([[r"$\sum_{i=1}^{10} a_i$ a~b $\alpha \ ... = \frac{\beta}{\zeta^2}$"], ["%#^ $ \$x^2 $"]]) @@ -1129,6 +1131,34 @@ def format( 1 & \%\#\textasciicircum \space $ \$x^2 $ \\ \end{tabular} + We can use the character ``\(`` to enter math mode and the character ``\)`` + to close math mode. + + >>> df = pd.DataFrame([[r"\(\sum_{i=1}^{10} a_i\) a~b \(\alpha \ + ... = \frac{\beta}{\zeta^2}\)"], ["%#^ \( \$x^2 \)"]]) + >>> df.style.format(escape="latex-math").to_latex() + ... # doctest: +SKIP + \begin{tabular}{ll} + & 0 \\ + 0 & \(\sum_{i=1}^{10} a_i\) a\textasciitilde b \(\alpha + = \frac{\beta}{\zeta^2}\) \\ + 1 & \%\#\textasciicircum \space \( \$x^2 \) \\ + \end{tabular} + + If we have in one DataFrame cell a combination of both shorthands + for math formulas, the shorthand with the sign ``$`` will be applied. + + >>> df = pd.DataFrame([[r"\( x^2 \) $x^2$"], \ + ... 
# Inline math delimited by ``\(`` and ``\)``.  The negative lookahead keeps a
# span from swallowing a later ``\(``, so only a well-formed innermost pair is
# treated as math (an unmatched opener is escaped like ordinary text).  DOTALL
# lets a formula continue across a line break inside a cell.
_MATH_PARENTHESES = re.compile(r"\\\((?:(?!\\\().)*?\\\)", re.DOTALL)


def _math_mode_with_parentheses(s):
    r"""
    Escape a string for LaTeX while preserving ``\( ... \)`` math substrings.

    Every substring that starts with ``\(`` and ends with ``\)`` is copied to
    the output verbatim. All text outside such substrings, including
    unmatched ``\(`` or ``\)`` delimiters, is passed through
    ``_escape_latex()``.

    The math spans are located directly with a regular expression instead of
    substituting placeholder words into the text, so cell contents can never
    collide with a placeholder.

    Parameters
    ----------
    s : str
        Input to be escaped

    Return
    ------
    str :
        Escaped string
    """
    res = []
    pos = 0
    for match in _MATH_PARENTHESES.finditer(s):
        # Text in front of the formula gets regular escaping ...
        res.append(_escape_latex(s[pos : match.start()]))
        # ... while the formula itself is kept byte-for-byte.
        res.append(match.group())
        pos = match.end()
    # Whatever follows the last formula (or the whole string when there is no
    # formula at all) is ordinary text.
    res.append(_escape_latex(s[pos:]))
    return "".join(res)


def _escape_latex_math(s):
    r"""
    Escape a string for LaTeX while preserving math-mode substrings.

    The substrings in LaTeX math mode, which either are surrounded
    by two characters ``$`` or start with the character ``\(`` and end with
    ``\)``, are preserved without escaping. Otherwise regular LaTeX escaping
    applies.

    Only one kind of delimiter is honoured per string: if the string contains
    a ``$ ... $`` pair, dollar mode is used and any ``\( ... \)`` is escaped
    as ordinary text. Parentheses mode is used only when no ``$ ... $`` pair
    is present.

    Parameters
    ----------
    s : str
        Input to be escaped

    Return
    ------
    str :
        Escaped string
    """
    # An escaped dollar (``\$``) is a literal character, not a delimiter, so
    # hide it while probing for ``$ ... $``.  The masked copy is used for
    # detection only; the helpers always receive the untouched input.
    masked = s.replace(r"\$", r"rt8§=§7wz")
    if re.search(r"\$.*?\$", masked):
        return _math_mode_with_dollar(s)
    # Look for the real ``\(`` ... ``\)`` delimiters.  Searching for a bare
    # ``(`` would let an ordinary parenthesis that precedes the formula
    # disable math mode for the whole string.
    if _MATH_PARENTHESES.search(s):
        return _math_mode_with_parentheses(s)
    return _escape_latex(s)
@pytest.mark.parametrize(
    "chars, expected",
    [
        # dollar-delimited math followed by plain text and a stray ``$``
        (
            r"$ \$&%#_{}~^\ $ &%#_{}~^\ $",
            r"$ \$&%#_{}~^\ $ "
            r"\&\%\#\_\{\}\textasciitilde \textasciicircum "
            r"\textbackslash \space \$",
        ),
        # ``\( ... \)`` math followed by plain text and a stray ``\(``
        (
            r"\( &%#_{}~^\ \) &%#_{}~^\ \(",
            r"\( &%#_{}~^\ \) "
            r"\&\%\#\_\{\}\textasciitilde \textasciicircum "
            r"\textbackslash \space \textbackslash (",
        ),
        # a single ``$`` plus an escaped ``\$`` never opens math mode
        (
            r"$\&%#_{}^\$",
            r"\$\textbackslash \&\%\#\_\{\}\textasciicircum \textbackslash \$",
        ),
        # both delimiter styles in one cell: only the ``$`` pair is honoured
        (
            r"$ \frac{1}{2} $ \( \frac{1}{2} \)",
            r"$ \frac{1}{2} $"
            r" \textbackslash ( \textbackslash frac\{1\}\{2\} \textbackslash )",
        ),
    ],
)
def test_format_escape_latex_math(chars, expected):
    # GH 51903
    # The latex-math escape is applied to every DataFrame cell on its own.
    # When a cell mixes dollar signs and ``\(``/``\)``, the dollar signs win.
    styler = DataFrame([[chars]]).style.format("{0}", escape="latex-math")
    body = styler._translate(True, True)["body"]
    assert body[0][1]["display_value"] == expected