Skip to content

Commit c61c7ba

Browse files
authored
ENH: add LaTeX math mode with parentheses (#51903)
* ENH: add math mode with parentheses * ENH: add math mode with parentheses II * ENH: add math mode with parentheses III * ENH: add an example to latex-math mode and a line to whatsnew * ENH: update docs for Styler: add description latex-math mode to escape * improve code style * add an example to test and correct _escape_latex_math
1 parent 233bd83 commit c61c7ba

File tree

4 files changed

+159
-14
lines changed

4 files changed

+159
-14
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Other enhancements
3838
- Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`)
3939
- :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`)
4040
- Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`)
41+
- The "latex-math" escape mode of the formatter now also preserves, without escaping, all characters between "\(" and "\)" (:issue:`51903`)
4142
- Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
4243
- :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`)
4344
- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).

pandas/io/formats/style.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,11 @@ class Styler(StylerRenderer):
175175
in cell display string with HTML-safe sequences.
176176
Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
177177
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
178-
LaTeX-safe sequences. If not given uses ``pandas.options.styler.format.escape``.
178+
LaTeX-safe sequences. Use 'latex-math' to replace the characters
179+
the same way as in 'latex' mode, except for math substrings,
180+
which either are surrounded by two characters ``$`` or start with
181+
the character ``\(`` and end with ``\)``.
182+
If not given uses ``pandas.options.styler.format.escape``.
179183
180184
.. versionadded:: 1.3.0
181185
formatter : str, callable, dict, optional

pandas/io/formats/style_render.py

+110-8
Original file line numberDiff line numberDiff line change
@@ -988,8 +988,9 @@ def format(
988988
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
989989
LaTeX-safe sequences.
990990
Use 'latex-math' to replace the characters the same way as in 'latex' mode,
991-
except for math substrings, which start and end with ``$``.
992-
Escaping is done before ``formatter``.
991+
except for math substrings, which either are surrounded
992+
by two characters ``$`` or start with the character ``\(`` and
993+
end with ``\)``. Escaping is done before ``formatter``.
993994
994995
.. versionadded:: 1.3.0
995996
@@ -1117,7 +1118,8 @@ def format(
11171118
2 & \textbf{\$\%\#} \\
11181119
\end{tabular}
11191120
1120-
Using ``escape`` in 'latex-math' mode.
1121+
Applying ``escape`` in 'latex-math' mode. In the example below
1122+
we enter math mode using the character ``$``.
11211123
11221124
>>> df = pd.DataFrame([[r"$\sum_{i=1}^{10} a_i$ a~b $\alpha \
11231125
... = \frac{\beta}{\zeta^2}$"], ["%#^ $ \$x^2 $"]])
@@ -1129,6 +1131,34 @@ def format(
11291131
1 & \%\#\textasciicircum \space $ \$x^2 $ \\
11301132
\end{tabular}
11311133
1134+
We can use the delimiter ``\(`` to enter math mode and the delimiter ``\)``
1135+
to close math mode.
1136+
1137+
>>> df = pd.DataFrame([[r"\(\sum_{i=1}^{10} a_i\) a~b \(\alpha \
1138+
... = \frac{\beta}{\zeta^2}\)"], [r"%#^ \( \$x^2 \)"]])
1139+
>>> df.style.format(escape="latex-math").to_latex()
1140+
... # doctest: +SKIP
1141+
\begin{tabular}{ll}
1142+
& 0 \\
1143+
0 & \(\sum_{i=1}^{10} a_i\) a\textasciitilde b \(\alpha
1144+
= \frac{\beta}{\zeta^2}\) \\
1145+
1 & \%\#\textasciicircum \space \( \$x^2 \) \\
1146+
\end{tabular}
1147+
1148+
If one DataFrame cell contains both kinds of math delimiters, only the
1149+
substrings delimited by ``$`` are kept as math; ``\(`` and ``\)`` are escaped.
1150+
1151+
>>> df = pd.DataFrame([[r"\( x^2 \) $x^2$"], \
1152+
... [r"$\frac{\beta}{\zeta}$ \(\frac{\beta}{\zeta}\)"]])
1153+
>>> df.style.format(escape="latex-math").to_latex()
1154+
... # doctest: +SKIP
1155+
\begin{tabular}{ll}
1156+
& 0 \\
1157+
0 & \textbackslash ( x\textasciicircum 2 \textbackslash ) $x^2$ \\
1158+
1 & $\frac{\beta}{\zeta}$ \textbackslash (\textbackslash
1159+
frac\{\textbackslash beta\}\{\textbackslash zeta\}\textbackslash ) \\
1160+
\end{tabular}
1161+
11321162
Pandas defines a `number-format` pseudo CSS attribute instead of the `.format`
11331163
method to create `to_excel` permissible formatting. Note that semi-colons are
11341164
CSS protected characters but used as separators in Excel's format string.
@@ -2361,13 +2391,13 @@ def _escape_latex(s):
23612391
)
23622392

23632393

2364-
def _escape_latex_math(s):
2394+
def _math_mode_with_dollar(s):
23652395
r"""
2366-
All characters between two characters ``$`` are preserved.
2396+
All characters in LaTeX math mode are preserved.
23672397
2368-
The substrings in LaTeX math mode, which start with the character ``$``
2369-
and end with ``$``, are preserved without escaping. Otherwise
2370-
regular LaTeX escaping applies. See ``_escape_latex()``.
2398+
The substrings in LaTeX math mode, which start with
2399+
the character ``$`` and end with ``$``, are preserved
2400+
without escaping. Otherwise regular LaTeX escaping applies.
23712401
23722402
Parameters
23732403
----------
@@ -2392,3 +2422,75 @@ def _escape_latex_math(s):
23922422

23932423
res.append(_escape_latex(s[pos : len(s)]))
23942424
return "".join(res).replace(r"rt8§=§7wz", r"\$")
2425+
2426+
2427+
def _math_mode_with_parentheses(s: str) -> str:
    r"""
    All characters in LaTeX math mode are preserved.

    The substrings in LaTeX math mode, which start with
    the delimiter ``\(`` and end with ``\)``, are preserved
    without escaping. Otherwise regular LaTeX escaping applies.

    An opening ``\(`` is paired with the next ``\)`` only when no other
    ``\(`` lies between them. Delimiters left without a partner are
    escaped like any other text.

    Parameters
    ----------
    s : str
        Input to be escaped

    Return
    ------
    str :
        Escaped string
    """
    # Locate the math spans directly rather than substituting marker words
    # for the delimiters: a marker-word scheme rewrites cells that happen to
    # contain the marker words themselves (e.g. "LEFT" / "RIGHT").
    # The tempered dot ``(?:(?!\\[()]).)*`` refuses to step over another
    # ``\(`` or ``\)``, so only the innermost ``\( ... \)`` pair is matched;
    # DOTALL lets a math span run across line breaks.
    math_span = re.compile(r"\\\((?:(?!\\[()]).)*\\\)", flags=re.DOTALL)

    res = []
    pos = 0
    for match in math_span.finditer(s):
        # text before the math span gets the regular LaTeX escaping
        res.append(_escape_latex(s[pos : match.start()]))
        # the math span itself is copied through verbatim
        res.append(match.group())
        pos = match.end()
    # trailing text, including any unpaired ``\(`` or ``\)``
    res.append(_escape_latex(s[pos:]))
    return "".join(res)
2461+
2462+
2463+
def _escape_latex_math(s: str) -> str:
    r"""
    All characters in LaTeX math mode are preserved.

    The substrings in LaTeX math mode, which either are surrounded
    by two characters ``$`` or start with the delimiter ``\(`` and end
    with ``\)``, are preserved without escaping. Otherwise regular LaTeX
    escaping applies.

    Only one kind of delimiter is honoured per input string: if a
    ``$ ... $`` pair is present it takes precedence and any ``\(`` / ``\)``
    are escaped as ordinary text.

    Parameters
    ----------
    s : str
        Input to be escaped

    Return
    ------
    str :
        Escaped string
    """
    # Hide escaped dollars (``\$``) while detecting a ``$ ... $`` pair so that
    # they can neither open nor close a math span. Only this scratch copy is
    # modified; the helpers below always receive the untouched input.
    masked = s.replace(r"\$", r"rt8§=§7wz")
    if re.search(r"\$.*?\$", masked):
        # dollar delimiters win whenever both kinds are present
        return _math_mode_with_dollar(s)

    # Look for the real two-character delimiters ``\(`` and ``\)``. Matching
    # bare parentheses and then inspecting the preceding character misses the
    # math span whenever an ordinary "(" appears earlier in the string.
    if re.search(r"\\\(.*?\\\)", s, flags=re.DOTALL):
        return _math_mode_with_parentheses(s)

    # no math mode at all: escape everything
    return _escape_latex(s)

pandas/tests/io/formats/style/test_format.py

+43-5
Original file line numberDiff line numberDiff line change
@@ -192,13 +192,51 @@ def test_format_escape_html(escape, exp):
192192
assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&"
193193

194194

195-
def test_format_escape_latex_math():
196-
chars = r"$\frac{1}{2} \$ x^2$ ~%#^"
195+
@pytest.mark.parametrize(
    "chars, expected",
    [
        # dollar math span followed by plain text and an unpaired "$"
        (
            r"$ \$&%#_{}~^\ $ &%#_{}~^\ $",
            (
                r"$ \$&%#_{}~^\ $ "
                r"\&\%\#\_\{\}\textasciitilde \textasciicircum "
                r"\textbackslash \space \$"
            ),
        ),
        # parenthesis math span followed by plain text and an unpaired "\("
        (
            r"\( &%#_{}~^\ \) &%#_{}~^\ \(",
            (
                r"\( &%#_{}~^\ \) "
                r"\&\%\#\_\{\}\textasciitilde \textasciicircum "
                r"\textbackslash \space \textbackslash ("
            ),
        ),
        # a lone "$" plus an escaped "\$": no math span, everything is escaped
        (
            r"$\&%#_{}^\$",
            r"\$\textbackslash \&\%\#\_\{\}\textasciicircum \textbackslash \$",
        ),
        # both delimiter kinds in one cell: only the dollar span is preserved
        (
            r"$ \frac{1}{2} $ \( \frac{1}{2} \)",
            (
                r"$ \frac{1}{2} $"
                r" \textbackslash ( \textbackslash frac\{1\}\{2\} \textbackslash )"
            ),
        ),
    ],
)
def test_format_escape_latex_math(chars, expected):
    # GH 51903
    # The latex-math escape is applied to each DataFrame cell on its own.
    # When a cell mixes dollar signs and \( \) delimiters, the dollar signs
    # decide what counts as math.
    frame = DataFrame([[chars]])
    styler = frame.style.format("{0}", escape="latex-math")
    body = styler._translate(True, True)["body"]
    first_cell = body[0][1]
    assert first_cell["display_value"] == expected
202240

203241

204242
def test_format_escape_na_rep():

0 commit comments

Comments
 (0)