Skip to content

ENH: add LaTeX math mode with parentheses #51903

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 72 additions & 17 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -1117,7 +1117,8 @@ def format(
2 & \textbf{\$\%\#} \\
\end{tabular}

Using ``escape`` in 'latex-math' mode.
Applying ``escape`` in 'latex-math' mode. In the example below
we enter math mode using the character ``$``.

>>> df = pd.DataFrame([[r"$\sum_{i=1}^{10} a_i$ a~b $\alpha \
... = \frac{\beta}{\zeta^2}$"], ["%#^ $ \$x^2 $"]])
Expand All @@ -1129,6 +1130,20 @@ def format(
1 & \%\#\textasciicircum \space $ \$x^2 $ \\
\end{tabular}

We can also use the delimiter ``\(`` to enter math mode and the delimiter ``\)``
to close math mode.

>>> df = pd.DataFrame([[r"\(\sum_{i=1}^{10} a_i\) a~b \(\alpha \
... = \frac{\beta}{\zeta^2}\)"], ["%#^ \( \$x^2 \)"]])
>>> df.style.format(escape="latex-math").to_latex()
... # doctest: +SKIP
\begin{tabular}{ll}
& 0 \\
0 & \(\sum_{i=1}^{10} a_i\) a\textasciitilde b \(\alpha
= \frac{\beta}{\zeta^2}\) \\
1 & \%\#\textasciicircum \space \( \$x^2 \) \\
\end{tabular}

Pandas defines a `number-format` pseudo CSS attribute instead of the `.format`
method to create `to_excel` permissible formatting. Note that semi-colons are
CSS protected characters but used as separators in Excel's format string.
Expand Down Expand Up @@ -2344,7 +2359,8 @@ def _escape_latex(s):
Escaped string
"""
return (
s.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash
s.replace("\\ ", "ab2§=§8yz")
.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash
.replace("ab2§=§8yz ", "ab2§=§8yz\\space ") # since \backslash gobbles spaces
.replace("&", "\\&")
.replace("%", "\\%")
Expand All @@ -2363,11 +2379,11 @@ def _escape_latex(s):

def _escape_latex_math(s):
r"""
All characters between two characters ``$`` are preserved.
All characters in LaTeX math mode are preserved.

The substrings in LaTeX math mode, which start with the character ``$``
and end with ``$``, are preserved without escaping. Otherwise
regular LaTeX escaping applies. See ``_escape_latex()``.
The substrings in LaTeX math mode, which either are surrounded
by two characters ``$`` or start with the character ``\(`` and end with ``\)``,
are preserved without escaping. Otherwise regular LaTeX escaping applies.

Parameters
----------
Expand All @@ -2379,16 +2395,55 @@ def _escape_latex_math(s):
str :
Escaped string
"""
s = s.replace(r"\$", r"rt8§=§7wz")
pattern = re.compile(r"\$.*?\$")
pos = 0
ps = pattern.search(s, pos)
res = []
while ps:
res.append(_escape_latex(s[pos : ps.span()[0]]))
res.append(ps.group())
pos = ps.span()[1]

def _math_mode_with_dollar(s):
    r"""
    Escape ``s`` for LaTeX, leaving ``$...$`` segments untouched.

    Every ``\$`` is first swapped for the placeholder ``rt8§=§7wz`` so that
    it cannot be matched as a math-mode delimiter. The text outside the
    ``$...$`` matches is passed through ``_escape_latex``; the matches
    themselves are copied verbatim. The placeholder is turned back into
    ``\$`` in the joined result.

    Parameters
    ----------
    s : str
        Input to be escaped

    Returns
    -------
    str :
        Escaped string
    """
    protected = s.replace(r"\$", r"rt8§=§7wz")
    pieces = []
    cursor = 0
    # Non-greedy match: each segment ends at the nearest following ``$``.
    for match in re.finditer(r"\$.*?\$", protected):
        start, end = match.span()
        pieces.append(_escape_latex(protected[cursor:start]))
        pieces.append(match.group())
        cursor = end

    # Whatever follows the last math segment (or the whole string when
    # there was none) is ordinary text.
    pieces.append(_escape_latex(protected[cursor:]))
    return "".join(pieces).replace(r"rt8§=§7wz", r"\$")

def _math_mode_with_parentheses(s):
    r"""
    Escape ``s`` for LaTeX, leaving ``\(...\)`` segments untouched.

    ``\(`` is rewritten to ``LEFT§=§6yzLEFT`` and ``\)`` to
    ``RIGHTab5§=§RIGHT``. Splitting on ``LEFT§=§6yz`` / ``ab5§=§RIGHT``
    leaves a ``LEFT`` marker at the start of the chunk that follows an
    opener and a ``RIGHT`` marker at the end of the chunk that precedes a
    closer.

    * A chunk that starts with ``LEFT`` and ends with ``RIGHT`` is copied
      without escaping, with the markers turned back into ``\(`` / ``\)``.
    * A chunk that contains both markers elsewhere is escaped with
      ``_escape_latex`` and the markers turned back into ``\(`` / ``\)``.
    * Any other chunk is escaped with ``_escape_latex`` and its markers are
      written as ``\textbackslash (`` / ``\textbackslash )``.

    NOTE: the marker replacements act on every ``LEFT`` / ``RIGHT``
    substring of a chunk, including ones that were already present in ``s``.

    Parameters
    ----------
    s : str
        Input to be escaped

    Returns
    -------
    str :
        Escaped string
    """
    marked = s.replace(r"\(", r"LEFT§=§6yzLEFT")
    marked = marked.replace(r"\)", r"RIGHTab5§=§RIGHT")

    pieces = []
    for chunk in re.split(r"LEFT§=§6yz|ab5§=§RIGHT", marked):
        if chunk.startswith("LEFT") and chunk.endswith("RIGHT"):
            # Complete math segment: keep the content exactly as written.
            pieces.append(chunk.replace("LEFT", r"\(").replace("RIGHT", r"\)"))
            continue

        escaped = _escape_latex(chunk)
        if "LEFT" in chunk and "RIGHT" in chunk:
            opener, closer = r"\(", r"\)"
        else:
            opener, closer = r"\textbackslash (", r"\textbackslash )"
        pieces.append(escaped.replace("LEFT", opener).replace("RIGHT", closer))
    return "".join(pieces)

res.append(_escape_latex(s[pos : len(s)]))
return "".join(res).replace(r"rt8§=§7wz", r"\$")
s = s.replace(r"\$", r"rt8§=§7wz")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this code.
You are replacing the string \$ with a uuid string, first.
Then, you are searching for a pattern (r"\$.*?\$") that cannot exist, since it was replaced.

Then in line 2490 you replace \$ in the same string s with the same uuid again, which is unnecessary because line 2481 has already done it.
I think your tests pass and the code does the correct thing, but some of these lines look redundant and have no effect on the overall result?

Copy link
Contributor Author

@natmokval natmokval Mar 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the comment.
It’s right, the replacement in line 2490 has a mistake. The right one would be the reverse replacement: s.replace(r"rt8§=§7wz", r"\$")

I can explain what I am trying to do. When I checked the function _escape_latex_math, I noticed that a string like r"$&%#^\$", which contains only one "$" sign and only one "\$" combination, gave a wrong result because the function processed it in math mode. The replacement in line 2481 takes the string "\$" out of consideration so that it is not confused with "$". I then get the correct result and do the reverse replacement. If the string does not contain a combination of one "$" and one "\$", this check is not needed, but I prefer to keep it.

I corrected my mistake and made a new commit. I also added an example for this case to the tests.

pattern_d = re.compile(r"\$.*?\$")
pattern_p = re.compile(r"\\(.*?\\)")
pos_d = 0
pos_p = 0
ps_d = pattern_d.search(s, pos_d)
ps_p = pattern_p.search(s, pos_p)
mode = []
if ps_d:
mode.append(ps_d.span()[0])
if ps_p:
mode.append(ps_p.span()[0])
if len(mode) == 0:
return _escape_latex(s.replace(r"\$", r"rt8§=§7wz"))
if s[min(mode)] == r"$":
return _math_mode_with_dollar(s.replace(r"\$", r"rt8§=§7wz"))
else:
return _math_mode_with_parentheses(s.replace(r"\$", r"rt8§=§7wz"))
46 changes: 40 additions & 6 deletions pandas/tests/io/formats/style/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def test_format_clear(styler, func, attr, kwargs):
"latex",
'<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
"\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
"\\textbackslash \\space ",
"\\textbackslash ",
),
],
)
Expand All @@ -192,13 +192,47 @@ def test_format_escape_html(escape, exp):
assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&"


def test_format_escape_latex_math():
chars = r"$\frac{1}{2} \$ x^2$ ~%#^"
@pytest.mark.parametrize(
"chars, expected",
[
(
r"$ \$&%#_{}~^\ $ &%#_{}~^\ $",
"".join(
[
r"$ \$&%#_{}~^\ $ ",
r"\&\%\#\_\{\}\textasciitilde \textasciicircum \textbackslash \$",
]
),
),
(
r"\( &%#_{}~^\ \) &%#_{}~^\ \(",
"".join(
[
r"\( &%#_{}~^\ \) ",
r"\&\%\#\_\{\}\textasciitilde \textasciicircum ",
r"\textbackslash \textbackslash (",
]
),
),
(
r"$ \frac{1}{2} $ \( \frac{1}{2} \)",
"".join(
[
r"$ \frac{1}{2} $",
r" \textbackslash ( \textbackslash frac\{1\}\{2\} \textbackslash )",
]
),
),
],
)
def test_format_escape_latex_math(chars, expected):
# GH 51903
# latex-math escape works for each DataFrame cell separately.
# If we have a combination of dollar signs and brackets,
# the sign which occurs first would apply.
df = DataFrame([[chars]])

expected = r"$\frac{1}{2} \$ x^2$ \textasciitilde \%\#\textasciicircum "
s = df.style.format("{0}", escape="latex-math")
assert expected == s._translate(True, True)["body"][0][1]["display_value"]
assert s._translate(True, True)["body"][0][1]["display_value"] == expected


def test_format_escape_na_rep():
Expand Down