Skip to content

Commit 3c19380

Browse files
authored
ENH: add render_links for Styler.to_html formatting (#45058)
1 parent 4b77cbe commit 3c19380

File tree

5 files changed

+90
-2
lines changed

5 files changed

+90
-2
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ Additionally there are specific enhancements to the HTML specific rendering:
111111
- :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, :issue:`43149`, :issue:`42972`).
112112
- :meth:`.Styler.to_html` omits CSSStyle rules for hidden table elements as a performance enhancement (:issue:`43619`)
113113
- Custom CSS classes can now be directly specified without string replacement (:issue:`43686`)
114+
- Ability to render hyperlinks automatically via a new ``hyperlinks`` formatting keyword argument (:issue:`45058`)
114115

115116
There are also some LaTeX specific enhancements:
116117

pandas/io/formats/style.py

+1
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,7 @@ def to_latex(
623623
| \\sisetup{detect-all = true} *(within {document})*
624624
environment \\usepackage{longtable} if arg is "longtable"
625625
| or any other relevant environment package
626+
hyperlinks \\usepackage{hyperref}
626627
===================== ==========================================================
627628
628629
**Cell Styles**

pandas/io/formats/style_render.py

+40-2
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,7 @@ def format(
798798
decimal: str = ".",
799799
thousands: str | None = None,
800800
escape: str | None = None,
801+
hyperlinks: str | None = None,
801802
) -> StylerRenderer:
802803
r"""
803804
Format the text display value of cells.
@@ -842,6 +843,13 @@ def format(
842843
843844
.. versionadded:: 1.3.0
844845
846+
hyperlinks : {"html", "latex"}, optional
847+
Convert string patterns containing https://, http://, ftp:// or www. to
848+
HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
849+
commands if "latex".
850+
851+
.. versionadded:: 1.4.0
852+
845853
Returns
846854
-------
847855
self : Styler
@@ -958,6 +966,7 @@ def format(
958966
thousands is None,
959967
na_rep is None,
960968
escape is None,
969+
hyperlinks is None,
961970
)
962971
):
963972
self._display_funcs.clear()
@@ -980,6 +989,7 @@ def format(
980989
decimal=decimal,
981990
thousands=thousands,
982991
escape=escape,
992+
hyperlinks=hyperlinks,
983993
)
984994
for ri in ris:
985995
self._display_funcs[(ri, ci)] = format_func
@@ -996,6 +1006,7 @@ def format_index(
9961006
decimal: str = ".",
9971007
thousands: str | None = None,
9981008
escape: str | None = None,
1009+
hyperlinks: str | None = None,
9991010
) -> StylerRenderer:
10001011
r"""
10011012
Format the text display value of index labels or column headers.
@@ -1027,6 +1038,10 @@ def format_index(
10271038
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
10281039
LaTeX-safe sequences.
10291040
Escaping is done before ``formatter``.
1041+
hyperlinks : {"html", "latex"}, optional
1042+
Convert string patterns containing https://, http://, ftp:// or www. to
1043+
HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
1044+
commands if "latex".
10301045
10311046
Returns
10321047
-------
@@ -1128,6 +1143,7 @@ def format_index(
11281143
thousands is None,
11291144
na_rep is None,
11301145
escape is None,
1146+
hyperlinks is None,
11311147
)
11321148
):
11331149
display_funcs_.clear()
@@ -1149,6 +1165,7 @@ def format_index(
11491165
decimal=decimal,
11501166
thousands=thousands,
11511167
escape=escape,
1168+
hyperlinks=hyperlinks,
11521169
)
11531170

11541171
for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]:
@@ -1391,13 +1408,28 @@ def _str_escape(x, escape):
13911408
return x
13921409

13931410

1411+
def _render_href(x, format):
1412+
"""uses regex to detect a common URL pattern and converts to href tag in format."""
1413+
if isinstance(x, str):
1414+
if format == "html":
1415+
href = '<a href="{0}" target="_blank">{0}</a>'
1416+
elif format == "latex":
1417+
href = r"\href{{{0}}}{{{0}}}"
1418+
else:
1419+
raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'")
1420+
pat = r"(https?:\/\/|ftp:\/\/|www.)[\w/\-?=%.]+\.[\w/\-&?=%.]+"
1421+
return re.sub(pat, lambda m: href.format(m.group(0)), x)
1422+
return x
1423+
1424+
13941425
def _maybe_wrap_formatter(
13951426
formatter: BaseFormatter | None = None,
13961427
na_rep: str | None = None,
13971428
precision: int | None = None,
13981429
decimal: str = ".",
13991430
thousands: str | None = None,
14001431
escape: str | None = None,
1432+
hyperlinks: str | None = None,
14011433
) -> Callable:
14021434
"""
14031435
Allows formatters to be expressed as str, callable or None, where None returns
@@ -1431,11 +1463,17 @@ def _maybe_wrap_formatter(
14311463
else:
14321464
func_2 = func_1
14331465

1466+
# Render links
1467+
if hyperlinks is not None:
1468+
func_3 = lambda x: func_2(_render_href(x, format=hyperlinks))
1469+
else:
1470+
func_3 = func_2
1471+
14341472
# Replace missing values if na_rep
14351473
if na_rep is None:
1436-
return func_2
1474+
return func_3
14371475
else:
1438-
return lambda x: na_rep if isna(x) else func_2(x)
1476+
return lambda x: na_rep if isna(x) else func_3(x)
14391477

14401478

14411479
def non_reducing_slice(slice_: Subset):

pandas/tests/io/formats/style/test_html.py

+40
Original file line numberDiff line numberDiff line change
@@ -764,3 +764,43 @@ def test_hiding_index_columns_multiindex_trimming():
764764
)
765765

766766
assert result == expected
767+
768+
769+
@pytest.mark.parametrize("type", ["data", "index"])
@pytest.mark.parametrize(
    "text, exp, found",
    [
        ("no link, just text", False, ""),
        ("subdomain not www: sub.web.com", False, ""),
        ("www subdomain: www.web.com other", True, "www.web.com"),
        ("scheme full structure: http://www.web.com", True, "http://www.web.com"),
        ("scheme no top-level: http://www.web", True, "http://www.web"),
        ("no scheme, no top-level: www.web", False, "www.web"),
        ("https scheme: https://www.web.com", True, "https://www.web.com"),
        ("ftp scheme: ftp://www.web", True, "ftp://www.web"),
        ("subdirectories: www.web.com/directory", True, "www.web.com/directory"),
        ("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"),
    ],
)
def test_rendered_links(type, text, exp, found):
    # ``text`` is placed either in a data cell or in an index label; ``exp``
    # states whether ``found`` should come out wrapped in an anchor tag.
    if type == "data":
        styler = DataFrame([text]).style.format(hyperlinks="html")
    else:
        styler = DataFrame([0], index=[text]).style.format_index(hyperlinks="html")

    html = styler.to_html()
    anchor = f'<a href="{found}" target="_blank">{found}</a>'

    assert (anchor in html) is exp
    # the raw text survives verbatim only when no link conversion took place
    assert (text in html) is not exp
797+
798+
799+
def test_multiple_rendered_links():
    # several URLs in one cell must each be converted, while the plain words
    # between them are left alone
    urls = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e")
    cell = "text {} {} text {} {}".format(*urls)
    html = DataFrame([cell]).style.format(hyperlinks="html").to_html()

    anchor = '<a href="{0}" target="_blank">{0}</a>'
    for url in urls:
        assert anchor.format(url) in html
    assert anchor.format("text") not in html

pandas/tests/io/formats/style/test_to_latex.py

+8
Original file line numberDiff line numberDiff line change
@@ -845,3 +845,11 @@ def test_latex_hiding_index_columns_multiindex_alignment():
845845
"""
846846
)
847847
assert result == expected
848+
849+
850+
def test_rendered_links():
    # URL detection is mostly tested in test_html.py: test_rendered_links;
    # this test only checks that the alternative latex format is functional
    styler = DataFrame(["text www.domain.com text"]).style.format(hyperlinks="latex")
    latex = styler.to_latex()
    assert r"text \href{www.domain.com}{www.domain.com} text" in latex

0 commit comments

Comments
 (0)