diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 9d788ffcfabe1..330d64ea88b8c 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -111,6 +111,7 @@ Additionally there are specific enhancements to the HTML specific rendering:
- :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, :issue:`43149`, :issue:`42972`).
- :meth:`.Styler.to_html` omits CSSStyle rules for hidden table elements as a performance enhancement (:issue:`43619`)
- Custom CSS classes can now be directly specified without string replacement (:issue:`43686`)
+ - Ability to render hyperlinks automatically via a new ``hyperlinks`` formatting keyword argument (:issue:`45058`)
There are also some LaTeX specific enhancements:
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index d9550f0940376..a196ec60c4012 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -623,6 +623,7 @@ def to_latex(
| \\sisetup{detect-all = true} *(within {document})*
environment \\usepackage{longtable} if arg is "longtable"
| or any other relevant environment package
+ hyperlinks \\usepackage{hyperref}
===================== ==========================================================
**Cell Styles**
diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py
index dcb1f9a2a70dc..a6b8913b23d9d 100644
--- a/pandas/io/formats/style_render.py
+++ b/pandas/io/formats/style_render.py
@@ -798,6 +798,7 @@ def format(
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
+ hyperlinks: str | None = None,
) -> StylerRenderer:
r"""
Format the text display value of cells.
@@ -842,6 +843,13 @@ def format(
.. versionadded:: 1.3.0
+ hyperlinks : {"html", "latex"}, optional
+ Convert string patterns containing https://, http://, ftp:// or www. to
+ HTML tags as clickable URL hyperlinks if "html", or LaTeX \href
+ commands if "latex".
+
+ .. versionadded:: 1.4.0
+
Returns
-------
self : Styler
@@ -958,6 +966,7 @@ def format(
thousands is None,
na_rep is None,
escape is None,
+ hyperlinks is None,
)
):
self._display_funcs.clear()
@@ -980,6 +989,7 @@ def format(
decimal=decimal,
thousands=thousands,
escape=escape,
+ hyperlinks=hyperlinks,
)
for ri in ris:
self._display_funcs[(ri, ci)] = format_func
@@ -996,6 +1006,7 @@ def format_index(
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
+ hyperlinks: str | None = None,
) -> StylerRenderer:
r"""
Format the text display value of index labels or column headers.
@@ -1027,6 +1038,10 @@ def format_index(
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
LaTeX-safe sequences.
Escaping is done before ``formatter``.
+ hyperlinks : {"html", "latex"}, optional
+ Convert string patterns containing https://, http://, ftp:// or www. to
+ HTML tags as clickable URL hyperlinks if "html", or LaTeX \href
+ commands if "latex".
Returns
-------
@@ -1128,6 +1143,7 @@ def format_index(
thousands is None,
na_rep is None,
escape is None,
+ hyperlinks is None,
)
):
display_funcs_.clear()
@@ -1149,6 +1165,7 @@ def format_index(
decimal=decimal,
thousands=thousands,
escape=escape,
+ hyperlinks=hyperlinks,
)
for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]:
@@ -1391,6 +1408,20 @@ def _str_escape(x, escape):
return x
+ def _render_href(x, format):
+     r"""
+     Wrap URL-like substrings of a string in hyperlink markup.
+
+     Parameters
+     ----------
+     x : object
+         Value to process. Anything that is not a ``str`` is returned unchanged.
+     format : {"html", "latex"}
+         Target markup: an HTML anchor tag if "html", or a LaTeX ``\href``
+         command if "latex".
+
+     Returns
+     -------
+     object
+         ``x`` with every detected URL replaced by its hyperlink markup, or
+         ``x`` itself if it is not a string.
+
+     Raises
+     ------
+     ValueError
+         If ``x`` is a string and ``format`` is neither "html" nor "latex".
+     """
+     if not isinstance(x, str):
+         return x
+
+     if format == "html":
+         href = '<a href="{0}" target="_blank">{0}</a>'
+     elif format == "latex":
+         # doubled braces are literal braces once passed through str.format
+         href = r"\href{{{0}}}{{{0}}}"
+     else:
+         raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'")
+
+     # A scheme (or a literal "www." prefix) followed by at least two
+     # dot-separated parts; the dot after "www" is escaped so that strings
+     # such as "wwwxa.web" are not mistaken for links.
+     pat = r"(https?:\/\/|ftp:\/\/|www\.)[\w/\-?=%.]+\.[\w/\-&?=%.]+"
+     return re.sub(pat, lambda m: href.format(m.group(0)), x)
+
+
def _maybe_wrap_formatter(
formatter: BaseFormatter | None = None,
na_rep: str | None = None,
@@ -1398,6 +1429,7 @@ def _maybe_wrap_formatter(
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
+ hyperlinks: str | None = None,
) -> Callable:
"""
Allows formatters to be expressed as str, callable or None, where None returns
@@ -1431,11 +1463,17 @@ def _maybe_wrap_formatter(
else:
func_2 = func_1
+ # Render links
+ if hyperlinks is not None:
+ func_3 = lambda x: func_2(_render_href(x, format=hyperlinks))
+ else:
+ func_3 = func_2
+
# Replace missing values if na_rep
if na_rep is None:
- return func_2
+ return func_3
else:
- return lambda x: na_rep if isna(x) else func_2(x)
+ return lambda x: na_rep if isna(x) else func_3(x)
def non_reducing_slice(slice_: Subset):
diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py
index 2143ef40582a5..fad289d5e0d2c 100644
--- a/pandas/tests/io/formats/style/test_html.py
+++ b/pandas/tests/io/formats/style/test_html.py
@@ -764,3 +764,43 @@ def test_hiding_index_columns_multiindex_trimming():
)
assert result == expected
+
+
+ @pytest.mark.parametrize("type", ["data", "index"])
+ @pytest.mark.parametrize(
+     "text, exp, found",
+     [
+         ("no link, just text", False, ""),
+         ("subdomain not www: sub.web.com", False, ""),
+         ("www subdomain: www.web.com other", True, "www.web.com"),
+         ("scheme full structure: http://www.web.com", True, "http://www.web.com"),
+         ("scheme no top-level: http://www.web", True, "http://www.web"),
+         ("no scheme, no top-level: www.web", False, "www.web"),
+         ("https scheme: https://www.web.com", True, "https://www.web.com"),
+         ("ftp scheme: ftp://www.web", True, "ftp://www.web"),
+         ("subdirectories: www.web.com/directory", True, "www.web.com/directory"),
+         ("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"),
+     ],
+ )
+ def test_rendered_links(type, text, exp, found):
+     # ``text`` is placed either in a data cell or in an index label; ``exp``
+     # states whether ``found`` should be wrapped in an anchor tag.
+     if type == "data":
+         df = DataFrame([text])
+         styler = df.style.format(hyperlinks="html")
+     else:
+         df = DataFrame([0], index=[text])
+         styler = df.style.format_index(hyperlinks="html")
+
+     rendered = '<a href="{0}" target="_blank">{0}</a>'.format(found)
+     result = styler.to_html()
+     assert (rendered in result) is exp
+     # the raw text survives verbatim only when no link was substituted into it
+     assert (text in result) is not exp
+
+
+ def test_multiple_rendered_links():
+     # every URL in a single cell must be converted, and plain words left alone
+     links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e")
+     df = DataFrame(["text {} {} text {} {}".format(*links)])
+     result = df.style.format(hyperlinks="html").to_html()
+     href = '<a href="{0}" target="_blank">{0}</a>'
+     for link in links:
+         assert href.format(link) in result
+     assert href.format("text") not in result
diff --git a/pandas/tests/io/formats/style/test_to_latex.py b/pandas/tests/io/formats/style/test_to_latex.py
index 9c2a364b396b8..0ecf6079044e0 100644
--- a/pandas/tests/io/formats/style/test_to_latex.py
+++ b/pandas/tests/io/formats/style/test_to_latex.py
@@ -845,3 +845,11 @@ def test_latex_hiding_index_columns_multiindex_alignment():
"""
)
assert result == expected
+
+
+ def test_rendered_links():
+     # URL detection is exercised in test_html.py: test_rendered_links;
+     # here we only confirm that the alternative "latex" format is functional
+     styler = DataFrame(["text www.domain.com text"]).style
+     latex = styler.format(hyperlinks="latex").to_latex()
+     expected = r"text \href{www.domain.com}{www.domain.com} text"
+     assert expected in latex