diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 9d788ffcfabe1..330d64ea88b8c 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -111,6 +111,7 @@ Additionally there are specific enhancements to the HTML specific rendering: - :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, :issue:`43149`, :issue:`42972`). - :meth:`.Styler.to_html` omits CSSStyle rules for hidden table elements as a performance enhancement (:issue:`43619`) - Custom CSS classes can now be directly specified without string replacement (:issue:`43686`) + - Ability to render hyperlinks automatically via a new ``hyperlinks`` formatting keyword argument (:issue:`45058`) There are also some LaTeX specific enhancements: diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index d9550f0940376..a196ec60c4012 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -623,6 +623,7 @@ def to_latex( | \\sisetup{detect-all = true} *(within {document})* environment \\usepackage{longtable} if arg is "longtable" | or any other relevant environment package + hyperlinks \\usepackage{hyperref} ===================== ========================================================== **Cell Styles** diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index dcb1f9a2a70dc..a6b8913b23d9d 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -798,6 +798,7 @@ def format( decimal: str = ".", thousands: str | None = None, escape: str | None = None, + hyperlinks: str | None = None, ) -> StylerRenderer: r""" Format the text display value of cells. @@ -842,6 +843,13 @@ def format( .. versionadded:: 1.3.0 + hyperlinks : {"html", "latex"}, optional + Convert string patterns containing https://, http://, ftp:// or www. 
to + HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href + commands if "latex". + + .. versionadded:: 1.4.0 + Returns ------- self : Styler @@ -958,6 +966,7 @@ def format( thousands is None, na_rep is None, escape is None, + hyperlinks is None, ) ): self._display_funcs.clear() @@ -980,6 +989,7 @@ def format( decimal=decimal, thousands=thousands, escape=escape, + hyperlinks=hyperlinks, ) for ri in ris: self._display_funcs[(ri, ci)] = format_func @@ -996,6 +1006,7 @@ def format_index( decimal: str = ".", thousands: str | None = None, escape: str | None = None, + hyperlinks: str | None = None, ) -> StylerRenderer: r""" Format the text display value of index labels or column headers. @@ -1027,6 +1038,10 @@ def format_index( ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with LaTeX-safe sequences. Escaping is done before ``formatter``. + hyperlinks : {"html", "latex"}, optional + Convert string patterns containing https://, http://, ftp:// or www. to + HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href + commands if "latex". 
Returns ------- @@ -1128,6 +1143,7 @@ def format_index( thousands is None, na_rep is None, escape is None, + hyperlinks is None, ) ): display_funcs_.clear() @@ -1149,6 +1165,7 @@ def format_index( decimal=decimal, thousands=thousands, escape=escape, + hyperlinks=hyperlinks, ) for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]: @@ -1391,6 +1408,20 @@ def _str_escape(x, escape): return x +def _render_href(x, format): + """uses regex to detect a common URL pattern and converts to href tag in format.""" + if isinstance(x, str): + if format == "html": + href = '<a href="{0}" target="_blank">{0}</a>' + elif format == "latex": + href = r"\href{{{0}}}{{{0}}}" + else: + raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'") + pat = r"(https?:\/\/|ftp:\/\/|www.)[\w/\-?=%.]+\.[\w/\-&?=%.]+" + return re.sub(pat, lambda m: href.format(m.group(0)), x) + return x + + def _maybe_wrap_formatter( formatter: BaseFormatter | None = None, na_rep: str | None = None, @@ -1398,6 +1429,7 @@ def _maybe_wrap_formatter( decimal: str = ".", thousands: str | None = None, escape: str | None = None, + hyperlinks: str | None = None, ) -> Callable: """ Allows formatters to be expressed as str, callable or None, where None returns @@ -1431,11 +1463,17 @@ def _maybe_wrap_formatter( else: func_2 = func_1 + # Render links + if hyperlinks is not None: + func_3 = lambda x: func_2(_render_href(x, format=hyperlinks)) + else: + func_3 = func_2 + # Replace missing values if na_rep if na_rep is None: - return func_2 + return func_3 else: - return lambda x: na_rep if isna(x) else func_2(x) + return lambda x: na_rep if isna(x) else func_3(x) def non_reducing_slice(slice_: Subset): diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py index 2143ef40582a5..fad289d5e0d2c 100644 --- a/pandas/tests/io/formats/style/test_html.py +++ b/pandas/tests/io/formats/style/test_html.py @@ -764,3 +764,43 @@ def test_hiding_index_columns_multiindex_trimming(): ) assert result 
== expected + + +@pytest.mark.parametrize("type", ["data", "index"]) +@pytest.mark.parametrize( + "text, exp, found", + [ + ("no link, just text", False, ""), + ("subdomain not www: sub.web.com", False, ""), + ("www subdomain: www.web.com other", True, "www.web.com"), + ("scheme full structure: http://www.web.com", True, "http://www.web.com"), + ("scheme no top-level: http://www.web", True, "http://www.web"), + ("no scheme, no top-level: www.web", False, "www.web"), + ("https scheme: https://www.web.com", True, "https://www.web.com"), + ("ftp scheme: ftp://www.web", True, "ftp://www.web"), + ("subdirectories: www.web.com/directory", True, "www.web.com/directory"), + ("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"), + ], +) +def test_rendered_links(type, text, exp, found): + if type == "data": + df = DataFrame([text]) + styler = df.style.format(hyperlinks="html") + else: + df = DataFrame([0], index=[text]) + styler = df.style.format_index(hyperlinks="html") + + rendered = '<a href="{0}" target="_blank">{0}</a>'.format(found) + result = styler.to_html() + assert (rendered in result) is exp + assert (text in result) is not exp # test conversion done when expected and not + + +def test_multiple_rendered_links(): + links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e") + df = DataFrame(["text {} {} text {} {}".format(*links)]) + result = df.style.format(hyperlinks="html").to_html() + href = '<a href="{0}" target="_blank">{0}</a>' + for link in links: + assert href.format(link) in result + assert href.format("text") not in result diff --git a/pandas/tests/io/formats/style/test_to_latex.py b/pandas/tests/io/formats/style/test_to_latex.py index 9c2a364b396b8..0ecf6079044e0 100644 --- a/pandas/tests/io/formats/style/test_to_latex.py +++ b/pandas/tests/io/formats/style/test_to_latex.py @@ -845,3 +845,11 @@ def test_latex_hiding_index_columns_multiindex_alignment(): """ ) assert result == expected + + +def test_rendered_links(): + # note the majority of testing is done in test_html.py: test_rendered_links + # these test only the 
alternative latex format is functional + df = DataFrame(["text www.domain.com text"]) + result = df.style.format(hyperlinks="latex").to_latex() + assert r"text \href{www.domain.com}{www.domain.com} text" in result