From ba9cc9a3cf14048469a4fea595ff0456ebbf90d4 Mon Sep 17 00:00:00 2001 From: Kian Eliasi Date: Mon, 21 Mar 2022 14:31:17 +0330 Subject: [PATCH 1/3] BUG: URL regex in `style_render` does not match colons and other valid characters, so URLs containing them (such as a colon before a port number) get cut off when HTML-formatting. As a workaround, expanded the regex to match a wider variety of URLs. --- pandas/io/formats/style_render.py | 2 +- pandas/tests/io/formats/style/test_html.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 0227bb1ef7cc6..4e3f86d21b228 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1589,7 +1589,7 @@ def _render_href(x, format): href = r"\href{{{0}}}{{{0}}}" else: raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'") - pat = r"(https?:\/\/|ftp:\/\/|www.)[\w/\-?=%.]+\.[\w/\-&?=%.]+" + pat = r"((http|ftp)s?:\/\/|www.)[\w/\-?=%.:@]+\.[\w/\-&?=%.,':;~!@#$*()\[\]]+" return re.sub(pat, lambda m: href.format(m.group(0)), x) return x diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py index 2abc963525977..bf4f79d0e89e6 100644 --- a/pandas/tests/io/formats/style/test_html.py +++ b/pandas/tests/io/formats/style/test_html.py @@ -778,8 +778,20 @@ def test_hiding_index_columns_multiindex_trimming(): ("no scheme, no top-level: www.web", False, "www.web"), ("https scheme: https://www.web.com", True, "https://www.web.com"), ("ftp scheme: ftp://www.web", True, "ftp://www.web"), + ("ftps scheme: ftps://www.web", True, "ftps://www.web"), ("subdirectories: www.web.com/directory", True, "www.web.com/directory"), ("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"), + ("with port: http://web.com:80", True, "http://web.com:80"), + ( + "full net_loc scheme: http://user:pass@web.com", + True, + "http://user:pass@web.com", + ), + ( + "with valid special chars: 
http://web.com/,.':;~!@#$*()[]", + True, + "http://web.com/,.':;~!@#$*()[]", + ), ], ) def test_rendered_links(type, text, exp, found): From 7d7acda558b6e2c61b08ee9d38030791eda18cc4 Mon Sep 17 00:00:00 2001 From: Kian Eliasi Date: Mon, 21 Mar 2022 17:04:46 +0330 Subject: [PATCH 2/3] Add whatsnew entry for #46389 fix --- doc/source/whatsnew/v1.4.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst index 4cbb8118055af..1714834fd51b9 100644 --- a/doc/source/whatsnew/v1.4.2.rst +++ b/doc/source/whatsnew/v1.4.2.rst @@ -31,7 +31,7 @@ Bug fixes ~~~~~~~~~ - Fix some cases for subclasses that define their ``_constructor`` properties as general callables (:issue:`46018`) - Fixed "longtable" formatting in :meth:`.Styler.to_latex` when ``column_format`` is given in extended format (:issue:`46037`) -- +- Fix incorrect hyperlink rendering when the url contains colon or other special characters (:issue:`46389`) .. --------------------------------------------------------------------------- From 73e6ff5cf6cdc62f1b7c36885070fb4266d53230 Mon Sep 17 00:00:00 2001 From: Kian Eliasi Date: Thu, 24 Mar 2022 17:57:05 +0430 Subject: [PATCH 3/3] Update whatsnew entry for fix #46389 Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.4.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst index 1714834fd51b9..13f3e9a0d0a8c 100644 --- a/doc/source/whatsnew/v1.4.2.rst +++ b/doc/source/whatsnew/v1.4.2.rst @@ -31,7 +31,7 @@ Bug fixes ~~~~~~~~~ - Fix some cases for subclasses that define their ``_constructor`` properties as general callables (:issue:`46018`) - Fixed "longtable" formatting in :meth:`.Styler.to_latex` when ``column_format`` is given in extended format (:issue:`46037`) -- Fix incorrect hyperlink rendering when the url contains colon or other special characters (:issue:`46389`) +- Fixed incorrect rendering in 
:meth:`.Styler.format` with ``hyperlinks="html"`` when the url contains a colon or other special characters (:issue:`46389`) .. ---------------------------------------------------------------------------