Skip to content

Commit 2f6d682

Browse files
benjaminarjun authored and WillAyd committed
Add option for DataFrame.to_html() to render URL data as links (#2679) (#23715)
1 parent b7ee829 commit 2f6d682

File tree

8 files changed

+117
-8
lines changed

8 files changed

+117
-8
lines changed

doc/source/io.rst

+22
Original file line numberDiff line numberDiff line change
@@ -2596,6 +2596,28 @@ table CSS classes. Note that these classes are *appended* to the existing
25962596
25972597
print(df.to_html(classes=['awesome_table_class', 'even_more_awesome_class']))
25982598
2599+
The ``render_links`` argument provides the ability to add hyperlinks to cells
2600+
that contain URLs.
2601+
2602+
.. versionadded:: 0.24.0
2603+
2604+
.. ipython:: python
2605+
2606+
url_df = pd.DataFrame({
2607+
'name': ['Python', 'Pandas'],
2608+
'url': ['https://www.python.org/', 'http://pandas.pydata.org']})
2609+
print(url_df.to_html(render_links=True))
2610+
2611+
.. ipython:: python
2612+
:suppress:
2613+
2614+
write_html(url_df, 'render_links', render_links=True)
2615+
2616+
HTML:
2617+
2618+
.. raw:: html
2619+
:file: _static/render_links.html
2620+
25992621
Finally, the ``escape`` argument allows you to control whether the
26002622
"<", ">" and "&" characters are escaped in the resulting HTML (by default it is
26012623
``True``). So to get the HTML without escaped characters pass ``escape=False``

doc/source/whatsnew/v0.24.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ New features
2727
- :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`)
2828
- :func:`DataFrame.to_string` now accepts ``decimal`` as an argument, allowing the user to specify which decimal separator should be used in the output. (:issue:`23614`)
2929
- :func:`read_feather` now accepts ``columns`` as an argument, allowing the user to specify which columns should be read. (:issue:`24025`)
30+
- :func:`DataFrame.to_html` now accepts ``render_links`` as an argument, allowing the user to generate HTML with links to any URLs that appear in the DataFrame.
31+
See the :ref:`section on writing HTML <io.html>` in the IO docs for example usage. (:issue:`2679`)
3032

3133
.. _whatsnew_0240.values_api:
3234

pandas/core/frame.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -2044,8 +2044,8 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True,
20442044
index=True, na_rep='NaN', formatters=None, float_format=None,
20452045
sparsify=None, index_names=True, justify=None, max_rows=None,
20462046
max_cols=None, show_dimensions=False, decimal='.',
2047-
bold_rows=True, classes=None, escape=True,
2048-
notebook=False, border=None, table_id=None):
2047+
bold_rows=True, classes=None, escape=True, notebook=False,
2048+
border=None, table_id=None, render_links=False):
20492049
"""
20502050
Render a DataFrame as an HTML table.
20512051
%(shared_params)s
@@ -2067,6 +2067,12 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True,
20672067
A css id is included in the opening `<table>` tag if specified.
20682068
20692069
.. versionadded:: 0.23.0
2070+
2071+
render_links : bool, default False
2072+
Convert URLs to HTML links.
2073+
2074+
.. versionadded:: 0.24.0
2075+
20702076
%(returns)s
20712077
See Also
20722078
--------
@@ -2088,7 +2094,8 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True,
20882094
max_rows=max_rows,
20892095
max_cols=max_cols,
20902096
show_dimensions=show_dimensions,
2091-
decimal=decimal, table_id=table_id)
2097+
decimal=decimal, table_id=table_id,
2098+
render_links=render_links)
20922099
# TODO: a generic formatter would be in DataFrameFormatter
20932100
formatter.to_html(classes=classes, notebook=notebook, border=border)
20942101

pandas/io/formats/format.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
383383
justify=None, float_format=None, sparsify=None,
384384
index_names=True, line_width=None, max_rows=None,
385385
max_cols=None, show_dimensions=False, decimal='.',
386-
table_id=None, **kwds):
386+
table_id=None, render_links=False, **kwds):
387387
self.frame = frame
388388
if buf is not None:
389389
self.buf = _expand_user(_stringify_path(buf))
@@ -410,6 +410,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
410410
len(self.frame))
411411
self.show_dimensions = show_dimensions
412412
self.table_id = table_id
413+
self.render_links = render_links
413414

414415
if justify is None:
415416
self.justify = get_option("display.colheader_justify")
@@ -731,7 +732,8 @@ def to_html(self, classes=None, notebook=False, border=None):
731732
"""
732733
from pandas.io.formats.html import HTMLFormatter
733734
html_renderer = HTMLFormatter(self, classes=classes, notebook=notebook,
734-
border=border, table_id=self.table_id)
735+
border=border, table_id=self.table_id,
736+
render_links=self.render_links)
735737
if hasattr(self.buf, 'write'):
736738
html_renderer.write_result(self.buf)
737739
elif isinstance(self.buf, compat.string_types):

pandas/io/formats/html.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import pandas.core.common as com
1616
from pandas.core.config import get_option
1717

18+
from pandas.io.common import _is_url
1819
from pandas.io.formats.format import (
1920
TableFormatter, buffer_put_lines, get_level_lengths)
2021
from pandas.io.formats.printing import pprint_thing
@@ -25,7 +26,7 @@ class HTMLFormatter(TableFormatter):
2526
indent_delta = 2
2627

2728
def __init__(self, formatter, classes=None, notebook=False, border=None,
28-
table_id=None):
29+
table_id=None, render_links=False):
2930
self.fmt = formatter
3031
self.classes = classes
3132

@@ -40,6 +41,7 @@ def __init__(self, formatter, classes=None, notebook=False, border=None,
4041
border = get_option('display.html.border')
4142
self.border = border
4243
self.table_id = table_id
44+
self.render_links = render_links
4345

4446
@property
4547
def is_truncated(self):
@@ -76,9 +78,19 @@ def _write_cell(self, s, kind='td', indent=0, tags=None):
7678
('>', r'&gt;')])
7779
else:
7880
esc = {}
81+
7982
rs = pprint_thing(s, escape_chars=esc).strip()
80-
self.write(u'{start}{rs}</{kind}>'
81-
.format(start=start_tag, rs=rs, kind=kind), indent)
83+
84+
if self.render_links and _is_url(rs):
85+
rs_unescaped = pprint_thing(s, escape_chars={}).strip()
86+
start_tag += '<a href="{url}" target="_blank">'.format(
87+
url=rs_unescaped)
88+
end_a = '</a>'
89+
else:
90+
end_a = ''
91+
92+
self.write(u'{start}{rs}{end_a}</{kind}>'.format(
93+
start=start_tag, rs=rs, end_a=end_a, kind=kind), indent)
8294

8395
def write_tr(self, line, indent=0, indent_delta=0, header=False,
8496
align=None, tags=None, nindex_levels=0):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<table border="1" class="dataframe">
2+
<thead>
3+
<tr style="text-align: right;">
4+
<th></th>
5+
<th>foo</th>
6+
<th>bar</th>
7+
<th>None</th>
8+
</tr>
9+
</thead>
10+
<tbody>
11+
<tr>
12+
<th>0</th>
13+
<td>0</td>
14+
<td>http://pandas.pydata.org/?q1=a&amp;q2=b</td>
15+
<td>pydata.org</td>
16+
</tr>
17+
<tr>
18+
<th>1</th>
19+
<td>0</td>
20+
<td>www.pydata.org</td>
21+
<td>pydata.org</td>
22+
</tr>
23+
</tbody>
24+
</table>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<table border="1" class="dataframe">
2+
<thead>
3+
<tr style="text-align: right;">
4+
<th></th>
5+
<th>foo</th>
6+
<th>bar</th>
7+
<th>None</th>
8+
</tr>
9+
</thead>
10+
<tbody>
11+
<tr>
12+
<th>0</th>
13+
<td>0</td>
14+
<td><a href="http://pandas.pydata.org/?q1=a&q2=b" target="_blank">http://pandas.pydata.org/?q1=a&amp;q2=b</a></td>
15+
<td>pydata.org</td>
16+
</tr>
17+
<tr>
18+
<th>1</th>
19+
<td>0</td>
20+
<td>www.pydata.org</td>
21+
<td>pydata.org</td>
22+
</tr>
23+
</tbody>
24+
</table>

pandas/tests/io/formats/test_to_html.py

+16
Original file line numberDiff line numberDiff line change
@@ -477,3 +477,19 @@ def test_to_html_float_format_no_fixed_width(self, datapath):
477477
df = DataFrame({'x': [100.0]})
478478
expected = expected_html(datapath, 'gh22270_expected_output')
479479
assert df.to_html(float_format='%.0f') == expected
480+
481+
@pytest.mark.parametrize("render_links, file_name", [
482+
(True, 'render_links_true'),
483+
(False, 'render_links_false'),
484+
])
485+
def test_to_html_render_links(self, render_links, file_name, datapath):
486+
# GH 2679
487+
data = [
488+
[0, 'http://pandas.pydata.org/?q1=a&q2=b', 'pydata.org'],
489+
[0, 'www.pydata.org', 'pydata.org']
490+
]
491+
df = DataFrame(data, columns=['foo', 'bar', None])
492+
493+
result = df.to_html(render_links=render_links)
494+
expected = expected_html(datapath, file_name)
495+
assert result == expected

0 commit comments

Comments
 (0)