Skip to content

Commit 5146769

Browse files
mroeschke authored and
meeseeksmachine committed
Backport PR pandas-dev#48334: BUG: read_html(extract_links=all) with no header
1 parent 5987b63 commit 5146769

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

pandas/io/html.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
from pandas import isna
3535
from pandas.core.construction import create_series_with_explicit_dtype
3636
from pandas.core.indexes.base import Index
37+
from pandas.core.indexes.multi import MultiIndex
3738

3839
from pandas.io.common import (
3940
file_exists,
@@ -1009,9 +1010,11 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, extract_links, **
10091010
try:
10101011
df = _data_to_frame(data=table, **kwargs)
10111012
# Cast MultiIndex header to an Index of tuples when extracting header
1012-
# links and replace nan with None.
1013+
# links and replace nan with None (therefore can't use mi.to_flat_index()).
10131014
# This maintains consistency of selection (e.g. df.columns.str[1])
1014-
if extract_links in ("all", "header"):
1015+
if extract_links in ("all", "header") and isinstance(
1016+
df.columns, MultiIndex
1017+
):
10151018
df.columns = Index(
10161019
((col[0], None if isna(col[1]) else col[1]) for col in df.columns),
10171020
tupleize_cols=False,

pandas/tests/io/test_html.py

+15
Original file line number | Diff line number | Diff line change
@@ -1416,3 +1416,18 @@ def test_extract_links_bad(self, spam_data):
14161416
)
14171417
with pytest.raises(ValueError, match=msg):
14181418
read_html(spam_data, extract_links="incorrect")
1419+
1420+
def test_extract_links_all_no_header(self):
1421+
# GH 48316
1422+
data = """
1423+
<table>
1424+
<tr>
1425+
<td>
1426+
<a href='https://google.com'>Google.com</a>
1427+
</td>
1428+
</tr>
1429+
</table>
1430+
"""
1431+
result = self.read_html(data, extract_links="all")[0]
1432+
expected = DataFrame([[("Google.com", "https://google.com")]])
1433+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)