diff --git a/pandas/io/html.py b/pandas/io/html.py index f890ad86519df..acf98a2f83921 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -34,6 +34,7 @@ from pandas import isna from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.indexes.base import Index +from pandas.core.indexes.multi import MultiIndex from pandas.io.common import ( file_exists, @@ -1009,9 +1010,11 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, extract_links, ** try: df = _data_to_frame(data=table, **kwargs) # Cast MultiIndex header to an Index of tuples when extracting header - # links and replace nan with None. + # links and replace nan with None (therefore can't use mi.to_flat_index()). # This maintains consistency of selection (e.g. df.columns.str[1]) - if extract_links in ("all", "header"): + if extract_links in ("all", "header") and isinstance( + df.columns, MultiIndex + ): df.columns = Index( ((col[0], None if isna(col[1]) else col[1]) for col in df.columns), tupleize_cols=False, diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 045c22f106105..de0d4c1b49ea5 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1416,3 +1416,18 @@ def test_extract_links_bad(self, spam_data): ) with pytest.raises(ValueError, match=msg): read_html(spam_data, extract_links="incorrect") + + def test_extract_links_all_no_header(self): + # GH 48316 + data = """ + + + + +
+ Google.com +
+ """ + result = self.read_html(data, extract_links="all")[0] + expected = DataFrame([[("Google.com", "https://google.com")]]) + tm.assert_frame_equal(result, expected)