Revert MultiIndex -> Index conversion so as to maintain consistency in the behaviour of read_html

abmyii · abmyii · commit f41897569f7f · 2022-06-18T11:03:53.000+01:00
diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -186,7 +186,8 @@ class _HtmlFrameParser:
 
     extract_links : {None, "all", "header", "body", "footer"}
         Table elements in the specified section(s) with <a> tags will have their
-        href extracted.
+        href extracted. Note that specifying "header" or "all" will result in
+        the columns being a :class:`~pandas.MultiIndex`.
 
     .. versionadded:: 1.5.0
 
@@ -210,7 +211,8 @@ class _HtmlFrameParser:
 
     extract_links : {None, "all", "header", "body", "footer"}
         Table elements in the specified section(s) with <a> tags will have their
-        href extracted.
+        href extracted. Note that specifying "header" or "all" will result in
+        the columns being a :class:`~pandas.MultiIndex`.
 
     .. versionadded:: 1.5.0
 
@@ -1005,9 +1007,7 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, extract_links, **
     for table in tables:
         try:
             df = _data_to_frame(data=table, **kwargs)
-            # Cast MultiIndex header to an Index of tuples when extracting header
-            # links and replace np.nan with None.
-            # This maintains consistency of selection (e.g. df.columns.str[1])
+            # Replace nan with None for null header links for consistency throughout df.
             if extract_links in ("all", "header"):
                 idx = df.columns.values
                 idx[:] = np.vectorize(
@@ -1136,7 +1136,8 @@ def read_html(
 
     extract_links : {None, "all", "header", "body", "footer"}
         Table elements in the specified section(s) with <a> tags will have their
-        href extracted.
+        href extracted. Note that specifying "header" or "all" will result in
+        the columns being a :class:`~pandas.MultiIndex`.
 
     .. versionadded:: 1.5.0