23
23
AbstractMethodError ,
24
24
EmptyDataError ,
25
25
)
26
+ from pandas .util ._decorators import doc
26
27
from pandas .util ._exceptions import find_stack_level
27
28
from pandas .util ._validators import check_dtype_backend
28
29
32
33
from pandas .core .indexes .base import Index
33
34
from pandas .core .indexes .multi import MultiIndex
34
35
from pandas .core .series import Series
36
+ from pandas .core .shared_docs import _shared_docs
35
37
36
38
from pandas .io .common import (
37
39
file_exists ,
@@ -363,13 +365,13 @@ def _parse_tfoot_tr(self, table):
363
365
"""
364
366
raise AbstractMethodError (self )
365
367
366
- def _parse_tables (self , doc , match , attrs ):
368
+ def _parse_tables (self , document , match , attrs ):
367
369
"""
368
370
Return all tables from the parsed DOM.
369
371
370
372
Parameters
371
373
----------
372
- doc : the DOM from which to parse the table element.
374
+ document : the DOM from which to parse the table element.
373
375
374
376
match : str or regular expression
375
377
The text to search for in the DOM tree.
@@ -594,9 +596,9 @@ def __init__(self, *args, **kwargs) -> None:
594
596
595
597
self ._strainer = SoupStrainer ("table" )
596
598
597
- def _parse_tables (self , doc , match , attrs ):
599
+ def _parse_tables (self , document , match , attrs ):
598
600
element_name = self ._strainer .name
599
- tables = doc .find_all (element_name , attrs = attrs )
601
+ tables = document .find_all (element_name , attrs = attrs )
600
602
if not tables :
601
603
raise ValueError ("No tables found" )
602
604
@@ -726,7 +728,7 @@ def _parse_td(self, row):
726
728
# <thead> or <tfoot> (see _parse_thead_tr).
727
729
return row .xpath ("./td|./th" )
728
730
729
- def _parse_tables (self , doc , match , kwargs ):
731
+ def _parse_tables (self , document , match , kwargs ):
730
732
pattern = match .pattern
731
733
732
734
# 1. check all descendants for the given pattern and only search tables
@@ -738,7 +740,7 @@ def _parse_tables(self, doc, match, kwargs):
738
740
if kwargs :
739
741
xpath_expr += _build_xpath_expr (kwargs )
740
742
741
- tables = doc .xpath (xpath_expr , namespaces = _re_namespace )
743
+ tables = document .xpath (xpath_expr , namespaces = _re_namespace )
742
744
743
745
tables = self ._handle_hidden_tables (tables , "attrib" )
744
746
if self .displayed_only :
@@ -1026,6 +1028,7 @@ def _parse(
1026
1028
return ret
1027
1029
1028
1030
1031
+ @doc (storage_options = _shared_docs ["storage_options" ])
1029
1032
def read_html (
1030
1033
io : FilePath | ReadBuffer [str ],
1031
1034
* ,
@@ -1096,13 +1099,13 @@ def read_html(
1096
1099
passed to lxml or Beautiful Soup. However, these attributes must be
1097
1100
valid HTML table attributes to work correctly. For example, ::
1098
1101
1099
- attrs = {'id': 'table'}
1102
+ attrs = {{ 'id': 'table'} }
1100
1103
1101
1104
is a valid attribute dictionary because the 'id' HTML tag attribute is
1102
1105
a valid HTML attribute for *any* HTML tag as per `this document
1103
1106
<https://html.spec.whatwg.org/multipage/dom.html#global-attributes>`__. ::
1104
1107
1105
- attrs = {'asdf': 'table'}
1108
+ attrs = {{ 'asdf': 'table'} }
1106
1109
1107
1110
is *not* a valid attribute dictionary because 'asdf' is not a valid
1108
1111
HTML attribute even if it is a valid XML attribute. Valid HTML 4.01
@@ -1144,13 +1147,13 @@ def read_html(
1144
1147
displayed_only : bool, default True
1145
1148
Whether elements with "display: none" should be parsed.
1146
1149
1147
- extract_links : {None, "all", "header", "body", "footer"}
1150
+ extract_links : {{ None, "all", "header", "body", "footer"} }
1148
1151
Table elements in the specified section(s) with <a> tags will have their
1149
1152
href extracted.
1150
1153
1151
1154
.. versionadded:: 1.5.0
1152
1155
1153
- dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
1156
+ dtype_backend : {{ 'numpy_nullable', 'pyarrow'} }, default 'numpy_nullable'
1154
1157
Back-end data type applied to the resultant :class:`DataFrame`
1155
1158
(still experimental). Behaviour is as follows:
1156
1159
@@ -1161,6 +1164,10 @@ def read_html(
1161
1164
1162
1165
.. versionadded:: 2.0
1163
1166
1167
+ {storage_options}
1168
+
1169
+ .. versionadded:: 2.1.0
1170
+
1164
1171
Returns
1165
1172
-------
1166
1173
dfs
0 commit comments