Skip to content

Commit f7f2057

Browse files
Backport PR #54815 on branch 2.1.x (DOC: added docstring for storage_options in read_html) (#54852)
Backport PR #54815: DOC: added docstring for `storage_options` in `read_html`.
Co-authored-by: Rajat Subhra Mukherjee <[email protected]>
1 parent 11424f8 commit f7f2057

File tree

1 file changed: 17 additions (+17) and 10 deletions (-10).

pandas/io/html.py

+17 -10
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
AbstractMethodError,
2424
EmptyDataError,
2525
)
26+
from pandas.util._decorators import doc
2627
from pandas.util._exceptions import find_stack_level
2728
from pandas.util._validators import check_dtype_backend
2829

@@ -32,6 +33,7 @@
3233
from pandas.core.indexes.base import Index
3334
from pandas.core.indexes.multi import MultiIndex
3435
from pandas.core.series import Series
36+
from pandas.core.shared_docs import _shared_docs
3537

3638
from pandas.io.common import (
3739
file_exists,
@@ -363,13 +365,13 @@ def _parse_tfoot_tr(self, table):
363365
"""
364366
raise AbstractMethodError(self)
365367

366-
def _parse_tables(self, doc, match, attrs):
368+
def _parse_tables(self, document, match, attrs):
367369
"""
368370
Return all tables from the parsed DOM.
369371
370372
Parameters
371373
----------
372-
doc : the DOM from which to parse the table element.
374+
document : the DOM from which to parse the table element.
373375
374376
match : str or regular expression
375377
The text to search for in the DOM tree.
@@ -594,9 +596,9 @@ def __init__(self, *args, **kwargs) -> None:
594596

595597
self._strainer = SoupStrainer("table")
596598

597-
def _parse_tables(self, doc, match, attrs):
599+
def _parse_tables(self, document, match, attrs):
598600
element_name = self._strainer.name
599-
tables = doc.find_all(element_name, attrs=attrs)
601+
tables = document.find_all(element_name, attrs=attrs)
600602
if not tables:
601603
raise ValueError("No tables found")
602604

@@ -726,7 +728,7 @@ def _parse_td(self, row):
726728
# <thead> or <tfoot> (see _parse_thead_tr).
727729
return row.xpath("./td|./th")
728730

729-
def _parse_tables(self, doc, match, kwargs):
731+
def _parse_tables(self, document, match, kwargs):
730732
pattern = match.pattern
731733

732734
# 1. check all descendants for the given pattern and only search tables
@@ -738,7 +740,7 @@ def _parse_tables(self, doc, match, kwargs):
738740
if kwargs:
739741
xpath_expr += _build_xpath_expr(kwargs)
740742

741-
tables = doc.xpath(xpath_expr, namespaces=_re_namespace)
743+
tables = document.xpath(xpath_expr, namespaces=_re_namespace)
742744

743745
tables = self._handle_hidden_tables(tables, "attrib")
744746
if self.displayed_only:
@@ -1026,6 +1028,7 @@ def _parse(
10261028
return ret
10271029

10281030

1031+
@doc(storage_options=_shared_docs["storage_options"])
10291032
def read_html(
10301033
io: FilePath | ReadBuffer[str],
10311034
*,
@@ -1096,13 +1099,13 @@ def read_html(
10961099
passed to lxml or Beautiful Soup. However, these attributes must be
10971100
valid HTML table attributes to work correctly. For example, ::
10981101
1099-
attrs = {'id': 'table'}
1102+
attrs = {{'id': 'table'}}
11001103
11011104
is a valid attribute dictionary because the 'id' HTML tag attribute is
11021105
a valid HTML attribute for *any* HTML tag as per `this document
11031106
<https://html.spec.whatwg.org/multipage/dom.html#global-attributes>`__. ::
11041107
1105-
attrs = {'asdf': 'table'}
1108+
attrs = {{'asdf': 'table'}}
11061109
11071110
is *not* a valid attribute dictionary because 'asdf' is not a valid
11081111
HTML attribute even if it is a valid XML attribute. Valid HTML 4.01
@@ -1144,13 +1147,13 @@ def read_html(
11441147
displayed_only : bool, default True
11451148
Whether elements with "display: none" should be parsed.
11461149
1147-
extract_links : {None, "all", "header", "body", "footer"}
1150+
extract_links : {{None, "all", "header", "body", "footer"}}
11481151
Table elements in the specified section(s) with <a> tags will have their
11491152
href extracted.
11501153
11511154
.. versionadded:: 1.5.0
11521155
1153-
dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
1156+
dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
11541157
Back-end data type applied to the resultant :class:`DataFrame`
11551158
(still experimental). Behaviour is as follows:
11561159
@@ -1161,6 +1164,10 @@ def read_html(
11611164
11621165
.. versionadded:: 2.0
11631166
1167+
{storage_options}
1168+
1169+
.. versionadded:: 2.1.0
1170+
11641171
Returns
11651172
-------
11661173
dfs

0 commit comments

Comments
 (0)