Skip to content

Commit 5bfb41f

Browse files
TYP/DOC: add HTMLFlavors type to read_html and related (pandas-dev#55529)
1 parent e61a0a8 commit 5bfb41f

File tree

2 files changed: 8 additions and 4 deletions.

pandas/_typing.py

+3
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,9 @@ def closed(self) -> bool:
410410
# read_xml parsers
411411
XMLParsers = Literal["lxml", "etree"]
412412

413+
# read_html flavors
414+
HTMLFlavors = Literal["lxml", "html5lib", "bs4"]
415+
413416
# Interval closed type
414417
IntervalLeftRight = Literal["left", "right"]
415418
IntervalClosedType = Union[IntervalLeftRight, Literal["both", "neither"]]

pandas/io/html.py

+5 -4
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
BaseBuffer,
5858
DtypeBackend,
5959
FilePath,
60+
HTMLFlavors,
6061
ReadBuffer,
6162
StorageOptions,
6263
)
@@ -889,13 +890,13 @@ def _data_to_frame(**kwargs):
889890
}
890891

891892

892-
def _parser_dispatch(flavor: str | None) -> type[_HtmlFrameParser]:
893+
def _parser_dispatch(flavor: HTMLFlavors | None) -> type[_HtmlFrameParser]:
893894
"""
894895
Choose the parser based on the input flavor.
895896
896897
Parameters
897898
----------
898-
flavor : str
899+
flavor : {"lxml", "html5lib", "bs4"} or None
899900
The type of parser to use. This must be a valid backend.
900901
901902
Returns
@@ -1033,7 +1034,7 @@ def read_html(
10331034
io: FilePath | ReadBuffer[str],
10341035
*,
10351036
match: str | Pattern = ".+",
1036-
flavor: str | Sequence[str] | None = None,
1037+
flavor: HTMLFlavors | Sequence[HTMLFlavors] | None = None,
10371038
header: int | Sequence[int] | None = None,
10381039
index_col: int | Sequence[int] | None = None,
10391040
skiprows: int | Sequence[int] | slice | None = None,
@@ -1074,7 +1075,7 @@ def read_html(
10741075
This value is converted to a regular expression so that there is
10751076
consistent behavior between Beautiful Soup and lxml.
10761077
1077-
flavor : str or list-like, optional
1078+
flavor : {{"lxml", "html5lib", "bs4"}} or list-like, optional
10781079
The parsing engine (or list of parsing engines) to use. 'bs4' and
10791080
'html5lib' are synonymous with each other, they are both there for
10801081
backwards compatibility. The default of ``None`` tries to use ``lxml``

0 commit comments

Comments
 (0)