Skip to content

Commit 93a4383

Browse files
authored
TYP: Add type hints to pd.read_html (#34291)
1 parent bfb3715 commit 93a4383

File tree

1 file changed

+24
-21
lines changed

1 file changed

+24
-21
lines changed

pandas/io/html.py

+24-21
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,17 @@
88
import numbers
99
import os
1010
import re
11+
from typing import Dict, List, Optional, Pattern, Sequence, Union
1112

13+
from pandas._typing import FilePathOrBuffer
1214
from pandas.compat._optional import import_optional_dependency
1315
from pandas.errors import AbstractMethodError, EmptyDataError
1416
from pandas.util._decorators import deprecate_nonkeyword_arguments
1517

1618
from pandas.core.dtypes.common import is_list_like
1719

1820
from pandas.core.construction import create_series_with_explicit_dtype
21+
from pandas.core.frame import DataFrame
1922

2023
from pandas.io.common import is_url, urlopen, validate_header_arg
2124
from pandas.io.formats.printing import pprint_thing
@@ -924,22 +927,22 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
924927

925928
@deprecate_nonkeyword_arguments(version="2.0")
926929
def read_html(
927-
io,
928-
match=".+",
929-
flavor=None,
930-
header=None,
931-
index_col=None,
932-
skiprows=None,
933-
attrs=None,
934-
parse_dates=False,
935-
thousands=",",
936-
encoding=None,
937-
decimal=".",
938-
converters=None,
930+
io: FilePathOrBuffer,
931+
match: Union[str, Pattern] = ".+",
932+
flavor: Optional[str] = None,
933+
header: Optional[Union[int, Sequence[int]]] = None,
934+
index_col: Optional[Union[int, Sequence[int]]] = None,
935+
skiprows: Optional[Union[int, Sequence[int], slice]] = None,
936+
attrs: Optional[Dict[str, str]] = None,
937+
parse_dates: bool = False,
938+
thousands: Optional[str] = ",",
939+
encoding: Optional[str] = None,
940+
decimal: str = ".",
941+
converters: Optional[Dict] = None,
939942
na_values=None,
940-
keep_default_na=True,
941-
displayed_only=True,
942-
):
943+
keep_default_na: bool = True,
944+
displayed_only: bool = True,
945+
) -> List[DataFrame]:
943946
r"""
944947
Read HTML tables into a ``list`` of ``DataFrame`` objects.
945948
@@ -958,26 +961,26 @@ def read_html(
958961
This value is converted to a regular expression so that there is
959962
consistent behavior between Beautiful Soup and lxml.
960963
961-
flavor : str or None
964+
flavor : str, optional
962965
The parsing engine to use. 'bs4' and 'html5lib' are synonymous with
963966
each other, they are both there for backwards compatibility. The
964967
default of ``None`` tries to use ``lxml`` to parse and if that fails it
965968
falls back on ``bs4`` + ``html5lib``.
966969
967-
header : int or list-like or None, optional
970+
header : int or list-like, optional
968971
The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to
969972
make the columns headers.
970973
971-
index_col : int or list-like or None, optional
974+
index_col : int or list-like, optional
972975
The column (or list of columns) to use to create the index.
973976
974-
skiprows : int or list-like or slice or None, optional
977+
skiprows : int, list-like or slice, optional
975978
Number of rows to skip after parsing the column integer. 0-based. If a
976979
sequence of integers or a slice is given, will skip the rows indexed by
977980
that sequence. Note that a single element sequence means 'skip the nth
978981
row' whereas an integer means 'skip n rows'.
979982
980-
attrs : dict or None, optional
983+
attrs : dict, optional
981984
This is a dictionary of attributes that you can pass to identify
982985
the table in the HTML. These are not checked for validity before being
983986
passed to lxml or Beautiful Soup. However, these attributes must be
@@ -1005,7 +1008,7 @@ def read_html(
10051008
thousands : str, optional
10061009
Separator to use to parse thousands. Defaults to ``','``.
10071010
1008-
encoding : str or None, optional
1011+
encoding : str, optional
10091012
The encoding used to decode the web page. Defaults to ``None``. ``None``
10101013
preserves the previous encoding behavior, which depends on the
10111014
underlying parser library (e.g., the parser library will try to use

0 commit comments

Comments
 (0)