8
8
import numbers
9
9
import os
10
10
import re
11
+ from typing import Dict , List , Optional , Pattern , Sequence , Union
11
12
13
+ from pandas ._typing import FilePathOrBuffer
12
14
from pandas .compat ._optional import import_optional_dependency
13
15
from pandas .errors import AbstractMethodError , EmptyDataError
14
16
from pandas .util ._decorators import deprecate_nonkeyword_arguments
15
17
16
18
from pandas .core .dtypes .common import is_list_like
17
19
18
20
from pandas .core .construction import create_series_with_explicit_dtype
21
+ from pandas .core .frame import DataFrame
19
22
20
23
from pandas .io .common import is_url , urlopen , validate_header_arg
21
24
from pandas .io .formats .printing import pprint_thing
@@ -924,22 +927,22 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
924
927
925
928
@deprecate_nonkeyword_arguments (version = "2.0" )
926
929
def read_html (
927
- io ,
928
- match = ".+" ,
929
- flavor = None ,
930
- header = None ,
931
- index_col = None ,
932
- skiprows = None ,
933
- attrs = None ,
934
- parse_dates = False ,
935
- thousands = "," ,
936
- encoding = None ,
937
- decimal = "." ,
938
- converters = None ,
930
+ io : FilePathOrBuffer ,
931
+ match : Union [ str , Pattern ] = ".+" ,
932
+ flavor : Optional [ str ] = None ,
933
+ header : Optional [ Union [ int , Sequence [ int ]]] = None ,
934
+ index_col : Optional [ Union [ int , Sequence [ int ]]] = None ,
935
+ skiprows : Optional [ Union [ int , Sequence [ int ], slice ]] = None ,
936
+ attrs : Optional [ Dict [ str , str ]] = None ,
937
+ parse_dates : bool = False ,
938
+ thousands : Optional [ str ] = "," ,
939
+ encoding : Optional [ str ] = None ,
940
+ decimal : str = "." ,
941
+ converters : Optional [ Dict ] = None ,
939
942
na_values = None ,
940
- keep_default_na = True ,
941
- displayed_only = True ,
942
- ):
943
+ keep_default_na : bool = True ,
944
+ displayed_only : bool = True ,
945
+ ) -> List [ DataFrame ] :
943
946
r"""
944
947
Read HTML tables into a ``list`` of ``DataFrame`` objects.
945
948
@@ -958,26 +961,26 @@ def read_html(
958
961
This value is converted to a regular expression so that there is
959
962
consistent behavior between Beautiful Soup and lxml.
960
963
961
- flavor : str or None
964
+ flavor : str, optional
962
965
The parsing engine to use. 'bs4' and 'html5lib' are synonymous with
963
966
each other, they are both there for backwards compatibility. The
964
967
default of ``None`` tries to use ``lxml`` to parse and if that fails it
965
968
falls back on ``bs4`` + ``html5lib``.
966
969
967
- header : int or list-like or None , optional
970
+ header : int or list-like, optional
968
971
The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to
969
972
make the columns headers.
970
973
971
- index_col : int or list-like or None , optional
974
+ index_col : int or list-like, optional
972
975
The column (or list of columns) to use to create the index.
973
976
974
- skiprows : int or list-like or slice or None , optional
977
+ skiprows : int, list-like or slice, optional
975
978
Number of rows to skip after parsing the column integer. 0-based. If a
976
979
sequence of integers or a slice is given, will skip the rows indexed by
977
980
that sequence. Note that a single element sequence means 'skip the nth
978
981
row' whereas an integer means 'skip n rows'.
979
982
980
- attrs : dict or None , optional
983
+ attrs : dict, optional
981
984
This is a dictionary of attributes that you can pass to identify
982
985
the table in the HTML. These are not checked for validity before being
983
986
passed to lxml or Beautiful Soup. However, these attributes must be
@@ -1005,7 +1008,7 @@ def read_html(
1005
1008
thousands : str, optional
1006
1009
Separator to use to parse thousands. Defaults to ``','``.
1007
1010
1008
- encoding : str or None , optional
1011
+ encoding : str, optional
1009
1012
The encoding used to decode the web page. Defaults to ``None``. ``None``
1010
1013
preserves the previous encoding behavior, which depends on the
1011
1014
underlying parser library (e.g., the parser library will try to use
0 commit comments