TYP: Add type hints to pd.read_html (#34291)

topper-123 · web-flow · commit 93a43838d607 · 2020-07-14T13:15:06.000-04:00
diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -8,14 +8,17 @@
 import numbers
 import os
 import re
+from typing import Dict, List, Optional, Pattern, Sequence, Union
 
+from pandas._typing import FilePathOrBuffer
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import AbstractMethodError, EmptyDataError
 from pandas.util._decorators import deprecate_nonkeyword_arguments
 
 from pandas.core.dtypes.common import is_list_like
 
 from pandas.core.construction import create_series_with_explicit_dtype
+from pandas.core.frame import DataFrame
 
 from pandas.io.common import is_url, urlopen, validate_header_arg
 from pandas.io.formats.printing import pprint_thing
@@ -924,22 +927,22 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
 
 @deprecate_nonkeyword_arguments(version="2.0")
 def read_html(
-    io,
-    match=".+",
-    flavor=None,
-    header=None,
-    index_col=None,
-    skiprows=None,
-    attrs=None,
-    parse_dates=False,
-    thousands=",",
-    encoding=None,
-    decimal=".",
-    converters=None,
+    io: FilePathOrBuffer,
+    match: Union[str, Pattern] = ".+",
+    flavor: Optional[str] = None,
+    header: Optional[Union[int, Sequence[int]]] = None,
+    index_col: Optional[Union[int, Sequence[int]]] = None,
+    skiprows: Optional[Union[int, Sequence[int], slice]] = None,
+    attrs: Optional[Dict[str, str]] = None,
+    parse_dates: bool = False,
+    thousands: Optional[str] = ",",
+    encoding: Optional[str] = None,
+    decimal: str = ".",
+    converters: Optional[Dict] = None,
     na_values=None,
-    keep_default_na=True,
-    displayed_only=True,
-):
+    keep_default_na: bool = True,
+    displayed_only: bool = True,
+) -> List[DataFrame]:
     r"""
     Read HTML tables into a ``list`` of ``DataFrame`` objects.
 
@@ -958,26 +961,26 @@ def read_html(
         This value is converted to a regular expression so that there is
         consistent behavior between Beautiful Soup and lxml.
 
-    flavor : str or None
+    flavor : str, optional
         The parsing engine to use. 'bs4' and 'html5lib' are synonymous with
         each other, they are both there for backwards compatibility. The
         default of ``None`` tries to use ``lxml`` to parse and if that fails it
         falls back on ``bs4`` + ``html5lib``.
 
-    header : int or list-like or None, optional
+    header : int or list-like, optional
         The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to
         make the columns headers.
 
-    index_col : int or list-like or None, optional
+    index_col : int or list-like, optional
         The column (or list of columns) to use to create the index.
 
-    skiprows : int or list-like or slice or None, optional
+    skiprows : int, list-like or slice, optional
         Number of rows to skip after parsing the column integer. 0-based. If a
         sequence of integers or a slice is given, will skip the rows indexed by
         that sequence.  Note that a single element sequence means 'skip the nth
         row' whereas an integer means 'skip n rows'.
 
-    attrs : dict or None, optional
+    attrs : dict, optional
         This is a dictionary of attributes that you can pass to use to identify
         the table in the HTML. These are not checked for validity before being
         passed to lxml or Beautiful Soup. However, these attributes must be
@@ -1005,7 +1008,7 @@ def read_html(
     thousands : str, optional
         Separator to use to parse thousands. Defaults to ``','``.
 
-    encoding : str or None, optional
+    encoding : str, optional
         The encoding used to decode the web page. Defaults to ``None``.``None``
         preserves the previous encoding behavior, which depends on the
         underlying parser library (e.g., the parser library will try to use