Skip to content

TYP: Add type hints to pd.read_html #34291

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 14, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 24 additions & 21 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
import numbers
import os
import re
from typing import Dict, List, Optional, Pattern, Sequence, Union

from pandas._typing import FilePathOrBuffer
from pandas.compat._optional import import_optional_dependency
from pandas.errors import AbstractMethodError, EmptyDataError
from pandas.util._decorators import deprecate_nonkeyword_arguments

from pandas.core.dtypes.common import is_list_like

from pandas.core.construction import create_series_with_explicit_dtype
from pandas.core.frame import DataFrame

from pandas.io.common import is_url, urlopen, validate_header_arg
from pandas.io.formats.printing import pprint_thing
Expand Down Expand Up @@ -924,22 +927,22 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):

@deprecate_nonkeyword_arguments(version="2.0")
def read_html(
io,
match=".+",
flavor=None,
header=None,
index_col=None,
skiprows=None,
attrs=None,
parse_dates=False,
thousands=",",
encoding=None,
decimal=".",
converters=None,
io: FilePathOrBuffer,
match: Union[str, Pattern] = ".+",
flavor: Optional[str] = None,
header: Optional[Union[int, Sequence[int]]] = None,
index_col: Optional[Union[int, Sequence[int]]] = None,
skiprows: Optional[Union[int, Sequence[int], slice]] = None,
attrs: Optional[Dict[str, str]] = None,
parse_dates: bool = False,
thousands: Optional[str] = ",",
encoding: Optional[str] = None,
decimal: str = ".",
converters: Optional[Dict] = None,
na_values=None,
keep_default_na=True,
displayed_only=True,
):
keep_default_na: bool = True,
displayed_only: bool = True,
) -> List[DataFrame]:
r"""
Read HTML tables into a ``list`` of ``DataFrame`` objects.

Expand All @@ -958,26 +961,26 @@ def read_html(
This value is converted to a regular expression so that there is
consistent behavior between Beautiful Soup and lxml.

flavor : str or None
flavor : str, optional
The parsing engine to use. 'bs4' and 'html5lib' are synonymous with
each other, they are both there for backwards compatibility. The
default of ``None`` tries to use ``lxml`` to parse and if that fails it
falls back on ``bs4`` + ``html5lib``.

header : int or list-like or None, optional
header : int or list-like, optional
The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to
make the columns headers.

index_col : int or list-like or None, optional
index_col : int or list-like, optional
The column (or list of columns) to use to create the index.

skiprows : int or list-like or slice or None, optional
skiprows : int, list-like or slice, optional
Number of rows to skip after parsing the column integer. 0-based. If a
sequence of integers or a slice is given, will skip the rows indexed by
that sequence. Note that a single element sequence means 'skip the nth
row' whereas an integer means 'skip n rows'.

attrs : dict or None, optional
attrs : dict, optional
This is a dictionary of attributes that you can pass to use to identify
the table in the HTML. These are not checked for validity before being
passed to lxml or Beautiful Soup. However, these attributes must be
Expand Down Expand Up @@ -1005,7 +1008,7 @@ def read_html(
thousands : str, optional
Separator to use to parse thousands. Defaults to ``','``.

encoding : str or None, optional
encoding : str, optional
The encoding used to decode the web page. Defaults to ``None``. ``None``
preserves the previous encoding behavior, which depends on the
underlying parser library (e.g., the parser library will try to use
Expand Down