From c6b38f651b39f60bbd7599a7bcb5186903f0de27 Mon Sep 17 00:00:00 2001
From: Matheus Felipe <matheusfelipeog@protonmail.com>
Date: Mon, 11 Sep 2023 01:56:55 -0300
Subject: [PATCH 1/3] Fix flavor param with incorrect type hint in read_html

refs:
- https://github.com/pandas-dev/pandas/issues/55059
- https://github.com/pandas-dev/pandas/pull/55076
---
 pandas-stubs/io/html.pyi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas-stubs/io/html.pyi b/pandas-stubs/io/html.pyi
index 6f8f15c86..94cef340b 100644
--- a/pandas-stubs/io/html.pyi
+++ b/pandas-stubs/io/html.pyi
@@ -28,7 +28,7 @@ def read_html(
     io: FilePath | ReadBuffer[str],
     *,
     match: str | Pattern = ...,
-    flavor: str | None = ...,
+    flavor: str | Sequence[str] | None = ...,
     header: int | Sequence[int] | None = ...,
     index_col: int | Sequence[int] | list[HashableT1] | None = ...,
     skiprows: int | Sequence[int] | slice | None = ...,

From 3063be8ec9434b32cd023642c7d82a1f94e743ff Mon Sep 17 00:00:00 2001
From: Matheus Felipe <matheusfelipeog@protonmail.com>
Date: Thu, 2 Nov 2023 04:12:28 -0300
Subject: [PATCH 2/3] Add HTMLFlavors type to read_html

ref: https://github.com/pandas-dev/pandas/pull/55529
---
 pandas-stubs/_typing.pyi | 3 +++
 pandas-stubs/io/html.pyi | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi
index f8fbc6136..a948519e6 100644
--- a/pandas-stubs/_typing.pyi
+++ b/pandas-stubs/_typing.pyi
@@ -577,6 +577,9 @@ ParseDatesArg: TypeAlias = (
 # read_xml parsers
 XMLParsers: TypeAlias = Literal["lxml", "etree"]
 
+# read_html flavors
+HTMLFlavors: TypeAlias = Literal["lxml", "html5lib", "bs4"]
+
 # Any plain Python or numpy function
 Function: TypeAlias = np.ufunc | Callable[..., Any]
 # Use a distinct HashableT in shared types to avoid conflicts with
diff --git a/pandas-stubs/io/html.pyi b/pandas-stubs/io/html.pyi
index 94cef340b..839864d80 100644
--- a/pandas-stubs/io/html.pyi
+++ b/pandas-stubs/io/html.pyi
@@ -21,6 +21,7 @@ from pandas._typing import (
     HashableT3,
     HashableT4,
     HashableT5,
+    HTMLFlavors,
     ReadBuffer,
 )
 
@@ -28,7 +29,7 @@ def read_html(
     io: FilePath | ReadBuffer[str],
     *,
     match: str | Pattern = ...,
-    flavor: str | Sequence[str] | None = ...,
+    flavor: HTMLFlavors | Sequence[HTMLFlavors] | None = ...,
     header: int | Sequence[int] | None = ...,
     index_col: int | Sequence[int] | list[HashableT1] | None = ...,
     skiprows: int | Sequence[int] | slice | None = ...,

From ff5faeac6e9f9a7d5918c1282b9891e164a3fa3e Mon Sep 17 00:00:00 2001
From: Matheus Felipe <matheusfelipeog@protonmail.com>
Date: Thu, 2 Nov 2023 04:17:44 -0300
Subject: [PATCH 3/3] Add tests and new dev dependencies

Changes:
- add tests that check the HTMLFlavors type of the read_html flavor arg;
- add beautifulsoup4 and html5lib as dev dependencies, since they are
  used by the respective flavors in read_html.
---
 pyproject.toml   | 4 +++-
 tests/test_io.py | 6 ++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index e4eca4719..e3f058c83 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ pre-commit = ">=2.19.0"
 black = ">=23.3.0"
 isort = ">=5.12.0"
 openpyxl = ">=3.0.10"
-tables = { version = ">=3.8.0" , python = "<4"}  # 3.8.0 depends on blosc2 which caps python to <4
+tables = { version = ">=3.8.0", python = "<4"}  # 3.8.0 depends on blosc2 which caps python to <4
 lxml = ">=4.9.1"
 pyreadstat = ">=1.2.0"
 xlrd = ">=2.0.1"
@@ -62,6 +62,8 @@ scipy = ">=1.9.1"
 SQLAlchemy = ">=2.0.12"
 types-python-dateutil = ">=2.8.19"
 numexpr = "<2.8.5"  # https://github.com/pandas-dev/pandas/issues/54449
+beautifulsoup4 = ">=4.12.2"
+html5lib = ">=1.1"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
diff --git a/tests/test_io.py b/tests/test_io.py
index ca6274071..55eea5edc 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -1242,6 +1242,12 @@ def test_read_html():
     with ensure_clean() as path:
         check(assert_type(DF.to_html(path), None), type(None))
         check(assert_type(read_html(path), list[DataFrame]), list)
+        check(assert_type(read_html(path, flavor=None), list[DataFrame]), list)
+        check(assert_type(read_html(path, flavor="bs4"), list[DataFrame]), list)
+        check(assert_type(read_html(path, flavor=["bs4"]), list[DataFrame]), list)
+        check(
+            assert_type(read_html(path, flavor=["bs4", "lxml"]), list[DataFrame]), list
+        )
 
 
 def test_csv_quoting():