Skip to content

Backport PR #30553 on branch 1.0.x (TST/BUG: fix incorrectly-passing Exception in test_html) #31124

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,9 +591,14 @@ def _setup_build_doc(self):
def _build_doc(self):
    """
    Build the parsed document with BeautifulSoup and the html5lib parser.

    Returns
    -------
    bs4.BeautifulSoup
        The tree built from whatever ``self._setup_build_doc()`` returns.

    Notes
    -----
    When the raw document is ``bytes`` and ``self.encoding`` is set, the
    bytes are decoded here and BeautifulSoup receives text with
    ``from_encoding=None``.  Otherwise the document is passed through
    unchanged together with ``self.encoding``.
    """
    from bs4 import BeautifulSoup

    bdoc = self._setup_build_doc()
    if isinstance(bdoc, bytes) and self.encoding is not None:
        # Decode explicitly instead of delegating to BeautifulSoup.
        # NOTE(review): presumably ``from_encoding`` was not reliable for
        # every encoding exercised by test_encode -- confirm against the
        # upstream change before relying on this rationale.
        udoc = bdoc.decode(self.encoding)
        from_encoding = None
    else:
        # Already text, or no encoding was requested: hand it over as-is.
        udoc = bdoc
        from_encoding = self.encoding
    return BeautifulSoup(udoc, features="html5lib", from_encoding=from_encoding)


def _build_xpath_expr(attrs) -> str:
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -1158,9 +1158,9 @@ def test_displayed_only(self, displayed_only, exp0, exp1):
assert len(dfs) == 1 # Should not parse hidden table

def test_encode(self, html_encoding_file):
_, encoding = os.path.splitext(os.path.basename(html_encoding_file))[0].split(
"_"
)
base_path = os.path.basename(html_encoding_file)
root = os.path.splitext(base_path)[0]
_, encoding = root.split("_")

try:
with open(html_encoding_file, "rb") as fobj:
Expand All @@ -1183,7 +1183,7 @@ def test_encode(self, html_encoding_file):
if is_platform_windows():
if "16" in encoding or "32" in encoding:
pytest.skip()
raise
raise

def test_parse_failure_unseekable(self):
# Issue #17975
Expand Down