Skip to content

Commit ad8541b

Browse files
committed
BUG: Improved thread safety for read_html() GH16928
1 parent 7ffe7fc commit ad8541b

File tree

3 files changed

+34
-2
lines changed

3 files changed

+34
-2
lines changed

doc/source/whatsnew/v0.21.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ I/O
164164

165165
- Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`)
166166

167+
- Bug in :func:`read_html` where the import check could fail when run from multiple threads concurrently (:issue:`16928`)
168+
167169
Plotting
168170
^^^^^^^^
169171
- Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`)

pandas/io/html.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ def _importers():
3737
if _IMPORTS:
3838
return
3939

40-
_IMPORTS = True
41-
4240
global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB
4341

4442
try:
@@ -59,6 +57,8 @@ def _importers():
5957
except ImportError:
6058
pass
6159

60+
_IMPORTS = True
61+
6262

6363
#############
6464
# READ HTML #

pandas/tests/io/test_html.py

+30
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,19 @@
33
import glob
44
import os
55
import re
6+
import threading
67
import warnings
78

89
try:
910
from importlib import import_module
1011
except ImportError:
1112
import_module = __import__
1213

14+
try:
15+
from importlib import reload
16+
except ImportError:
17+
pass
18+
1319
from distutils.version import LooseVersion
1420

1521
import pytest
@@ -22,6 +28,7 @@
2228
from pandas.compat import (map, zip, StringIO, string_types, BytesIO,
2329
is_platform_windows, PY3)
2430
from pandas.io.common import URLError, urlopen, file_path_to_url
31+
import pandas.io.html
2532
from pandas.io.html import read_html
2633
from pandas._libs.parsers import ParserError
2734

@@ -931,3 +938,26 @@ def test_same_ordering():
931938
dfs_lxml = read_html(filename, index_col=0, flavor=['lxml'])
932939
dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4'])
933940
assert_framelist_equal(dfs_lxml, dfs_bs4)
941+
942+
943+
class ErrorThread(threading.Thread):
    """Thread that stores an exception raised by its target.

    Instead of letting an ``Exception`` escape from ``run``, the instance
    keeps it in ``err`` so the spawning thread can inspect it afterwards.
    ``err`` is ``None`` when ``run`` completed without raising.
    """

    # Class-level default so ``err`` is readable even before ``run`` has
    # finished (or if the thread was never started).
    err = None

    def run(self):
        """Run the thread's target and record any ``Exception`` in ``err``."""
        try:
            super(ErrorThread, self).run()
        except Exception as e:
            self.err = e
        else:
            self.err = None
951+
952+
953+
@pytest.mark.slow
def test_importcheck_thread_safety():
    """Check that two threads can call ``read_html`` concurrently (GH16928).

    Both helper threads parse the same file; neither may record an error.
    """
    # Re-execute the module before the threads start.
    # NOTE(review): presumably this resets the cached import-check state so
    # that both threads race through it -- confirm against pandas.io.html.
    reload(pandas.io.html)

    filename = os.path.join(DATA_PATH, 'valid_markup.html')
    helper_thread1 = ErrorThread(target=read_html, args=(filename,))
    helper_thread2 = ErrorThread(target=read_html, args=(filename,))

    helper_thread1.start()
    helper_thread2.start()

    # Block until both threads finish; join() avoids a busy-wait loop that
    # would spin the main thread while the helpers are running.
    helper_thread1.join()
    helper_thread2.join()

    # Separate assertions so a failure shows which thread raised.
    assert helper_thread1.err is None
    assert helper_thread2.err is None

0 commit comments

Comments
 (0)