Skip to content

Commit fe30ac7

Browse files
lithomas1 authored and
attack68 committed
CI: Revive Banklist Tests (pandas-dev#42889)
(cherry picked from commit 9981172)
1 parent d42fdac commit fe30ac7

File tree

2 files changed

+25
-13
lines changed

2 files changed

+25
-13
lines changed

doc/source/user_guide/io.rst

+7 -5
Original file line number | Diff line number | Diff line change
@@ -2502,14 +2502,16 @@ Read a URL with no options:
25022502

25032503
.. ipython:: python
25042504
2505-
url = (
2506-
"https://raw.githubusercontent.com/pandas-dev/pandas/master/"
2507-
"pandas/tests/io/data/html/spam.html"
2508-
)
2505+
url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list"
25092506
dfs = pd.read_html(url)
25102507
dfs
25112508
2512-
Read in the content of the "banklist.html" file and pass it to ``read_html``
2509+
.. note::
2510+
2511+
The data from the above URL changes every Monday so the resulting data above
2512+
and the data below may be slightly different.
2513+
2514+
Read in the content of the file from the above URL and pass it to ``read_html``
25132515
as a string:
25142516

25152517
.. ipython:: python

pandas/tests/io/test_html.py

+18 -8
Original file line number | Diff line number | Diff line change
@@ -134,28 +134,38 @@ def test_to_html_compat(self):
134134
res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0]
135135
tm.assert_frame_equal(res, df)
136136

137-
@pytest.mark.xfail(reason="Html file was removed")
138137
@tm.network
139138
def test_banklist_url_positional_match(self):
140-
url = "https://www.fdic.gov/bank/individual/failed/banklist.html"
139+
url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
141140
# Passing match argument as positional should cause a FutureWarning.
142141
with tm.assert_produces_warning(FutureWarning):
143142
df1 = self.read_html(
144-
url, "First Federal Bank of Florida", attrs={"id": "table"}
143+
# lxml cannot find attrs leave out for now
144+
url,
145+
"First Federal Bank of Florida", # attrs={"class": "dataTable"}
145146
)
146147
with tm.assert_produces_warning(FutureWarning):
147-
df2 = self.read_html(url, "Metcalf Bank", attrs={"id": "table"})
148+
# lxml cannot find attrs leave out for now
149+
df2 = self.read_html(
150+
url,
151+
"Metcalf Bank",
152+
) # attrs={"class": "dataTable"})
148153

149154
assert_framelist_equal(df1, df2)
150155

151-
@pytest.mark.xfail(reason="Html file was removed")
152156
@tm.network
153157
def test_banklist_url(self):
154-
url = "https://www.fdic.gov/bank/individual/failed/banklist.html"
158+
url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
155159
df1 = self.read_html(
156-
url, match="First Federal Bank of Florida", attrs={"id": "table"}
160+
# lxml cannot find attrs leave out for now
161+
url,
162+
match="First Federal Bank of Florida", # attrs={"class": "dataTable"}
157163
)
158-
df2 = self.read_html(url, match="Metcalf Bank", attrs={"id": "table"})
164+
# lxml cannot find attrs leave out for now
165+
df2 = self.read_html(
166+
url,
167+
match="Metcalf Bank",
168+
) # attrs={"class": "dataTable"})
159169

160170
assert_framelist_equal(df1, df2)
161171

0 commit comments

Comments
 (0)