CI: Revive Banklist Tests (pandas-dev#42889)

lithomas1 · attack68 · commit fe30ac7b7ddd · 2021-09-01T18:19:30.000+02:00
(cherry picked from commit 9981172)
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -2502,14 +2502,16 @@ Read a URL with no options:
 
 .. ipython:: python
 
-   url = (
-       "https://raw.githubusercontent.com/pandas-dev/pandas/master/"
-       "pandas/tests/io/data/html/spam.html"
-   )
+   url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list"
    dfs = pd.read_html(url)
    dfs
 
-Read in the content of the "banklist.html" file and pass it to ``read_html``
+.. note::
+
+   The data from the above URL changes every Monday, so the resulting data above
+   and the data below may differ slightly.
+
+Read in the content of the file from the above URL and pass it to ``read_html``
 as a string:
 
 .. ipython:: python
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
@@ -134,28 +134,38 @@ def test_to_html_compat(self):
         res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0]
         tm.assert_frame_equal(res, df)
 
-    @pytest.mark.xfail(reason="Html file was removed")
     @tm.network
     def test_banklist_url_positional_match(self):
-        url = "https://www.fdic.gov/bank/individual/failed/banklist.html"
+        url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html"  # noqa E501
         # Passing match argument as positional should cause a FutureWarning.
         with tm.assert_produces_warning(FutureWarning):
             df1 = self.read_html(
-                url, "First Federal Bank of Florida", attrs={"id": "table"}
+                # lxml cannot find attrs; leave them out for now
+                url,
+                "First Federal Bank of Florida",  # attrs={"class": "dataTable"}
             )
         with tm.assert_produces_warning(FutureWarning):
-            df2 = self.read_html(url, "Metcalf Bank", attrs={"id": "table"})
+            # lxml cannot find attrs; leave them out for now
+            df2 = self.read_html(
+                url,
+                "Metcalf Bank",
+            )  # attrs={"class": "dataTable"})
 
         assert_framelist_equal(df1, df2)
 
-    @pytest.mark.xfail(reason="Html file was removed")
     @tm.network
     def test_banklist_url(self):
-        url = "https://www.fdic.gov/bank/individual/failed/banklist.html"
+        url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html"  # noqa E501
         df1 = self.read_html(
-            url, match="First Federal Bank of Florida", attrs={"id": "table"}
+            # lxml cannot find attrs; leave them out for now
+            url,
+            match="First Federal Bank of Florida",  # attrs={"class": "dataTable"}
         )
-        df2 = self.read_html(url, match="Metcalf Bank", attrs={"id": "table"})
+        # lxml cannot find attrs; leave them out for now
+        df2 = self.read_html(
+            url,
+            match="Metcalf Bank",
+        )  # attrs={"class": "dataTable"})
 
         assert_framelist_equal(df1, df2)