diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 1f1556123db17..4c7b13bcf989f 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2502,14 +2502,16 @@ Read a URL with no options: .. ipython:: python - url = ( - "https://raw.githubusercontent.com/pandas-dev/pandas/master/" - "pandas/tests/io/data/html/spam.html" - ) + url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list" dfs = pd.read_html(url) dfs -Read in the content of the "banklist.html" file and pass it to ``read_html`` +.. note:: + + The data at the above URL changes every Monday, so the output shown above + and the output shown below may differ slightly. + +Read in the content of the file from the above URL and pass it to ``read_html`` as a string: .. ipython:: python diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index f842e4cd58863..1363a0b04ee0a 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -134,28 +134,38 @@ def test_to_html_compat(self): res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0] tm.assert_frame_equal(res, df) - @pytest.mark.xfail(reason="Html file was removed") @tm.network def test_banklist_url_positional_match(self): - url = "https://www.fdic.gov/bank/individual/failed/banklist.html" + url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa: E501 # Passing match argument as positional should cause a FutureWarning. 
with tm.assert_produces_warning(FutureWarning): df1 = self.read_html( - url, "First Federal Bank of Florida", attrs={"id": "table"} + # lxml cannot find attrs; leave them out for now + url, + "First Federal Bank of Florida", # attrs={"class": "dataTable"} ) with tm.assert_produces_warning(FutureWarning): - df2 = self.read_html(url, "Metcalf Bank", attrs={"id": "table"}) + # lxml cannot find attrs; leave them out for now + df2 = self.read_html( + url, + "Metcalf Bank", + ) # attrs={"class": "dataTable"}) assert_framelist_equal(df1, df2) - @pytest.mark.xfail(reason="Html file was removed") @tm.network def test_banklist_url(self): - url = "https://www.fdic.gov/bank/individual/failed/banklist.html" + url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa: E501 df1 = self.read_html( - url, match="First Federal Bank of Florida", attrs={"id": "table"} + # lxml cannot find attrs; leave them out for now + url, + match="First Federal Bank of Florida", # attrs={"class": "dataTable"} ) - df2 = self.read_html(url, match="Metcalf Bank", attrs={"id": "table"}) + # lxml cannot find attrs; leave them out for now + df2 = self.read_html( + url, + match="Metcalf Bank", + ) # attrs={"class": "dataTable"}) assert_framelist_equal(df1, df2)