Skip to content

Commit 6104a03

Browse files
Backport PR #45949: TST: Check network URL statuses in tests (#45993)
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent dc460e3 commit 6104a03

File tree

9 files changed

+98
-21
lines changed

9 files changed

+98
-21
lines changed

pandas/_testing/_io.py

+12 -3
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import bz2
44
from functools import wraps
55
import gzip
6+
import socket
67
from typing import (
78
TYPE_CHECKING,
89
Any,
@@ -73,7 +74,13 @@ def _get_default_network_errors():
7374
import http.client
7475
import urllib.error
7576

76-
return (OSError, http.client.HTTPException, TimeoutError, urllib.error.URLError)
77+
return (
78+
OSError,
79+
http.client.HTTPException,
80+
TimeoutError,
81+
urllib.error.URLError,
82+
socket.timeout,
83+
)
7784

7885

7986
def optional_args(decorator):
@@ -264,8 +271,10 @@ def can_connect(url, error_classes=None):
264271
error_classes = _get_default_network_errors()
265272

266273
try:
267-
with urlopen(url):
268-
pass
274+
with urlopen(url, timeout=20) as response:
275+
# Timeout just in case rate-limiting is applied
276+
if response.status != 200:
277+
return False
269278
except error_classes:
270279
return False
271280
else:

pandas/tests/io/excel/test_readers.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -764,7 +764,13 @@ def test_corrupt_bytes_raises(self, read_ext, engine):
764764
pd.read_excel(bad_stream)
765765

766766
@pytest.mark.network
767-
@tm.network
767+
@tm.network(
768+
url=(
769+
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
770+
"pandas/tests/io/data/excel/test1.xlsx"
771+
),
772+
check_before_test=True,
773+
)
768774
def test_read_from_http_url(self, read_ext):
769775
url = (
770776
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"

pandas/tests/io/json/test_pandas.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -996,7 +996,10 @@ def test_round_trip_exception_(self, datapath):
996996
tm.assert_frame_equal(result.reindex(index=df.index, columns=df.columns), df)
997997

998998
@pytest.mark.network
999-
@tm.network
999+
@tm.network(
1000+
url="https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5",
1001+
check_before_test=True,
1002+
)
10001003
@pytest.mark.parametrize(
10011004
"field,dtype",
10021005
[

pandas/tests/io/parser/common/test_file_buffer_url.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,13 @@
2727

2828

2929
@pytest.mark.network
30-
@tm.network
30+
@tm.network(
31+
url=(
32+
"https://raw.github.com/pandas-dev/pandas/main/"
33+
"pandas/tests/io/parser/data/salaries.csv"
34+
),
35+
check_before_test=True,
36+
)
3137
def test_url(all_parsers, csv_dir_path):
3238
parser = all_parsers
3339
kwargs = {"sep": "\t"}

pandas/tests/io/parser/test_network.py

+14 -2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,13 @@
2323

2424

2525
@pytest.mark.network
26-
@tm.network
26+
@tm.network(
27+
url=(
28+
"https://github.com/pandas-dev/pandas/raw/main/"
29+
"pandas/tests/io/parser/data/salaries.csv"
30+
),
31+
check_before_test=True,
32+
)
2733
@pytest.mark.parametrize("mode", ["explicit", "infer"])
2834
@pytest.mark.parametrize("engine", ["python", "c"])
2935
def test_compressed_urls(salaries_table, mode, engine, compression_only):
@@ -45,7 +51,13 @@ def test_compressed_urls(salaries_table, mode, engine, compression_only):
4551

4652

4753
@pytest.mark.network
48-
@tm.network
54+
@tm.network(
55+
url=(
56+
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
57+
"pandas/tests/io/parser/data/unicode_series.csv"
58+
),
59+
check_before_test=True,
60+
)
4961
def test_url_encoding_csv():
5062
"""
5163
read_csv should honor the requested encoding for URLs.

pandas/tests/io/test_feather.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -182,7 +182,13 @@ def test_passthrough_keywords(self):
182182
self.check_round_trip(df, write_kwargs={"version": 1})
183183

184184
@pytest.mark.network
185-
@tm.network
185+
@tm.network(
186+
url=(
187+
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
188+
"pandas/tests/io/data/feather/feather-0_3_1.feather"
189+
),
190+
check_before_test=True,
191+
)
186192
def test_http_path(self, feather_file):
187193
# GH 29055
188194
url = (

pandas/tests/io/test_html.py

+25 -7
Original file line number | Diff line number | Diff line change
@@ -134,9 +134,15 @@ def test_to_html_compat(self):
134134
tm.assert_frame_equal(res, df)
135135

136136
@pytest.mark.network
137-
@tm.network
137+
@tm.network(
138+
url=(
139+
"https://www.fdic.gov/resources/resolutions/"
140+
"bank-failures/failed-bank-list/index.html"
141+
),
142+
check_before_test=True,
143+
)
138144
def test_banklist_url_positional_match(self):
139-
url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
145+
url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
140146
# Passing match argument as positional should cause a FutureWarning.
141147
with tm.assert_produces_warning(FutureWarning):
142148
df1 = self.read_html(
@@ -154,9 +160,15 @@ def test_banklist_url_positional_match(self):
154160
assert_framelist_equal(df1, df2)
155161

156162
@pytest.mark.network
157-
@tm.network
163+
@tm.network(
164+
url=(
165+
"https://www.fdic.gov/resources/resolutions/"
166+
"bank-failures/failed-bank-list/index.html"
167+
),
168+
check_before_test=True,
169+
)
158170
def test_banklist_url(self):
159-
url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
171+
url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
160172
df1 = self.read_html(
161173
# lxml cannot find attrs leave out for now
162174
url,
@@ -171,7 +183,13 @@ def test_banklist_url(self):
171183
assert_framelist_equal(df1, df2)
172184

173185
@pytest.mark.network
174-
@tm.network
186+
@tm.network(
187+
url=(
188+
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
189+
"pandas/tests/io/data/html/spam.html"
190+
),
191+
check_before_test=True,
192+
)
175193
def test_spam_url(self):
176194
url = (
177195
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
@@ -409,14 +427,14 @@ def test_negative_skiprows(self):
409427
self.read_html(self.spam_data, match="Water", skiprows=-1)
410428

411429
@pytest.mark.network
412-
@tm.network
430+
@tm.network(url="https://docs.python.org/2/", check_before_test=True)
413431
def test_multiple_matches(self):
414432
url = "https://docs.python.org/2/"
415433
dfs = self.read_html(url, match="Python")
416434
assert len(dfs) > 1
417435

418436
@pytest.mark.network
419-
@tm.network
437+
@tm.network(url="https://docs.python.org/2/", check_before_test=True)
420438
def test_python_docs_table(self):
421439
url = "https://docs.python.org/2/"
422440
dfs = self.read_html(url, match="Python")

pandas/tests/io/test_parquet.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -380,7 +380,13 @@ def check_external_error_on_write(self, df, engine, exc):
380380
to_parquet(df, path, engine, compression=None)
381381

382382
@pytest.mark.network
383-
@tm.network
383+
@tm.network(
384+
url=(
385+
"https://raw.githubusercontent.com/pandas-dev/pandas/"
386+
"main/pandas/tests/io/data/parquet/simple.parquet"
387+
),
388+
check_before_test=True,
389+
)
384390
def test_parquet_read_from_url(self, df_compat, engine):
385391
if engine != "auto":
386392
pytest.importorskip(engine)

pandas/tests/io/xml/test_xml.py

+15 -4
Original file line number | Diff line number | Diff line change
@@ -258,7 +258,13 @@ def test_parser_consistency_file(datapath):
258258
@pytest.mark.network
259259
@pytest.mark.slow
260260
@td.skip_if_no("lxml")
261-
@tm.network
261+
@tm.network(
262+
url=(
263+
"https://data.cityofchicago.org/api/views/"
264+
"8pix-ypme/rows.xml?accessType=DOWNLOAD"
265+
),
266+
check_before_test=True,
267+
)
262268
def test_parser_consistency_url():
263269
url = (
264270
"https://data.cityofchicago.org/api/views/"
@@ -404,7 +410,10 @@ def test_wrong_file_path_etree():
404410

405411

406412
@pytest.mark.network
407-
@tm.network
413+
@tm.network(
414+
url="https://www.w3schools.com/xml/books.xml",
415+
check_before_test=True,
416+
)
408417
@td.skip_if_no("lxml")
409418
def test_url():
410419
url = "https://www.w3schools.com/xml/books.xml"
@@ -425,7 +434,7 @@ def test_url():
425434

426435

427436
@pytest.mark.network
428-
@tm.network
437+
@tm.network(url="https://www.w3schools.com/xml/python.xml", check_before_test=True)
429438
def test_wrong_url(parser):
430439
with pytest.raises(HTTPError, match=("HTTP Error 404: Not Found")):
431440
url = "https://www.w3schools.com/xml/python.xml"
@@ -1022,7 +1031,9 @@ def test_empty_stylesheet(val):
10221031

10231032
@pytest.mark.network
10241033
@td.skip_if_no("lxml")
1025-
@tm.network
1034+
@tm.network(
1035+
url="https://www.w3schools.com/xml/cdcatalog_with_xsl.xml", check_before_test=True
1036+
)
10261037
def test_online_stylesheet():
10271038
xml = "https://www.w3schools.com/xml/cdcatalog_with_xsl.xml"
10281039
xsl = "https://www.w3schools.com/xml/cdcatalog.xsl"

0 commit comments

Comments
 (0)