Skip to content

Commit b9dcd44

Browse files
mroeschke authored and yehoshuadimarsky committed
TST: Check network URL statuses in tests (pandas-dev#45949)
1 parent 9888c55 commit b9dcd44

File tree

9 files changed

+98
-21
lines changed

9 files changed

+98
-21
lines changed

pandas/_testing/_io.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import bz2
44
from functools import wraps
55
import gzip
6+
import socket
67
from typing import (
78
TYPE_CHECKING,
89
Any,
@@ -73,7 +74,13 @@ def _get_default_network_errors():
7374
import http.client
7475
import urllib.error
7576

76-
return (OSError, http.client.HTTPException, TimeoutError, urllib.error.URLError)
77+
return (
78+
OSError,
79+
http.client.HTTPException,
80+
TimeoutError,
81+
urllib.error.URLError,
82+
socket.timeout,
83+
)
7784

7885

7986
def optional_args(decorator):
@@ -264,8 +271,10 @@ def can_connect(url, error_classes=None):
264271
error_classes = _get_default_network_errors()
265272

266273
try:
267-
with urlopen(url):
268-
pass
274+
with urlopen(url, timeout=20) as response:
275+
# Timeout just in case rate-limiting is applied
276+
if response.status != 200:
277+
return False
269278
except error_classes:
270279
return False
271280
else:

pandas/tests/io/excel/test_readers.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,13 @@ def test_corrupt_bytes_raises(self, engine):
764764
pd.read_excel(bad_stream)
765765

766766
@pytest.mark.network
767-
@tm.network
767+
@tm.network(
768+
url=(
769+
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
770+
"pandas/tests/io/data/excel/test1.xlsx"
771+
),
772+
check_before_test=True,
773+
)
768774
def test_read_from_http_url(self, read_ext):
769775
url = (
770776
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"

pandas/tests/io/json/test_pandas.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -994,7 +994,10 @@ def test_round_trip_exception_(self, datapath):
994994
tm.assert_frame_equal(result.reindex(index=df.index, columns=df.columns), df)
995995

996996
@pytest.mark.network
997-
@tm.network
997+
@tm.network(
998+
url="https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5",
999+
check_before_test=True,
1000+
)
9981001
@pytest.mark.parametrize(
9991002
"field,dtype",
10001003
[

pandas/tests/io/parser/common/test_file_buffer_url.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,13 @@
2727

2828

2929
@pytest.mark.network
30-
@tm.network
30+
@tm.network(
31+
url=(
32+
"https://raw.github.com/pandas-dev/pandas/main/"
33+
"pandas/tests/io/parser/data/salaries.csv"
34+
),
35+
check_before_test=True,
36+
)
3137
def test_url(all_parsers, csv_dir_path):
3238
parser = all_parsers
3339
kwargs = {"sep": "\t"}

pandas/tests/io/parser/test_network.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,13 @@
2323

2424

2525
@pytest.mark.network
26-
@tm.network
26+
@tm.network(
27+
url=(
28+
"https://github.com/pandas-dev/pandas/raw/main/"
29+
"pandas/tests/io/parser/data/salaries.csv"
30+
),
31+
check_before_test=True,
32+
)
2733
@pytest.mark.parametrize("mode", ["explicit", "infer"])
2834
@pytest.mark.parametrize("engine", ["python", "c"])
2935
def test_compressed_urls(salaries_table, mode, engine, compression_only):
@@ -45,7 +51,13 @@ def test_compressed_urls(salaries_table, mode, engine, compression_only):
4551

4652

4753
@pytest.mark.network
48-
@tm.network
54+
@tm.network(
55+
url=(
56+
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
57+
"pandas/tests/io/parser/data/unicode_series.csv"
58+
),
59+
check_before_test=True,
60+
)
4961
def test_url_encoding_csv():
5062
"""
5163
read_csv should honor the requested encoding for URLs.

pandas/tests/io/test_feather.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,13 @@ def test_passthrough_keywords(self):
182182
self.check_round_trip(df, write_kwargs={"version": 1})
183183

184184
@pytest.mark.network
185-
@tm.network
185+
@tm.network(
186+
url=(
187+
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
188+
"pandas/tests/io/data/feather/feather-0_3_1.feather"
189+
),
190+
check_before_test=True,
191+
)
186192
def test_http_path(self, feather_file):
187193
# GH 29055
188194
url = (

pandas/tests/io/test_html.py

+25-7
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,15 @@ def test_to_html_compat(self):
133133
tm.assert_frame_equal(res, df)
134134

135135
@pytest.mark.network
136-
@tm.network
136+
@tm.network(
137+
url=(
138+
"https://www.fdic.gov/resources/resolutions/"
139+
"bank-failures/failed-bank-list/index.html"
140+
),
141+
check_before_test=True,
142+
)
137143
def test_banklist_url_positional_match(self):
138-
url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
144+
url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
139145
# Passing match argument as positional should cause a FutureWarning.
140146
with tm.assert_produces_warning(FutureWarning):
141147
df1 = self.read_html(
@@ -153,9 +159,15 @@ def test_banklist_url_positional_match(self):
153159
assert_framelist_equal(df1, df2)
154160

155161
@pytest.mark.network
156-
@tm.network
162+
@tm.network(
163+
url=(
164+
"https://www.fdic.gov/resources/resolutions/"
165+
"bank-failures/failed-bank-list/index.html"
166+
),
167+
check_before_test=True,
168+
)
157169
def test_banklist_url(self):
158-
url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
170+
url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
159171
df1 = self.read_html(
160172
# lxml cannot find attrs leave out for now
161173
url,
@@ -170,7 +182,13 @@ def test_banklist_url(self):
170182
assert_framelist_equal(df1, df2)
171183

172184
@pytest.mark.network
173-
@tm.network
185+
@tm.network(
186+
url=(
187+
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
188+
"pandas/tests/io/data/html/spam.html"
189+
),
190+
check_before_test=True,
191+
)
174192
def test_spam_url(self):
175193
url = (
176194
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
@@ -406,14 +424,14 @@ def test_negative_skiprows(self, spam_data):
406424
self.read_html(spam_data, match="Water", skiprows=-1)
407425

408426
@pytest.mark.network
409-
@tm.network
427+
@tm.network(url="https://docs.python.org/2/", check_before_test=True)
410428
def test_multiple_matches(self):
411429
url = "https://docs.python.org/2/"
412430
dfs = self.read_html(url, match="Python")
413431
assert len(dfs) > 1
414432

415433
@pytest.mark.network
416-
@tm.network
434+
@tm.network(url="https://docs.python.org/2/", check_before_test=True)
417435
def test_python_docs_table(self):
418436
url = "https://docs.python.org/2/"
419437
dfs = self.read_html(url, match="Python")

pandas/tests/io/test_parquet.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,13 @@ def check_external_error_on_write(self, df, engine, exc):
381381
to_parquet(df, path, engine, compression=None)
382382

383383
@pytest.mark.network
384-
@tm.network
384+
@tm.network(
385+
url=(
386+
"https://raw.githubusercontent.com/pandas-dev/pandas/"
387+
"main/pandas/tests/io/data/parquet/simple.parquet"
388+
),
389+
check_before_test=True,
390+
)
385391
def test_parquet_read_from_url(self, df_compat, engine):
386392
if engine != "auto":
387393
pytest.importorskip(engine)

pandas/tests/io/xml/test_xml.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,13 @@ def test_parser_consistency_file(datapath):
258258
@pytest.mark.network
259259
@pytest.mark.slow
260260
@td.skip_if_no("lxml")
261-
@tm.network
261+
@tm.network(
262+
url=(
263+
"https://data.cityofchicago.org/api/views/"
264+
"8pix-ypme/rows.xml?accessType=DOWNLOAD"
265+
),
266+
check_before_test=True,
267+
)
262268
def test_parser_consistency_url():
263269
url = (
264270
"https://data.cityofchicago.org/api/views/"
@@ -404,7 +410,10 @@ def test_wrong_file_path_etree():
404410

405411

406412
@pytest.mark.network
407-
@tm.network
413+
@tm.network(
414+
url="https://www.w3schools.com/xml/books.xml",
415+
check_before_test=True,
416+
)
408417
@td.skip_if_no("lxml")
409418
def test_url():
410419
url = "https://www.w3schools.com/xml/books.xml"
@@ -425,7 +434,7 @@ def test_url():
425434

426435

427436
@pytest.mark.network
428-
@tm.network
437+
@tm.network(url="https://www.w3schools.com/xml/python.xml", check_before_test=True)
429438
def test_wrong_url(parser):
430439
with pytest.raises(HTTPError, match=("HTTP Error 404: Not Found")):
431440
url = "https://www.w3schools.com/xml/python.xml"
@@ -1022,7 +1031,9 @@ def test_empty_stylesheet(val):
10221031

10231032
@pytest.mark.network
10241033
@td.skip_if_no("lxml")
1025-
@tm.network
1034+
@tm.network(
1035+
url="https://www.w3schools.com/xml/cdcatalog_with_xsl.xml", check_before_test=True
1036+
)
10261037
def test_online_stylesheet():
10271038
xml = "https://www.w3schools.com/xml/cdcatalog_with_xsl.xml"
10281039
xsl = "https://www.w3schools.com/xml/cdcatalog.xsl"

0 commit comments

Comments
 (0)