Skip to content

TST: Check network URL statuses in tests #45949

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 6 commits merged on Feb 15, 2022
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions pandas/_testing/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import bz2
from functools import wraps
import gzip
import socket
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -73,7 +74,13 @@ def _get_default_network_errors():
import http.client
import urllib.error

return (OSError, http.client.HTTPException, TimeoutError, urllib.error.URLError)
return (
OSError,
http.client.HTTPException,
TimeoutError,
urllib.error.URLError,
socket.timeout,
)


def optional_args(decorator):
Expand Down Expand Up @@ -264,8 +271,10 @@ def can_connect(url, error_classes=None):
error_classes = _get_default_network_errors()

try:
with urlopen(url):
pass
with urlopen(url, timeout=20) as response:
# Timeout just in case rate-limiting is applied
if response.status != 200:
return False
except error_classes:
return False
else:
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,13 @@ def test_corrupt_bytes_raises(self, engine):
pd.read_excel(bad_stream)

@pytest.mark.network
@tm.network
@tm.network(
url=(
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
"pandas/tests/io/data/excel/test1.xlsx"
),
check_before_test=True,
)
def test_read_from_http_url(self, read_ext):
url = (
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,7 +994,10 @@ def test_round_trip_exception_(self, datapath):
tm.assert_frame_equal(result.reindex(index=df.index, columns=df.columns), df)

@pytest.mark.network
@tm.network
@tm.network(
url="https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5",
check_before_test=True,
)
@pytest.mark.parametrize(
"field,dtype",
[
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/io/parser/common/test_file_buffer_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,13 @@


@pytest.mark.network
@tm.network
@tm.network(
url=(
"https://raw.github.com/pandas-dev/pandas/main/"
"pandas/tests/io/parser/data/salaries.csv"
),
check_before_test=True,
)
def test_url(all_parsers, csv_dir_path):
parser = all_parsers
kwargs = {"sep": "\t"}
Expand Down
16 changes: 14 additions & 2 deletions pandas/tests/io/parser/test_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@


@pytest.mark.network
@tm.network
@tm.network(
url=(
"https://github.com/pandas-dev/pandas/raw/main/"
"pandas/tests/io/parser/data/salaries.csv"
),
check_before_test=True,
)
@pytest.mark.parametrize("mode", ["explicit", "infer"])
@pytest.mark.parametrize("engine", ["python", "c"])
def test_compressed_urls(salaries_table, mode, engine, compression_only):
Expand All @@ -45,7 +51,13 @@ def test_compressed_urls(salaries_table, mode, engine, compression_only):


@pytest.mark.network
@tm.network
@tm.network(
url=(
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
"pandas/tests/io/parser/data/unicode_series.csv"
),
check_before_test=True,
)
def test_url_encoding_csv():
"""
read_csv should honor the requested encoding for URLs.
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/io/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,13 @@ def test_passthrough_keywords(self):
self.check_round_trip(df, write_kwargs={"version": 1})

@pytest.mark.network
@tm.network
@tm.network(
url=(
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
"pandas/tests/io/data/feather/feather-0_3_1.feather"
),
check_before_test=True,
)
def test_http_path(self, feather_file):
# GH 29055
url = (
Expand Down
32 changes: 25 additions & 7 deletions pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,15 @@ def test_to_html_compat(self):
tm.assert_frame_equal(res, df)

@pytest.mark.network
@tm.network
@tm.network(
url=(
"https://www.fdic.gov/resources/resolutions/"
"bank-failures/failed-bank-list/index.html"
),
check_before_test=True,
)
def test_banklist_url_positional_match(self):
url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
# Passing match argument as positional should cause a FutureWarning.
with tm.assert_produces_warning(FutureWarning):
df1 = self.read_html(
Expand All @@ -153,9 +159,15 @@ def test_banklist_url_positional_match(self):
assert_framelist_equal(df1, df2)

@pytest.mark.network
@tm.network
@tm.network(
url=(
"https://www.fdic.gov/resources/resolutions/"
"bank-failures/failed-bank-list/index.html"
),
check_before_test=True,
)
def test_banklist_url(self):
url = "http://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501
df1 = self.read_html(
# lxml cannot find attrs leave out for now
url,
Expand All @@ -170,7 +182,13 @@ def test_banklist_url(self):
assert_framelist_equal(df1, df2)

@pytest.mark.network
@tm.network
@tm.network(
url=(
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
"pandas/tests/io/data/html/spam.html"
),
check_before_test=True,
)
def test_spam_url(self):
url = (
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
Expand Down Expand Up @@ -406,14 +424,14 @@ def test_negative_skiprows(self, spam_data):
self.read_html(spam_data, match="Water", skiprows=-1)

@pytest.mark.network
@tm.network
@tm.network(url="https://docs.python.org/2/", check_before_test=True)
def test_multiple_matches(self):
url = "https://docs.python.org/2/"
dfs = self.read_html(url, match="Python")
assert len(dfs) > 1

@pytest.mark.network
@tm.network
@tm.network(url="https://docs.python.org/2/", check_before_test=True)
def test_python_docs_table(self):
url = "https://docs.python.org/2/"
dfs = self.read_html(url, match="Python")
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,13 @@ def check_external_error_on_write(self, df, engine, exc):
to_parquet(df, path, engine, compression=None)

@pytest.mark.network
@tm.network
@tm.network(
url=(
"https://raw.githubusercontent.com/pandas-dev/pandas/"
"main/pandas/tests/io/data/parquet/simple.parquet"
),
check_before_test=True,
)
def test_parquet_read_from_url(self, df_compat, engine):
if engine != "auto":
pytest.importorskip(engine)
Expand Down
19 changes: 15 additions & 4 deletions pandas/tests/io/xml/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,13 @@ def test_parser_consistency_file(datapath):
@pytest.mark.network
@pytest.mark.slow
@td.skip_if_no("lxml")
@tm.network
@tm.network(
url=(
"https://data.cityofchicago.org/api/views/"
"8pix-ypme/rows.xml?accessType=DOWNLOAD"
),
check_before_test=True,
)
def test_parser_consistency_url():
url = (
"https://data.cityofchicago.org/api/views/"
Expand Down Expand Up @@ -404,7 +410,10 @@ def test_wrong_file_path_etree():


@pytest.mark.network
@tm.network
@tm.network(
url="https://www.w3schools.com/xml/books.xml",
check_before_test=True,
)
@td.skip_if_no("lxml")
def test_url():
url = "https://www.w3schools.com/xml/books.xml"
Expand All @@ -425,7 +434,7 @@ def test_url():


@pytest.mark.network
@tm.network
@tm.network(url="https://www.w3schools.com/xml/python.xml", check_before_test=True)
def test_wrong_url(parser):
with pytest.raises(HTTPError, match=("HTTP Error 404: Not Found")):
url = "https://www.w3schools.com/xml/python.xml"
Expand Down Expand Up @@ -1022,7 +1031,9 @@ def test_empty_stylesheet(val):

@pytest.mark.network
@td.skip_if_no("lxml")
@tm.network
@tm.network(
url="https://www.w3schools.com/xml/cdcatalog_with_xsl.xml", check_before_test=True
)
def test_online_stylesheet():
xml = "https://www.w3schools.com/xml/cdcatalog_with_xsl.xml"
xsl = "https://www.w3schools.com/xml/cdcatalog.xsl"
Expand Down