Skip to content

Commit 8040a88

Browse files
mroeschkeim-vinicius
authored and
im-vinicius
committed
TST: Use pytest-localserver instead of making network connections (pandas-dev#53828)
* Use pytest-localserver instead of making network connections * Fix test, remove network function * remove network from init * Ignore distutils from datareader, s3so * specify encoding * Specify encoding * Clarify contributing doc
1 parent cc20ddc commit 8040a88

24 files changed

+468
-577
lines changed

ci/deps/actions-310.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ dependencies:
1515
- pytest-cov
1616
- pytest-xdist>=2.2.0
1717
- pytest-asyncio>=0.17.0
18+
- pytest-localserver>=0.7.1
1819
- boto3
1920

2021
# required dependencies

ci/deps/actions-311-downstream_compat.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ dependencies:
1616
- pytest-cov
1717
- pytest-xdist>=2.2.0
1818
- pytest-asyncio>=0.17.0
19+
- pytest-localserver>=0.7.1
1920
- boto3
2021

2122
# required dependencies

ci/deps/actions-311.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ dependencies:
1515
- pytest-cov
1616
- pytest-xdist>=2.2.0
1717
- pytest-asyncio>=0.17.0
18+
- pytest-localserver>=0.7.1
1819
- boto3
1920

2021
# required dependencies

ci/deps/actions-39-minimum_versions.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ dependencies:
1717
- pytest-cov
1818
- pytest-xdist>=2.2.0
1919
- pytest-asyncio>=0.17.0
20+
- pytest-localserver>=0.7.1
2021
- boto3
2122

2223
# required dependencies

ci/deps/actions-39.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ dependencies:
1515
- pytest-cov
1616
- pytest-xdist>=2.2.0
1717
- pytest-asyncio>=0.17.0
18+
- pytest-localserver>=0.7.1
1819
- boto3
1920

2021
# required dependencies

ci/deps/circle-310-arm64.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ dependencies:
1515
- pytest-cov
1616
- pytest-xdist>=2.2.0
1717
- pytest-asyncio>=0.17.0
18+
- pytest-localserver>=0.7.1
1819
- boto3
1920

2021
# required dependencies

doc/source/development/contributing_codebase.rst

+8-14
Original file line numberDiff line numberDiff line change
@@ -612,23 +612,17 @@ deleted when the context block is exited.
612612
Testing involving network connectivity
613613
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
614614

615-
It is highly discouraged to add a test that connects to the internet due to flakiness of network connections and
616-
lack of ownership of the server that is being connected to. If network connectivity is absolutely required, use the
617-
``tm.network`` decorator.
615+
A unit test should not access a public data set over the internet due to flakiness of network connections and
616+
lack of ownership of the server that is being connected to. To mock this interaction, use the ``httpserver`` fixture from the
617+
`pytest-localserver plugin <https://github.com/pytest-dev/pytest-localserver>`_ with synthetic data.
618618

619619
.. code-block:: python
620620
621-
@tm.network # noqa
622-
def test_network():
623-
result = package.call_to_internet()
624-
625-
If the test requires data from a specific website, specify ``check_before_test=True`` and the site in the decorator.
626-
627-
.. code-block:: python
628-
629-
@tm.network("https://www.somespecificsite.com", check_before_test=True)
630-
def test_network():
631-
result = pd.read_html("https://www.somespecificsite.com")
621+
@pytest.mark.network
622+
@pytest.mark.single_cpu
623+
def test_network(httpserver):
624+
httpserver.serve_content(content="content")
625+
result = pd.read_html(httpserver.url)
632626
633627
Example
634628
^^^^^^^

environment.yml

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ dependencies:
1717
- pytest-cov
1818
- pytest-xdist>=2.2.0
1919
- pytest-asyncio>=0.17.0
20+
- pytest-localserver>=0.7.1
2021
- coverage
2122

2223
# required dependencies

pandas/_testing/__init__.py

-2
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@
5151
)
5252
from pandas._testing._io import (
5353
close,
54-
network,
5554
round_trip_localpath,
5655
round_trip_pathlib,
5756
round_trip_pickle,
@@ -1150,7 +1149,6 @@ def shares_memory(left, right) -> bool:
11501149
"makeUIntIndex",
11511150
"maybe_produces_warning",
11521151
"NARROW_NP_DTYPES",
1153-
"network",
11541152
"NP_NAT_OBJECTS",
11551153
"NULL_OBJECTS",
11561154
"OBJECT_DTYPES",

pandas/_testing/_io.py

-253
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
from __future__ import annotations
22

33
import bz2
4-
from functools import wraps
54
import gzip
65
import io
7-
import socket
86
import tarfile
97
from typing import (
108
TYPE_CHECKING,
@@ -20,8 +18,6 @@
2018
from pandas._testing._random import rands
2119
from pandas._testing.contexts import ensure_clean
2220

23-
from pandas.io.common import urlopen
24-
2521
if TYPE_CHECKING:
2622
from pandas._typing import (
2723
FilePath,
@@ -33,255 +29,6 @@
3329
Series,
3430
)
3531

36-
# skip tests on exceptions with these messages
37-
_network_error_messages = (
38-
# 'urlopen error timed out',
39-
# 'timeout: timed out',
40-
# 'socket.timeout: timed out',
41-
"timed out",
42-
"Server Hangup",
43-
"HTTP Error 503: Service Unavailable",
44-
"502: Proxy Error",
45-
"HTTP Error 502: internal error",
46-
"HTTP Error 502",
47-
"HTTP Error 503",
48-
"HTTP Error 403",
49-
"HTTP Error 400",
50-
"Temporary failure in name resolution",
51-
"Name or service not known",
52-
"Connection refused",
53-
"certificate verify",
54-
)
55-
56-
# or this e.errno/e.reason.errno
57-
_network_errno_vals = (
58-
101, # Network is unreachable
59-
111, # Connection refused
60-
110, # Connection timed out
61-
104, # Connection reset Error
62-
54, # Connection reset by peer
63-
60, # urllib.error.URLError: [Errno 60] Connection timed out
64-
)
65-
66-
# Both of the above shouldn't mask real issues such as 404's
67-
# or refused connections (changed DNS).
68-
# But some tests (test_data yahoo) contact incredibly flakey
69-
# servers.
70-
71-
# and conditionally raise on exception types in _get_default_network_errors
72-
73-
74-
def _get_default_network_errors():
75-
# Lazy import for http.client & urllib.error
76-
# because it imports many things from the stdlib
77-
import http.client
78-
import urllib.error
79-
80-
return (
81-
OSError,
82-
http.client.HTTPException,
83-
TimeoutError,
84-
urllib.error.URLError,
85-
socket.timeout,
86-
)
87-
88-
89-
def optional_args(decorator):
90-
"""
91-
allows a decorator to take optional positional and keyword arguments.
92-
Assumes that taking a single, callable, positional argument means that
93-
it is decorating a function, i.e. something like this::
94-
95-
@my_decorator
96-
def function(): pass
97-
98-
Calls decorator with decorator(f, *args, **kwargs)
99-
"""
100-
101-
@wraps(decorator)
102-
def wrapper(*args, **kwargs):
103-
def dec(f):
104-
return decorator(f, *args, **kwargs)
105-
106-
is_decorating = not kwargs and len(args) == 1 and callable(args[0])
107-
if is_decorating:
108-
f = args[0]
109-
args = ()
110-
return dec(f)
111-
else:
112-
return dec
113-
114-
return wrapper
115-
116-
117-
# error: Untyped decorator makes function "network" untyped
118-
@optional_args # type: ignore[misc]
119-
def network(
120-
t,
121-
url: str = "https://www.google.com",
122-
raise_on_error: bool = False,
123-
check_before_test: bool = False,
124-
error_classes=None,
125-
skip_errnos=_network_errno_vals,
126-
_skip_on_messages=_network_error_messages,
127-
):
128-
"""
129-
Label a test as requiring network connection and, if an error is
130-
encountered, only raise if it does not find a network connection.
131-
132-
In comparison to ``network``, this assumes an added contract to your test:
133-
you must assert that, under normal conditions, your test will ONLY fail if
134-
it does not have network connectivity.
135-
136-
You can call this in 3 ways: as a standard decorator, with keyword
137-
arguments, or with a positional argument that is the url to check.
138-
139-
Parameters
140-
----------
141-
t : callable
142-
The test requiring network connectivity.
143-
url : path
144-
The url to test via ``pandas.io.common.urlopen`` to check
145-
for connectivity. Defaults to 'https://www.google.com'.
146-
raise_on_error : bool
147-
If True, never catches errors.
148-
check_before_test : bool
149-
If True, checks connectivity before running the test case.
150-
error_classes : tuple or Exception
151-
error classes to ignore. If not in ``error_classes``, raises the error.
152-
defaults to OSError. Be careful about changing the error classes here.
153-
skip_errnos : iterable of int
154-
Any exception that has .errno or .reason.erno set to one
155-
of these values will be skipped with an appropriate
156-
message.
157-
_skip_on_messages: iterable of string
158-
any exception e for which one of the strings is
159-
a substring of str(e) will be skipped with an appropriate
160-
message. Intended to suppress errors where an errno isn't available.
161-
162-
Notes
163-
-----
164-
* ``raise_on_error`` supersedes ``check_before_test``
165-
166-
Returns
167-
-------
168-
t : callable
169-
The decorated test ``t``, with checks for connectivity errors.
170-
171-
Example
172-
-------
173-
174-
Tests decorated with @network will fail if it's possible to make a network
175-
connection to another URL (defaults to google.com)::
176-
177-
>>> from pandas import _testing as tm
178-
>>> @tm.network
179-
... def test_network():
180-
... with pd.io.common.urlopen("rabbit://bonanza.com"):
181-
... pass
182-
>>> test_network() # doctest: +SKIP
183-
Traceback
184-
...
185-
URLError: <urlopen error unknown url type: rabbit>
186-
187-
You can specify alternative URLs::
188-
189-
>>> @tm.network("https://www.yahoo.com")
190-
... def test_something_with_yahoo():
191-
... raise OSError("Failure Message")
192-
>>> test_something_with_yahoo() # doctest: +SKIP
193-
Traceback (most recent call last):
194-
...
195-
OSError: Failure Message
196-
197-
If you set check_before_test, it will check the url first and not run the
198-
test on failure::
199-
200-
>>> @tm.network("failing://url.blaher", check_before_test=True)
201-
... def test_something():
202-
... print("I ran!")
203-
... raise ValueError("Failure")
204-
>>> test_something() # doctest: +SKIP
205-
Traceback (most recent call last):
206-
...
207-
208-
Errors not related to networking will always be raised.
209-
"""
210-
import pytest
211-
212-
if error_classes is None:
213-
error_classes = _get_default_network_errors()
214-
215-
t.network = True
216-
217-
@wraps(t)
218-
def wrapper(*args, **kwargs):
219-
if (
220-
check_before_test
221-
and not raise_on_error
222-
and not can_connect(url, error_classes)
223-
):
224-
pytest.skip(
225-
f"May not have network connectivity because cannot connect to {url}"
226-
)
227-
try:
228-
return t(*args, **kwargs)
229-
except Exception as err:
230-
errno = getattr(err, "errno", None)
231-
if not errno and hasattr(errno, "reason"):
232-
# error: "Exception" has no attribute "reason"
233-
errno = getattr(err.reason, "errno", None) # type: ignore[attr-defined]
234-
235-
if errno in skip_errnos:
236-
pytest.skip(f"Skipping test due to known errno and error {err}")
237-
238-
e_str = str(err)
239-
240-
if any(m.lower() in e_str.lower() for m in _skip_on_messages):
241-
pytest.skip(
242-
f"Skipping test because exception message is known and error {err}"
243-
)
244-
245-
if not isinstance(err, error_classes) or raise_on_error:
246-
raise
247-
pytest.skip(f"Skipping test due to lack of connectivity and error {err}")
248-
249-
return wrapper
250-
251-
252-
def can_connect(url, error_classes=None) -> bool:
253-
"""
254-
Try to connect to the given url. True if succeeds, False if OSError
255-
raised
256-
257-
Parameters
258-
----------
259-
url : basestring
260-
The URL to try to connect to
261-
262-
Returns
263-
-------
264-
connectable : bool
265-
Return True if no OSError (unable to connect) or URLError (bad url) was
266-
raised
267-
"""
268-
if error_classes is None:
269-
error_classes = _get_default_network_errors()
270-
271-
try:
272-
with urlopen(url, timeout=20) as response:
273-
# Timeout just in case rate-limiting is applied
274-
if (
275-
response.info().get("Content-type") == "text/html"
276-
and response.status != 200
277-
):
278-
return False
279-
except error_classes:
280-
return False
281-
else:
282-
return True
283-
284-
28532
# ------------------------------------------------------------------
28633
# File-IO
28734

0 commit comments

Comments
 (0)