Skip to content

TST: Use pytest-localserver instead of making network connections #53828

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 27, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/deps/actions-310.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17.0
- pytest-localserver>=0.7.1
- boto3

# required dependencies
Expand Down
1 change: 1 addition & 0 deletions ci/deps/actions-311-downstream_compat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17.0
- pytest-localserver>=0.7.1
- boto3

# required dependencies
Expand Down
1 change: 1 addition & 0 deletions ci/deps/actions-311.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17.0
- pytest-localserver>=0.7.1
- boto3

# required dependencies
Expand Down
1 change: 1 addition & 0 deletions ci/deps/actions-39-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17.0
- pytest-localserver>=0.7.1
- boto3

# required dependencies
Expand Down
1 change: 1 addition & 0 deletions ci/deps/actions-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17.0
- pytest-localserver>=0.7.1
- boto3

# required dependencies
Expand Down
1 change: 1 addition & 0 deletions ci/deps/circle-310-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17.0
- pytest-localserver>=0.7.1
- boto3

# required dependencies
Expand Down
19 changes: 6 additions & 13 deletions doc/source/development/contributing_codebase.rst
Original file line number Diff line number Diff line change
Expand Up @@ -613,22 +613,15 @@ Testing involving network connectivity
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

It is highly discouraged to add a test that connects to the internet due to flakiness of network connections and
lack of ownership of the server that is being connected to. If network connectivity is absolutely required, use the
``tm.network`` decorator.
lack of ownership of the server that is being connected to. If network connectivity is absolutely required, mock
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this, are these still highly discouraged? Seems like all the reasons to discourage them no longer apply.

Copy link
Member Author

@mroeschke mroeschke Jun 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point I'll clarify the wording here, "highly discouraged" is probably too strong if they can be mocked properly

the network connection using the ``httpserver`` fixture from the
`pytest-localserver plugin. <https://github.com/pytest-dev/pytest-localserver>`_

.. code-block:: python

@tm.network # noqa
def test_network():
result = package.call_to_internet()

If the test requires data from a specific website, specify ``check_before_test=True`` and the site in the decorator.

.. code-block:: python

@tm.network("https://www.somespecificsite.com", check_before_test=True)
def test_network():
result = pd.read_html("https://www.somespecificsite.com")
def test_network(httpserver):
httpserver.serve_content(content="content")
result = pd.read_html(httpserver.url)

Example
^^^^^^^
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17.0
- pytest-localserver>=0.7.1
- coverage

# required dependencies
Expand Down
2 changes: 0 additions & 2 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
)
from pandas._testing._io import (
close,
network,
round_trip_localpath,
round_trip_pathlib,
round_trip_pickle,
Expand Down Expand Up @@ -1150,7 +1149,6 @@ def shares_memory(left, right) -> bool:
"makeUIntIndex",
"maybe_produces_warning",
"NARROW_NP_DTYPES",
"network",
"NP_NAT_OBJECTS",
"NULL_OBJECTS",
"OBJECT_DTYPES",
Expand Down
253 changes: 0 additions & 253 deletions pandas/_testing/_io.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from __future__ import annotations

import bz2
from functools import wraps
import gzip
import io
import socket
import tarfile
from typing import (
TYPE_CHECKING,
Expand All @@ -20,8 +18,6 @@
from pandas._testing._random import rands
from pandas._testing.contexts import ensure_clean

from pandas.io.common import urlopen

if TYPE_CHECKING:
from pandas._typing import (
FilePath,
Expand All @@ -33,255 +29,6 @@
Series,
)

# skip tests on exceptions with these messages
_network_error_messages = (
# 'urlopen error timed out',
# 'timeout: timed out',
# 'socket.timeout: timed out',
"timed out",
"Server Hangup",
"HTTP Error 503: Service Unavailable",
"502: Proxy Error",
"HTTP Error 502: internal error",
"HTTP Error 502",
"HTTP Error 503",
"HTTP Error 403",
"HTTP Error 400",
"Temporary failure in name resolution",
"Name or service not known",
"Connection refused",
"certificate verify",
)

# or this e.errno/e.reason.errno
_network_errno_vals = (
101, # Network is unreachable
111, # Connection refused
110, # Connection timed out
104, # Connection reset Error
54, # Connection reset by peer
60, # urllib.error.URLError: [Errno 60] Connection timed out
)

# Both of the above shouldn't mask real issues such as 404's
# or refused connections (changed DNS).
# But some tests (test_data yahoo) contact incredibly flakey
# servers.

# and conditionally raise on exception types in _get_default_network_errors


def _get_default_network_errors():
# Lazy import for http.client & urllib.error
# because it imports many things from the stdlib
import http.client
import urllib.error

return (
OSError,
http.client.HTTPException,
TimeoutError,
urllib.error.URLError,
socket.timeout,
)


def optional_args(decorator):
"""
allows a decorator to take optional positional and keyword arguments.
Assumes that taking a single, callable, positional argument means that
it is decorating a function, i.e. something like this::

@my_decorator
def function(): pass

Calls decorator with decorator(f, *args, **kwargs)
"""

@wraps(decorator)
def wrapper(*args, **kwargs):
def dec(f):
return decorator(f, *args, **kwargs)

is_decorating = not kwargs and len(args) == 1 and callable(args[0])
if is_decorating:
f = args[0]
args = ()
return dec(f)
else:
return dec

return wrapper


# error: Untyped decorator makes function "network" untyped
@optional_args # type: ignore[misc]
def network(
t,
url: str = "https://www.google.com",
raise_on_error: bool = False,
check_before_test: bool = False,
error_classes=None,
skip_errnos=_network_errno_vals,
_skip_on_messages=_network_error_messages,
):
"""
Label a test as requiring network connection and, if an error is
encountered, only raise if it does not find a network connection.

In comparison to ``network``, this assumes an added contract to your test:
you must assert that, under normal conditions, your test will ONLY fail if
it does not have network connectivity.

You can call this in 3 ways: as a standard decorator, with keyword
arguments, or with a positional argument that is the url to check.

Parameters
----------
t : callable
The test requiring network connectivity.
url : path
The url to test via ``pandas.io.common.urlopen`` to check
for connectivity. Defaults to 'https://www.google.com'.
raise_on_error : bool
If True, never catches errors.
check_before_test : bool
If True, checks connectivity before running the test case.
error_classes : tuple or Exception
error classes to ignore. If not in ``error_classes``, raises the error.
defaults to OSError. Be careful about changing the error classes here.
skip_errnos : iterable of int
Any exception that has .errno or .reason.erno set to one
of these values will be skipped with an appropriate
message.
_skip_on_messages: iterable of string
any exception e for which one of the strings is
a substring of str(e) will be skipped with an appropriate
message. Intended to suppress errors where an errno isn't available.

Notes
-----
* ``raise_on_error`` supersedes ``check_before_test``

Returns
-------
t : callable
The decorated test ``t``, with checks for connectivity errors.

Example
-------

Tests decorated with @network will fail if it's possible to make a network
connection to another URL (defaults to google.com)::

>>> from pandas import _testing as tm
>>> @tm.network
... def test_network():
... with pd.io.common.urlopen("rabbit://bonanza.com"):
... pass
>>> test_network() # doctest: +SKIP
Traceback
...
URLError: <urlopen error unknown url type: rabbit>

You can specify alternative URLs::

>>> @tm.network("https://www.yahoo.com")
... def test_something_with_yahoo():
... raise OSError("Failure Message")
>>> test_something_with_yahoo() # doctest: +SKIP
Traceback (most recent call last):
...
OSError: Failure Message

If you set check_before_test, it will check the url first and not run the
test on failure::

>>> @tm.network("failing://url.blaher", check_before_test=True)
... def test_something():
... print("I ran!")
... raise ValueError("Failure")
>>> test_something() # doctest: +SKIP
Traceback (most recent call last):
...

Errors not related to networking will always be raised.
"""
import pytest

if error_classes is None:
error_classes = _get_default_network_errors()

t.network = True

@wraps(t)
def wrapper(*args, **kwargs):
if (
check_before_test
and not raise_on_error
and not can_connect(url, error_classes)
):
pytest.skip(
f"May not have network connectivity because cannot connect to {url}"
)
try:
return t(*args, **kwargs)
except Exception as err:
errno = getattr(err, "errno", None)
if not errno and hasattr(errno, "reason"):
# error: "Exception" has no attribute "reason"
errno = getattr(err.reason, "errno", None) # type: ignore[attr-defined]

if errno in skip_errnos:
pytest.skip(f"Skipping test due to known errno and error {err}")

e_str = str(err)

if any(m.lower() in e_str.lower() for m in _skip_on_messages):
pytest.skip(
f"Skipping test because exception message is known and error {err}"
)

if not isinstance(err, error_classes) or raise_on_error:
raise
pytest.skip(f"Skipping test due to lack of connectivity and error {err}")

return wrapper


def can_connect(url, error_classes=None) -> bool:
"""
Try to connect to the given url. True if succeeds, False if OSError
raised

Parameters
----------
url : basestring
The URL to try to connect to

Returns
-------
connectable : bool
Return True if no OSError (unable to connect) or URLError (bad url) was
raised
"""
if error_classes is None:
error_classes = _get_default_network_errors()

try:
with urlopen(url, timeout=20) as response:
# Timeout just in case rate-limiting is applied
if (
response.info().get("Content-type") == "text/html"
and response.status != 200
):
return False
except error_classes:
return False
else:
return True


# ------------------------------------------------------------------
# File-IO

Expand Down
Loading