diff --git a/ci/code_checks.sh b/ci/code_checks.sh index d9369b916fe4d..f839d86318e2e 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -203,10 +203,14 @@ if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then import sys import pandas -blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2', 'hypothesis', +blacklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis', 'lxml', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy', - 'tables', 'xlrd', 'xlsxwriter', 'xlwt'} -mods = blacklist & set(m.split('.')[0] for m in sys.modules) + 'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'} + +# GH#28227: some of these entries are matched as top-level modules, while +# others are more specific submodules (e.g. urllib.request) +import_mods = set(m.split('.')[0] for m in sys.modules) | set(sys.modules) +mods = blacklist & import_mods if mods: sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods))) sys.exit(len(mods)) diff --git a/pandas/io/common.py b/pandas/io/common.py index 30228d660e816..ac8dee8467370 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -4,7 +4,6 @@ import codecs import csv import gzip -from http.client import HTTPException # noqa from io import BufferedIOBase, BytesIO import mmap import os @@ -22,7 +21,6 @@ Type, Union, ) -from urllib.error import URLError # noqa from urllib.parse import ( # noqa urlencode, urljoin, @@ -31,7 +29,6 @@ uses_params, uses_relative, ) -from urllib.request import pathname2url, urlopen import zipfile from pandas.compat import _get_lzma_file, _import_lzma @@ -188,6 +185,16 @@ def is_gcs_url(url) -> bool: return False +def urlopen(*args, **kwargs): + """ + Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of + the stdlib. 
+ """ + import urllib.request + + return urllib.request.urlopen(*args, **kwargs) + + def get_filepath_or_buffer( filepath_or_buffer: FilePathOrBuffer, encoding: Optional[str] = None, @@ -261,6 +268,9 @@ def file_path_to_url(path: str) -> str: ------- a valid FILE URL """ + # lazify expensive import (~30ms) + from urllib.request import pathname2url + return urljoin("file:", pathname2url(path)) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 997edf49d9e8f..949eff45c0e92 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -4,7 +4,6 @@ from io import BytesIO import os from textwrap import fill -from urllib.request import urlopen from pandas._config import config @@ -21,6 +20,7 @@ _stringify_path, _validate_header_arg, get_filepath_or_buffer, + urlopen, ) from pandas.io.excel._util import ( _fill_mi_header, diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index a39cface0e015..5326f2df68972 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -3,6 +3,7 @@ from datetime import datetime, time from functools import partial import os +from urllib.error import URLError import warnings import numpy as np @@ -14,8 +15,6 @@ from pandas import DataFrame, Index, MultiIndex, Series import pandas.util.testing as tm -from pandas.io.common import URLError - @contextlib.contextmanager def ignore_xlrd_time_clock_warning(): diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 0586593c87cc5..756463e9d8d33 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -11,6 +11,7 @@ import os import platform from tempfile import TemporaryFile +from urllib.error import URLError import numpy as np import pytest @@ -21,7 +22,6 @@ from pandas import DataFrame, Index, MultiIndex, Series, compat, concat import pandas.util.testing as tm -from pandas.io.common import URLError from 
pandas.io.parsers import CParserWrapper, TextFileReader, TextParser diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 615e2735cd288..183d217eb09d6 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -4,6 +4,7 @@ import os import re import threading +from urllib.error import URLError import numpy as np from numpy.random import rand @@ -17,7 +18,7 @@ import pandas.util.testing as tm from pandas.util.testing import makeCustomDataframe as mkdf, network -from pandas.io.common import URLError, file_path_to_url +from pandas.io.common import file_path_to_url import pandas.io.html from pandas.io.html import read_html diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 0d543f891a5f6..c54dab046f57e 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -4,7 +4,6 @@ from datetime import datetime from functools import wraps import gzip -import http.client import os import re from shutil import rmtree @@ -2275,11 +2274,17 @@ def dec(f): # But some tests (test_data yahoo) contact incredibly flakey # servers. -# and conditionally raise on these exception types -_network_error_classes = (IOError, http.client.HTTPException, TimeoutError) +# and conditionally raise on exception types in _get_default_network_errors -def can_connect(url, error_classes=_network_error_classes): +def _get_default_network_errors(): + # Lazy import for http.client because it imports many things from the stdlib + import http.client + + return (IOError, http.client.HTTPException, TimeoutError) + + +def can_connect(url, error_classes=None): """Try to connect to the given url. 
True if succeeds, False if IOError raised @@ -2294,6 +2299,10 @@ def can_connect(url, error_classes=_network_error_classes): Return True if no IOError (unable to connect) or URLError (bad url) was raised """ + + if error_classes is None: + error_classes = _get_default_network_errors() + try: with urlopen(url): pass @@ -2309,7 +2318,7 @@ def network( url="http://www.google.com", raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT, check_before_test=False, - error_classes=_network_error_classes, + error_classes=None, skip_errnos=_network_errno_vals, _skip_on_messages=_network_error_messages, ): @@ -2397,6 +2406,9 @@ def network( """ from pytest import skip + if error_classes is None: + error_classes = _get_default_network_errors() + t.network = True @wraps(t)