pandas-dev · TomAugspurger · Sep 5, 2019 · Aug 29, 2019 · Aug 30, 2019 · Aug 30, 2019
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -203,10 +203,14 @@ if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then
 import sys
 import pandas
 
-blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2', 'hypothesis',
+blacklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
              'lxml', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
-             'tables', 'xlrd', 'xlsxwriter', 'xlwt'}
-mods = blacklist & set(m.split('.')[0] for m in sys.modules)
+             'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
+
+# GH#28227: some blacklist entries are top-level packages, while others are
+#  fully qualified modules (e.g. urllib.request), so check against both.
+import_mods = set(m.split('.')[0] for m in sys.modules) | set(sys.modules)
+mods = blacklist & import_mods
 if mods:
     sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods)))
     sys.exit(len(mods))

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -4,7 +4,6 @@
 import codecs
 import csv
 import gzip
-from http.client import HTTPException  # noqa
 from io import BufferedIOBase, BytesIO
 import mmap
 import os
@@ -22,7 +21,6 @@
     Type,
     Union,
 )
-from urllib.error import URLError  # noqa
 from urllib.parse import (  # noqa
     urlencode,
     urljoin,
@@ -31,7 +29,6 @@
     uses_params,
     uses_relative,
 )
-from urllib.request import pathname2url, urlopen
 import zipfile
 
 from pandas.compat import _get_lzma_file, _import_lzma
@@ -188,6 +185,16 @@ def is_gcs_url(url) -> bool:
         return False
 
 
+def urlopen(*args, **kwargs):
+    """
+    Lazy-import wrapper for the stdlib urllib.request.urlopen; the import is
+    deferred to call time because it pulls in a big chunk of the stdlib.
+    """
+    import urllib.request
+
+    return urllib.request.urlopen(*args, **kwargs)
+
+
 def get_filepath_or_buffer(
     filepath_or_buffer: FilePathOrBuffer,
     encoding: Optional[str] = None,
@@ -261,6 +268,9 @@ def file_path_to_url(path: str) -> str:
     -------
     a valid FILE URL
     """
+    # lazify expensive import (~30ms)
+    from urllib.request import pathname2url
+
     return urljoin("file:", pathname2url(path))
 
 

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -4,7 +4,6 @@
 from io import BytesIO
 import os
 from textwrap import fill
-from urllib.request import urlopen
 
 from pandas._config import config
 
@@ -21,6 +20,7 @@
     _stringify_path,
     _validate_header_arg,
     get_filepath_or_buffer,
+    urlopen,
 )
 from pandas.io.excel._util import (
     _fill_mi_header,

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -3,6 +3,7 @@
 from datetime import datetime, time
 from functools import partial
 import os
+from urllib.error import URLError
 import warnings
 
 import numpy as np
@@ -14,8 +15,6 @@
 from pandas import DataFrame, Index, MultiIndex, Series
 import pandas.util.testing as tm
 
-from pandas.io.common import URLError
-
 
 @contextlib.contextmanager
 def ignore_xlrd_time_clock_warning():

diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
@@ -11,6 +11,7 @@
 import os
 import platform
 from tempfile import TemporaryFile
+from urllib.error import URLError
 
 import numpy as np
 import pytest
@@ -21,7 +22,6 @@
 from pandas import DataFrame, Index, MultiIndex, Series, compat, concat
 import pandas.util.testing as tm
 
-from pandas.io.common import URLError
 from pandas.io.parsers import CParserWrapper, TextFileReader, TextParser
 
 

diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
@@ -4,6 +4,7 @@
 import os
 import re
 import threading
+from urllib.error import URLError
 
 import numpy as np
 from numpy.random import rand
@@ -17,7 +18,7 @@
 import pandas.util.testing as tm
 from pandas.util.testing import makeCustomDataframe as mkdf, network
 
-from pandas.io.common import URLError, file_path_to_url
+from pandas.io.common import file_path_to_url
 import pandas.io.html
 from pandas.io.html import read_html
 

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -4,7 +4,6 @@
 from datetime import datetime
 from functools import wraps
 import gzip
-import http.client
 import os
 import re
 from shutil import rmtree
@@ -2275,11 +2274,17 @@ def dec(f):
 # But some tests (test_data yahoo) contact incredibly flakey
 # servers.
 
-# and conditionally raise on these exception types
-_network_error_classes = (IOError, http.client.HTTPException, TimeoutError)
+# and conditionally raise on the types returned by _get_default_network_errors
 
 
-def can_connect(url, error_classes=_network_error_classes):
+def _get_default_network_errors():
+    # Lazy import for http.client because it imports many things from the stdlib
+    import http.client
+
+    return (IOError, http.client.HTTPException, TimeoutError)
+
+
+def can_connect(url, error_classes=None):
     """Try to connect to the given url. True if succeeds, False if IOError
     raised
 
@@ -2294,6 +2299,10 @@ def can_connect(url, error_classes=_network_error_classes):
         Return True if no IOError (unable to connect) or URLError (bad url) was
         raised
     """
+
+    if error_classes is None:
+        error_classes = _get_default_network_errors()
+
     try:
         with urlopen(url):
             pass
@@ -2309,7 +2318,7 @@ def network(
     url="http://www.google.com",
     raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT,
     check_before_test=False,
-    error_classes=_network_error_classes,
+    error_classes=None,
     skip_errnos=_network_errno_vals,
     _skip_on_messages=_network_error_messages,
 ):
@@ -2397,6 +2406,9 @@ def network(
     """
     from pytest import skip
 
+    if error_classes is None:
+        error_classes = _get_default_network_errors()
+
     t.network = True
 
     @wraps(t)