Skip to content

PERF: trim import time ~5% #28227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Sep 5, 2019
10 changes: 7 additions & 3 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,14 @@ if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then
import sys
import pandas

blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2', 'hypothesis',
blacklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
'lxml', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
'tables', 'xlrd', 'xlsxwriter', 'xlwt'}
mods = blacklist & set(m.split('.')[0] for m in sys.modules)
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}

# GH#28227: for some of these we check for top-level modules, while others
# are more specific (e.g. urllib.request)
import_mods = set(m.split('.')[0] for m in sys.modules) | set(sys.modules)
mods = blacklist & import_mods
if mods:
sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods)))
sys.exit(len(mods))
Expand Down
16 changes: 13 additions & 3 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import codecs
import csv
import gzip
from http.client import HTTPException # noqa
from io import BufferedIOBase, BytesIO
import mmap
import os
Expand All @@ -22,7 +21,6 @@
Type,
Union,
)
from urllib.error import URLError # noqa
from urllib.parse import ( # noqa
urlencode,
urljoin,
Expand All @@ -31,7 +29,6 @@
uses_params,
uses_relative,
)
from urllib.request import pathname2url, urlopen
import zipfile

from pandas.compat import _get_lzma_file, _import_lzma
Expand Down Expand Up @@ -188,6 +185,16 @@ def is_gcs_url(url) -> bool:
return False


def urlopen(*args, **kwargs):
    """
    Open a URL with the stdlib ``urlopen``, importing it only when called.

    ``urllib.request`` imports a big chunk of the stdlib, so the import is
    deferred to call time rather than done at module import. All positional
    and keyword arguments are forwarded unchanged, and whatever
    ``urllib.request.urlopen`` returns is returned as-is.
    """
    # Deferred on purpose: keep this import out of module scope.
    from urllib import request as _request

    opener = _request.urlopen
    return opener(*args, **kwargs)


def get_filepath_or_buffer(
filepath_or_buffer: FilePathOrBuffer,
encoding: Optional[str] = None,
Expand Down Expand Up @@ -261,6 +268,9 @@ def file_path_to_url(path: str) -> str:
-------
a valid FILE URL
"""
# lazify expensive import (~30ms)
from urllib.request import pathname2url

return urljoin("file:", pathname2url(path))


Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from io import BytesIO
import os
from textwrap import fill
from urllib.request import urlopen

from pandas._config import config

Expand All @@ -21,6 +20,7 @@
_stringify_path,
_validate_header_arg,
get_filepath_or_buffer,
urlopen,
)
from pandas.io.excel._util import (
_fill_mi_header,
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from datetime import datetime, time
from functools import partial
import os
from urllib.error import URLError
import warnings

import numpy as np
Expand All @@ -14,8 +15,6 @@
from pandas import DataFrame, Index, MultiIndex, Series
import pandas.util.testing as tm

from pandas.io.common import URLError


@contextlib.contextmanager
def ignore_xlrd_time_clock_warning():
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import os
import platform
from tempfile import TemporaryFile
from urllib.error import URLError

import numpy as np
import pytest
Expand All @@ -21,7 +22,6 @@
from pandas import DataFrame, Index, MultiIndex, Series, compat, concat
import pandas.util.testing as tm

from pandas.io.common import URLError
from pandas.io.parsers import CParserWrapper, TextFileReader, TextParser


Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import re
import threading
from urllib.error import URLError

import numpy as np
from numpy.random import rand
Expand All @@ -17,7 +18,7 @@
import pandas.util.testing as tm
from pandas.util.testing import makeCustomDataframe as mkdf, network

from pandas.io.common import URLError, file_path_to_url
from pandas.io.common import file_path_to_url
import pandas.io.html
from pandas.io.html import read_html

Expand Down
22 changes: 17 additions & 5 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from datetime import datetime
from functools import wraps
import gzip
import http.client
import os
import re
from shutil import rmtree
Expand Down Expand Up @@ -2275,11 +2274,17 @@ def dec(f):
# But some tests (test_data yahoo) contact incredibly flakey
# servers.

# and conditionally raise on these exception types
_network_error_classes = (IOError, http.client.HTTPException, TimeoutError)
# and conditionally raise on exception types in _get_default_network_errors


def can_connect(url, error_classes=_network_error_classes):
def _get_default_network_errors():
    """
    Return the default tuple of exception classes treated as network errors.

    The tuple is ``(IOError, http.client.HTTPException, TimeoutError)``.
    """
    # Lazy import: http.client imports many things from the stdlib, so it is
    # only loaded when the defaults are actually requested.
    from http.client import HTTPException

    default_errors = (IOError, HTTPException, TimeoutError)
    return default_errors


def can_connect(url, error_classes=None):
"""Try to connect to the given url. True if succeeds, False if IOError
raised

Expand All @@ -2294,6 +2299,10 @@ def can_connect(url, error_classes=_network_error_classes):
Return True if no IOError (unable to connect) or URLError (bad url) was
raised
"""

if error_classes is None:
error_classes = _get_default_network_errors()

try:
with urlopen(url):
pass
Expand All @@ -2309,7 +2318,7 @@ def network(
url="http://www.google.com",
raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT,
check_before_test=False,
error_classes=_network_error_classes,
error_classes=None,
skip_errnos=_network_errno_vals,
_skip_on_messages=_network_error_messages,
):
Expand Down Expand Up @@ -2397,6 +2406,9 @@ def network(
"""
from pytest import skip

if error_classes is None:
error_classes = _get_default_network_errors()

t.network = True

@wraps(t)
Expand Down