Skip to content

Commit 732c854

Browse files
jbrockmendel authored and
proost committed
PERF: trim import time ~5% (pandas-dev#28227)
* PERF: trim import time ~5% with lazy imports
1 parent b013414 commit 732c854

File tree

7 files changed

+42
-16
lines changed

7 files changed

+42
-16
lines changed

ci/code_checks.sh

+7-3
Original file line numberDiff line numberDiff line change
@@ -203,10 +203,14 @@ if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then
203203
import sys
204204
import pandas
205205
206-
blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2', 'hypothesis',
206+
blacklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
207207
'lxml', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
208-
'tables', 'xlrd', 'xlsxwriter', 'xlwt'}
209-
mods = blacklist & set(m.split('.')[0] for m in sys.modules)
208+
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
209+
210+
# GH#28227 for some of these check for top-level modules, while others are
211+
# more specific (e.g. urllib.request)
212+
import_mods = set(m.split('.')[0] for m in sys.modules) | set(sys.modules)
213+
mods = blacklist & import_mods
210214
if mods:
211215
sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods)))
212216
sys.exit(len(mods))

pandas/io/common.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import codecs
55
import csv
66
import gzip
7-
from http.client import HTTPException # noqa
87
from io import BufferedIOBase, BytesIO
98
import mmap
109
import os
@@ -22,7 +21,6 @@
2221
Type,
2322
Union,
2423
)
25-
from urllib.error import URLError # noqa
2624
from urllib.parse import ( # noqa
2725
urlencode,
2826
urljoin,
@@ -31,7 +29,6 @@
3129
uses_params,
3230
uses_relative,
3331
)
34-
from urllib.request import pathname2url, urlopen
3532
import zipfile
3633

3734
from pandas.compat import _get_lzma_file, _import_lzma
@@ -188,6 +185,16 @@ def is_gcs_url(url) -> bool:
188185
return False
189186

190187

188+
def urlopen(*args, **kwargs):
189+
"""
190+
Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
191+
the stdlib.
192+
"""
193+
import urllib.request
194+
195+
return urllib.request.urlopen(*args, **kwargs)
196+
197+
191198
def get_filepath_or_buffer(
192199
filepath_or_buffer: FilePathOrBuffer,
193200
encoding: Optional[str] = None,
@@ -261,6 +268,9 @@ def file_path_to_url(path: str) -> str:
261268
-------
262269
a valid FILE URL
263270
"""
271+
# lazify expensive import (~30ms)
272+
from urllib.request import pathname2url
273+
264274
return urljoin("file:", pathname2url(path))
265275

266276

pandas/io/excel/_base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from io import BytesIO
55
import os
66
from textwrap import fill
7-
from urllib.request import urlopen
87

98
from pandas._config import config
109

@@ -21,6 +20,7 @@
2120
_stringify_path,
2221
_validate_header_arg,
2322
get_filepath_or_buffer,
23+
urlopen,
2424
)
2525
from pandas.io.excel._util import (
2626
_fill_mi_header,

pandas/tests/io/excel/test_readers.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from datetime import datetime, time
44
from functools import partial
55
import os
6+
from urllib.error import URLError
67
import warnings
78

89
import numpy as np
@@ -14,8 +15,6 @@
1415
from pandas import DataFrame, Index, MultiIndex, Series
1516
import pandas.util.testing as tm
1617

17-
from pandas.io.common import URLError
18-
1918

2019
@contextlib.contextmanager
2120
def ignore_xlrd_time_clock_warning():

pandas/tests/io/parser/test_common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import os
1212
import platform
1313
from tempfile import TemporaryFile
14+
from urllib.error import URLError
1415

1516
import numpy as np
1617
import pytest
@@ -21,7 +22,6 @@
2122
from pandas import DataFrame, Index, MultiIndex, Series, compat, concat
2223
import pandas.util.testing as tm
2324

24-
from pandas.io.common import URLError
2525
from pandas.io.parsers import CParserWrapper, TextFileReader, TextParser
2626

2727

pandas/tests/io/test_html.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import re
66
import threading
7+
from urllib.error import URLError
78

89
import numpy as np
910
from numpy.random import rand
@@ -17,7 +18,7 @@
1718
import pandas.util.testing as tm
1819
from pandas.util.testing import makeCustomDataframe as mkdf, network
1920

20-
from pandas.io.common import URLError, file_path_to_url
21+
from pandas.io.common import file_path_to_url
2122
import pandas.io.html
2223
from pandas.io.html import read_html
2324

pandas/util/testing.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from datetime import datetime
55
from functools import wraps
66
import gzip
7-
import http.client
87
import os
98
import re
109
from shutil import rmtree
@@ -2275,11 +2274,17 @@ def dec(f):
22752274
# But some tests (test_data yahoo) contact incredibly flakey
22762275
# servers.
22772276

2278-
# and conditionally raise on these exception types
2279-
_network_error_classes = (IOError, http.client.HTTPException, TimeoutError)
2277+
# and conditionally raise on exception types in _get_default_network_errors
22802278

22812279

2282-
def can_connect(url, error_classes=_network_error_classes):
2280+
def _get_default_network_errors():
2281+
# Lazy import for http.client because it imports many things from the stdlib
2282+
import http.client
2283+
2284+
return (IOError, http.client.HTTPException, TimeoutError)
2285+
2286+
2287+
def can_connect(url, error_classes=None):
22832288
"""Try to connect to the given url. True if succeeds, False if IOError
22842289
raised
22852290
@@ -2294,6 +2299,10 @@ def can_connect(url, error_classes=_network_error_classes):
22942299
Return True if no IOError (unable to connect) or URLError (bad url) was
22952300
raised
22962301
"""
2302+
2303+
if error_classes is None:
2304+
error_classes = _get_default_network_errors()
2305+
22972306
try:
22982307
with urlopen(url):
22992308
pass
@@ -2309,7 +2318,7 @@ def network(
23092318
url="http://www.google.com",
23102319
raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT,
23112320
check_before_test=False,
2312-
error_classes=_network_error_classes,
2321+
error_classes=None,
23132322
skip_errnos=_network_errno_vals,
23142323
_skip_on_messages=_network_error_messages,
23152324
):
@@ -2397,6 +2406,9 @@ def network(
23972406
"""
23982407
from pytest import skip
23992408

2409+
if error_classes is None:
2410+
error_classes = _get_default_network_errors()
2411+
24002412
t.network = True
24012413

24022414
@wraps(t)

0 commit comments

Comments
 (0)