Skip to content

Commit d6d62ab

Browse files
TomAugspurger authored and alanbato committed
Delay import (pandas-dev#17710)
closes pandas-dev#16764
1 parent fc13ce3 commit d6d62ab

21 files changed

+183
-181
lines changed

.travis.yml

+2
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ script:
121121
- ci/script_single.sh
122122
- ci/script_multi.sh
123123
- ci/lint.sh
124+
- echo "checking imports"
125+
- source activate pandas && python ci/check_imports.py
124126
- echo "script done"
125127

126128
after_success:

ci/check_imports.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""
2+
Check that certain modules are not loaded by `import pandas`
3+
"""
4+
import sys
5+
6+
# Top-level package names that must NOT be loaded as a side effect of
# `import pandas`.  Each entry is matched against the first dotted component
# of the keys in ``sys.modules``, so names are case-sensitive and must be the
# importable package name (e.g. ``IPython``, not ``ipython``).
#
# Keep a trailing comma on every entry: adjacent string literals without a
# comma are silently concatenated ('jinja2' 'lxml' -> 'jinja2lxml'), which
# would drop both packages from the check.
blacklist = {
    'bs4',
    'html5lib',
    'IPython',
    'jinja2',
    'lxml',
    'matplotlib',
    'numexpr',
    'openpyxl',
    'py',
    'pytest',
    's3fs',
    'scipy',
    'tables',
    'xlrd',
    'xlsxwriter',
    'xlwt',
}
24+
25+
26+
def main():
    """Import pandas and abort if any blacklisted package was loaded.

    After ``import pandas`` the keys of ``sys.modules`` are reduced to their
    top-level package names and intersected with ``blacklist``.  If the
    intersection is non-empty the process is terminated through
    ``sys.exit`` with a message listing the offending packages; otherwise
    the function returns normally.
    """
    import pandas  # noqa

    # Only the first dotted component matters: 'scipy.sparse' -> 'scipy'.
    top_level = {name.split('.')[0] for name in sys.modules}
    imported = top_level & blacklist
    if imported:
        sys.exit("Imported {}".format(imported))
33+
34+
35+
if __name__ == '__main__':
36+
main()

doc/source/whatsnew/v0.21.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ Other Enhancements
164164
- :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`)
165165
- :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`)
166166
- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names
167+
- Improved the import time of pandas by about 2.25x (:issue:`16764`)
167168

168169

169170
.. _whatsnew_0210.api_breaking:
@@ -559,6 +560,8 @@ Other API Changes
559560
- :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`)
560561
- Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`)
561562
- Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`)
563+
- Pandas no longer registers matplotlib converters on import. The converters
564+
will be registered and used when the first plot is drawn (:issue:`17710`)
562565

563566
.. _whatsnew_0210.deprecations:
564567

pandas/core/computation/__init__.py

-23
Original file line numberDiff line numberDiff line change
@@ -1,23 +0,0 @@
1-
2-
import warnings
3-
from distutils.version import LooseVersion
4-
5-
_NUMEXPR_INSTALLED = False
6-
_MIN_NUMEXPR_VERSION = "2.4.6"
7-
8-
try:
9-
import numexpr as ne
10-
ver = ne.__version__
11-
_NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION)
12-
13-
if not _NUMEXPR_INSTALLED:
14-
warnings.warn(
15-
"The installed version of numexpr {ver} is not supported "
16-
"in pandas and will be not be used\nThe minimum supported "
17-
"version is {min_ver}\n".format(
18-
ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)
19-
20-
except ImportError: # pragma: no cover
21-
pass
22-
23-
__all__ = ['_NUMEXPR_INSTALLED']

pandas/core/computation/check.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import warnings
2+
from distutils.version import LooseVersion
3+
4+
_NUMEXPR_INSTALLED = False
5+
_MIN_NUMEXPR_VERSION = "2.4.6"
6+
7+
try:
8+
import numexpr as ne
9+
ver = ne.__version__
10+
_NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION)
11+
12+
if not _NUMEXPR_INSTALLED:
13+
warnings.warn(
14+
"The installed version of numexpr {ver} is not supported "
15+
"in pandas and will not be used\nThe minimum supported "
16+
"version is {min_ver}\n".format(
17+
ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)
18+
19+
except ImportError: # pragma: no cover
20+
pass
21+
22+
__all__ = ['_NUMEXPR_INSTALLED']

pandas/core/computation/eval.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55

66
import tokenize
77
from pandas.io.formats.printing import pprint_thing
8-
from pandas.core.computation import _NUMEXPR_INSTALLED
9-
from pandas.core.computation.expr import Expr, _parsers, tokenize_string
108
from pandas.core.computation.scope import _ensure_scope
119
from pandas.compat import string_types
1210
from pandas.core.computation.engines import _engines
@@ -32,6 +30,7 @@ def _check_engine(engine):
3230
string engine
3331
3432
"""
33+
from pandas.core.computation.check import _NUMEXPR_INSTALLED
3534

3635
if engine is None:
3736
if _NUMEXPR_INSTALLED:
@@ -69,6 +68,8 @@ def _check_parser(parser):
6968
KeyError
7069
* If an invalid parser is passed
7170
"""
71+
from pandas.core.computation.expr import _parsers
72+
7273
if parser not in _parsers:
7374
raise KeyError('Invalid parser {parser!r} passed, valid parsers are'
7475
' {valid}'.format(parser=parser, valid=_parsers.keys()))
@@ -129,6 +130,8 @@ def _convert_expression(expr):
129130

130131

131132
def _check_for_locals(expr, stack_level, parser):
133+
from pandas.core.computation.expr import tokenize_string
134+
132135
at_top_of_stack = stack_level == 0
133136
not_pandas_parser = parser != 'pandas'
134137

@@ -252,6 +255,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
252255
pandas.DataFrame.query
253256
pandas.DataFrame.eval
254257
"""
258+
from pandas.core.computation.expr import Expr
255259

256260
inplace = validate_bool_kwarg(inplace, "inplace")
257261

pandas/core/computation/expressions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import warnings
1010
import numpy as np
1111
from pandas.core.common import _values_from_object
12-
from pandas.core.computation import _NUMEXPR_INSTALLED
12+
from pandas.core.computation.check import _NUMEXPR_INSTALLED
1313
from pandas.core.config import get_option
1414

1515
if _NUMEXPR_INSTALLED:

pandas/core/config_init.py

+30-28
Original file line numberDiff line numberDiff line change
@@ -437,34 +437,36 @@ def use_inf_as_na_cb(key):
437437
writer_engine_doc = """
438438
: string
439439
The default Excel writer engine for '{ext}' files. Available options:
440-
'{default}' (the default){others}.
441-
"""
442-
443-
with cf.config_prefix('io.excel'):
444-
# going forward, will be additional writers
445-
for ext, options in [('xls', ['xlwt']), ('xlsm', ['openpyxl'])]:
446-
default = options.pop(0)
447-
if options:
448-
options = " " + ", ".join(options)
449-
else:
450-
options = ""
451-
doc = writer_engine_doc.format(ext=ext, default=default,
452-
others=options)
453-
cf.register_option(ext + '.writer', default, doc, validator=str)
454-
455-
def _register_xlsx(engine, other):
456-
others = ", '{other}'".format(other=other)
457-
doc = writer_engine_doc.format(ext='xlsx', default=engine,
458-
others=others)
459-
cf.register_option('xlsx.writer', engine, doc, validator=str)
460-
461-
try:
462-
# better memory footprint
463-
import xlsxwriter # noqa
464-
_register_xlsx('xlsxwriter', 'openpyxl')
465-
except ImportError:
466-
# fallback
467-
_register_xlsx('openpyxl', 'xlsxwriter')
440+
auto, {others}.
441+
"""
442+
443+
_xls_options = ['xlwt']
444+
_xlsm_options = ['openpyxl']
445+
_xlsx_options = ['openpyxl', 'xlsxwriter']
446+
447+
448+
with cf.config_prefix("io.excel.xls"):
449+
cf.register_option("writer", "auto",
450+
writer_engine_doc.format(
451+
ext='xls',
452+
others=', '.join(_xls_options)),
453+
validator=str)
454+
455+
with cf.config_prefix("io.excel.xlsm"):
456+
cf.register_option("writer", "auto",
457+
writer_engine_doc.format(
458+
ext='xlsm',
459+
others=', '.join(_xlsm_options)),
460+
validator=str)
461+
462+
463+
with cf.config_prefix("io.excel.xlsx"):
464+
cf.register_option("writer", "auto",
465+
writer_engine_doc.format(
466+
ext='xlsx',
467+
others=', '.join(_xlsx_options)),
468+
validator=str)
469+
468470

469471
# Set up the io.parquet specific configuration.
470472
parquet_engine_doc = """

pandas/core/frame.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,7 @@
7676
create_block_manager_from_blocks)
7777
from pandas.core.series import Series
7878
from pandas.core.categorical import Categorical
79-
import pandas.core.computation.expressions as expressions
8079
import pandas.core.algorithms as algorithms
81-
from pandas.core.computation.eval import eval as _eval
8280
from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u,
8381
OrderedDict, raise_with_traceback)
8482
from pandas import compat
@@ -2296,6 +2294,8 @@ def eval(self, expr, inplace=False, **kwargs):
22962294
>>> df.eval('a + b')
22972295
>>> df.eval('c = a + b')
22982296
"""
2297+
from pandas.core.computation.eval import eval as _eval
2298+
22992299
inplace = validate_bool_kwarg(inplace, 'inplace')
23002300
resolvers = kwargs.pop('resolvers', None)
23012301
kwargs['level'] = kwargs.pop('level', 0) + 1
@@ -3840,6 +3840,7 @@ def _combine_const(self, other, func, raise_on_error=True, try_cast=True):
38403840

38413841
def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True):
38423842

3843+
import pandas.core.computation.expressions as expressions
38433844
# unique
38443845
if self.columns.is_unique:
38453846

@@ -3992,6 +3993,7 @@ def combine_first(self, other):
39923993
-------
39933994
combined : DataFrame
39943995
"""
3996+
import pandas.core.computation.expressions as expressions
39953997

39963998
def combiner(x, y, needs_i8_conversion=False):
39973999
x_values = x.values if hasattr(x, 'values') else x
@@ -4027,6 +4029,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
40274029
If True, will raise an error if the DataFrame and other both
40284030
contain data in the same place.
40294031
"""
4032+
import pandas.core.computation.expressions as expressions
40304033
# TODO: Support other joins
40314034
if join != 'left': # pragma: no cover
40324035
raise NotImplementedError("Only left join is supported")

pandas/core/internals.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@
6666
from pandas._libs.tslib import Timedelta
6767
from pandas._libs.lib import BlockPlacement
6868

69-
import pandas.core.computation.expressions as expressions
7069
from pandas.util._decorators import cache_readonly
7170
from pandas.util._validators import validate_bool_kwarg
7271
from pandas import compat
@@ -1395,6 +1394,8 @@ def where(self, other, cond, align=True, raise_on_error=True,
13951394
-------
13961395
a new block(s), the result of the func
13971396
"""
1397+
import pandas.core.computation.expressions as expressions
1398+
13981399
values = self.values
13991400
orig_other = other
14001401
if transpose:

pandas/core/ops.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
from pandas import compat
1818
from pandas.util._decorators import Appender
19-
import pandas.core.computation.expressions as expressions
2019

2120
from pandas.compat import bind_method
2221
import pandas.core.missing as missing
@@ -668,8 +667,9 @@ def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, default_axis=None,
668667
Wrapper function for Series arithmetic operations, to avoid
669668
code duplication.
670669
"""
671-
672670
def na_op(x, y):
671+
import pandas.core.computation.expressions as expressions
672+
673673
try:
674674
result = expressions.evaluate(op, str_rep, x, y,
675675
raise_on_error=True, **eval_kwargs)
@@ -1193,6 +1193,8 @@ def to_series(right):
11931193
def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns',
11941194
fill_zeros=None, **eval_kwargs):
11951195
def na_op(x, y):
1196+
import pandas.core.computation.expressions as expressions
1197+
11961198
try:
11971199
result = expressions.evaluate(op, str_rep, x, y,
11981200
raise_on_error=True, **eval_kwargs)
@@ -1349,6 +1351,8 @@ def _arith_method_PANEL(op, name, str_rep=None, fill_zeros=None,
13491351
# copied from Series na_op above, but without unnecessary branch for
13501352
# non-scalar
13511353
def na_op(x, y):
1354+
import pandas.core.computation.expressions as expressions
1355+
13521356
try:
13531357
result = expressions.evaluate(op, str_rep, x, y,
13541358
raise_on_error=True, **eval_kwargs)
@@ -1378,6 +1382,8 @@ def f(self, other):
13781382

13791383
def _comp_method_PANEL(op, name, str_rep=None, masker=False):
13801384
def na_op(x, y):
1385+
import pandas.core.computation.expressions as expressions
1386+
13811387
try:
13821388
result = expressions.evaluate(op, str_rep, x, y,
13831389
raise_on_error=True)

pandas/core/panel.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
is_string_like, is_scalar)
1616
from pandas.core.dtypes.missing import notna
1717

18-
import pandas.core.computation.expressions as expressions
1918
import pandas.core.common as com
2019
import pandas.core.ops as ops
2120
import pandas.core.missing as missing
@@ -1500,6 +1499,8 @@ def _add_aggregate_operations(cls, use_numexpr=True):
15001499
def _panel_arith_method(op, name, str_rep=None, default_axis=None,
15011500
fill_zeros=None, **eval_kwargs):
15021501
def na_op(x, y):
1502+
import pandas.core.computation.expressions as expressions
1503+
15031504
try:
15041505
result = expressions.evaluate(op, str_rep, x, y,
15051506
raise_on_error=True,

0 commit comments

Comments
 (0)