Skip to content

Delay import #17710

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Oct 2, 2017
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ script:
- ci/script_single.sh
- ci/script_multi.sh
- ci/lint.sh
- echo "checking imports"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe make a checking_imports.sh just to make style similar to existing

- source activate pandas && python ci/check_imports.py
- echo "script done"

after_success:
Expand Down
36 changes: 36 additions & 0 deletions ci/check_imports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
Check that certain modules are not loaded by `import pandas`
"""
import sys

# Top-level package names that must NOT appear in ``sys.modules`` as a side
# effect of a bare ``import pandas``. Entries are compared against the first
# dotted component of every loaded module name, so they must match the
# importable package name exactly (including case).
blacklist = {
    'bs4',
    'html5lib',
    # The package imports as ``IPython``; a lowercase entry would never match
    # a key in ``sys.modules``.
    'IPython',
    # Keep the trailing comma: without it Python concatenates adjacent string
    # literals and silently produces a single bogus 'jinja2lxml' entry.
    'jinja2',
    'lxml',
    'matplotlib',
    'numexpr',
    'openpyxl',
    'py',
    'pytest',
    's3fs',
    'scipy',
    'tables',
    'xlrd',
    'xlsxwriter',
    'xlwt',
}


def main():
    """Import pandas and fail if a blacklisted package was loaded with it.

    After importing pandas, the first dotted component of every key in
    ``sys.modules`` is collected and intersected with ``blacklist``. If the
    intersection is non-empty, the process is terminated through
    ``sys.exit`` with a message naming the offending packages; otherwise
    the function returns normally.
    """
    import pandas  # noqa

    # Reduce e.g. 'pandas.core.frame' to its top-level package 'pandas'.
    top_level = {name.partition('.')[0] for name in sys.modules}
    imported = top_level & blacklist
    if imported:
        sys.exit("Imported {}".format(imported))


if __name__ == '__main__':
    main()
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ Other Enhancements
- :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`)
- :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`)
- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names
- Improved the import time of pandas by about 2.25x (:issue:`16764`)


.. _whatsnew_0210.api_breaking:
Expand Down Expand Up @@ -559,6 +560,8 @@ Other API Changes
- :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`)
- Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`)
- Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`)
- Pandas no longer registers matplotlib converters on import. The converters
will be registered and used when the first plot is drawn (:issue:`17710`)

.. _whatsnew_0210.deprecations:

Expand Down
23 changes: 0 additions & 23 deletions pandas/core/computation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,23 +0,0 @@

import warnings
from distutils.version import LooseVersion

# Whether a usable numexpr is available; stays False unless the import
# succeeds AND the installed version meets the minimum below.
_NUMEXPR_INSTALLED = False
_MIN_NUMEXPR_VERSION = "2.4.6"

try:
    import numexpr as ne
except ImportError:  # pragma: no cover
    # numexpr is optional: without it the flag simply stays False.
    pass
else:
    ver = ne.__version__
    # Compare two LooseVersion objects rather than relying on a reflected
    # str-vs-LooseVersion comparison.
    _NUMEXPR_INSTALLED = (LooseVersion(ver) >=
                          LooseVersion(_MIN_NUMEXPR_VERSION))

    if not _NUMEXPR_INSTALLED:
        # Installed but too old: tell the user why it is being ignored.
        warnings.warn(
            "The installed version of numexpr {ver} is not supported "
            "in pandas and will not be used\nThe minimum supported "
            "version is {min_ver}\n".format(
                ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)

__all__ = ['_NUMEXPR_INSTALLED']
22 changes: 22 additions & 0 deletions pandas/core/computation/check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import warnings
from distutils.version import LooseVersion

# Whether a usable numexpr is available; stays False unless the import
# succeeds AND the installed version meets the minimum below.
_NUMEXPR_INSTALLED = False
_MIN_NUMEXPR_VERSION = "2.4.6"

try:
    import numexpr as ne
except ImportError:  # pragma: no cover
    # numexpr is optional: without it the flag simply stays False.
    pass
else:
    ver = ne.__version__
    # Compare two LooseVersion objects rather than relying on a reflected
    # str-vs-LooseVersion comparison.
    _NUMEXPR_INSTALLED = (LooseVersion(ver) >=
                          LooseVersion(_MIN_NUMEXPR_VERSION))

    if not _NUMEXPR_INSTALLED:
        # Installed but too old: tell the user why it is being ignored.
        # NOTE(review): a reviewer remarked that bottleneck is gated with
        # the same kind of version check elsewhere in pandas -- confirm
        # before sharing a helper.
        warnings.warn(
            "The installed version of numexpr {ver} is not supported "
            "in pandas and will not be used\nThe minimum supported "
            "version is {min_ver}\n".format(
                ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)

__all__ = ['_NUMEXPR_INSTALLED']
8 changes: 6 additions & 2 deletions pandas/core/computation/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

import tokenize
from pandas.io.formats.printing import pprint_thing
from pandas.core.computation import _NUMEXPR_INSTALLED
from pandas.core.computation.expr import Expr, _parsers, tokenize_string
from pandas.core.computation.scope import _ensure_scope
from pandas.compat import string_types
from pandas.core.computation.engines import _engines
Expand All @@ -32,6 +30,7 @@ def _check_engine(engine):
string engine

"""
from pandas.core.computation.check import _NUMEXPR_INSTALLED

if engine is None:
if _NUMEXPR_INSTALLED:
Expand Down Expand Up @@ -69,6 +68,8 @@ def _check_parser(parser):
KeyError
* If an invalid parser is passed
"""
from pandas.core.computation.expr import _parsers

if parser not in _parsers:
raise KeyError('Invalid parser {parser!r} passed, valid parsers are'
' {valid}'.format(parser=parser, valid=_parsers.keys()))
Expand Down Expand Up @@ -129,6 +130,8 @@ def _convert_expression(expr):


def _check_for_locals(expr, stack_level, parser):
from pandas.core.computation.expr import tokenize_string

at_top_of_stack = stack_level == 0
not_pandas_parser = parser != 'pandas'

Expand Down Expand Up @@ -252,6 +255,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
pandas.DataFrame.query
pandas.DataFrame.eval
"""
from pandas.core.computation.expr import Expr

inplace = validate_bool_kwarg(inplace, "inplace")

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/computation/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import warnings
import numpy as np
from pandas.core.common import _values_from_object
from pandas.core.computation import _NUMEXPR_INSTALLED
from pandas.core.computation.check import _NUMEXPR_INSTALLED
from pandas.core.config import get_option

if _NUMEXPR_INSTALLED:
Expand Down
58 changes: 30 additions & 28 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,34 +437,36 @@ def use_inf_as_na_cb(key):
writer_engine_doc = """
: string
The default Excel writer engine for '{ext}' files. Available options:
'{default}' (the default){others}.
"""

with cf.config_prefix('io.excel'):
# going forward, will be additional writers
for ext, options in [('xls', ['xlwt']), ('xlsm', ['openpyxl'])]:
default = options.pop(0)
if options:
options = " " + ", ".join(options)
else:
options = ""
doc = writer_engine_doc.format(ext=ext, default=default,
others=options)
cf.register_option(ext + '.writer', default, doc, validator=str)

def _register_xlsx(engine, other):
others = ", '{other}'".format(other=other)
doc = writer_engine_doc.format(ext='xlsx', default=engine,
others=others)
cf.register_option('xlsx.writer', engine, doc, validator=str)

try:
# better memory footprint
import xlsxwriter # noqa
_register_xlsx('xlsxwriter', 'openpyxl')
except ImportError:
# fallback
_register_xlsx('openpyxl', 'xlsxwriter')
auto, {others}.
"""

# Engine names listed in the documentation of each per-extension writer
# option; they are only used to render the option docs below.
_xls_options = ['xlwt']
_xlsm_options = ['openpyxl']
_xlsx_options = ['openpyxl', 'xlsxwriter']


# Register ``io.excel.<ext>.writer`` for each extension. Every option defaults
# to "auto" and is validated as a string; only the rendered doc text differs.
for _ext, _engines in (('xls', _xls_options),
                       ('xlsm', _xlsm_options),
                       ('xlsx', _xlsx_options)):
    _doc = writer_engine_doc.format(ext=_ext, others=', '.join(_engines))
    with cf.config_prefix("io.excel." + _ext):
        cf.register_option("writer", "auto", _doc, validator=str)
# Do not leave the loop temporaries behind in the module namespace.
del _ext, _engines, _doc


# Set up the io.parquet specific configuration.
parquet_engine_doc = """
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@
create_block_manager_from_blocks)
from pandas.core.series import Series
from pandas.core.categorical import Categorical
import pandas.core.computation.expressions as expressions
import pandas.core.algorithms as algorithms
from pandas.core.computation.eval import eval as _eval
from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u,
OrderedDict, raise_with_traceback)
from pandas import compat
Expand Down Expand Up @@ -2296,6 +2294,8 @@ def eval(self, expr, inplace=False, **kwargs):
>>> df.eval('a + b')
>>> df.eval('c = a + b')
"""
from pandas.core.computation.eval import eval as _eval

inplace = validate_bool_kwarg(inplace, 'inplace')
resolvers = kwargs.pop('resolvers', None)
kwargs['level'] = kwargs.pop('level', 0) + 1
Expand Down Expand Up @@ -3840,6 +3840,7 @@ def _combine_const(self, other, func, raise_on_error=True, try_cast=True):

def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True):

import pandas.core.computation.expressions as expressions
# unique
if self.columns.is_unique:

Expand Down Expand Up @@ -3992,6 +3993,7 @@ def combine_first(self, other):
-------
combined : DataFrame
"""
import pandas.core.computation.expressions as expressions

def combiner(x, y, needs_i8_conversion=False):
x_values = x.values if hasattr(x, 'values') else x
Expand Down Expand Up @@ -4027,6 +4029,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
If True, will raise an error if the DataFrame and other both
contain data in the same place.
"""
import pandas.core.computation.expressions as expressions
# TODO: Support other joins
if join != 'left': # pragma: no cover
raise NotImplementedError("Only left join is supported")
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
from pandas._libs.tslib import Timedelta
from pandas._libs.lib import BlockPlacement

import pandas.core.computation.expressions as expressions
from pandas.util._decorators import cache_readonly
from pandas.util._validators import validate_bool_kwarg
from pandas import compat
Expand Down Expand Up @@ -1395,6 +1394,8 @@ def where(self, other, cond, align=True, raise_on_error=True,
-------
a new block(s), the result of the func
"""
import pandas.core.computation.expressions as expressions

values = self.values
orig_other = other
if transpose:
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

from pandas import compat
from pandas.util._decorators import Appender
import pandas.core.computation.expressions as expressions

from pandas.compat import bind_method
import pandas.core.missing as missing
Expand Down Expand Up @@ -668,8 +667,9 @@ def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, default_axis=None,
Wrapper function for Series arithmetic operations, to avoid
code duplication.
"""

def na_op(x, y):
import pandas.core.computation.expressions as expressions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if these imports become an issue i can do a global check (like u do with matplotlib)


try:
result = expressions.evaluate(op, str_rep, x, y,
raise_on_error=True, **eval_kwargs)
Expand Down Expand Up @@ -1193,6 +1193,8 @@ def to_series(right):
def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns',
fill_zeros=None, **eval_kwargs):
def na_op(x, y):
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y,
raise_on_error=True, **eval_kwargs)
Expand Down Expand Up @@ -1349,6 +1351,8 @@ def _arith_method_PANEL(op, name, str_rep=None, fill_zeros=None,
# copied from Series na_op above, but without unnecessary branch for
# non-scalar
def na_op(x, y):
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y,
raise_on_error=True, **eval_kwargs)
Expand Down Expand Up @@ -1378,6 +1382,8 @@ def f(self, other):

def _comp_method_PANEL(op, name, str_rep=None, masker=False):
def na_op(x, y):
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y,
raise_on_error=True)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
is_string_like, is_scalar)
from pandas.core.dtypes.missing import notna

import pandas.core.computation.expressions as expressions
import pandas.core.common as com
import pandas.core.ops as ops
import pandas.core.missing as missing
Expand Down Expand Up @@ -1500,6 +1499,8 @@ def _add_aggregate_operations(cls, use_numexpr=True):
def _panel_arith_method(op, name, str_rep=None, default_axis=None,
fill_zeros=None, **eval_kwargs):
def na_op(x, y):
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y,
raise_on_error=True,
Expand Down
Loading