From 86050cd5dad0a1df8514a5aa980359ad86879bd2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 23 Jun 2017 10:16:32 -0500 Subject: [PATCH 01/14] Delay matplotlib import --- pandas/plotting/__init__.py | 6 ------ pandas/plotting/_core.py | 22 ++++++++++++++++------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py index c3cbedb0fc28c..8f98e297e3e66 100644 --- a/pandas/plotting/__init__.py +++ b/pandas/plotting/__init__.py @@ -4,12 +4,6 @@ # flake8: noqa -try: # mpl optional - from pandas.plotting import _converter - _converter.register() # needs to override so set_xlim works with str/number -except ImportError: - pass - from pandas.plotting._misc import (scatter_matrix, radviz, andrews_curves, bootstrap_plot, parallel_coordinates, lag_plot, diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index aa919d600ec52..8ced50c7df696 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -40,12 +40,7 @@ _get_xlim, _set_ticks_props, format_date_labels) - -if _mpl_ge_1_5_0(): - # Compat with mp 1.5, which uses cycler. - import cycler - colors = mpl_stylesheet.pop('axes.color_cycle') - mpl_stylesheet['axes.prop_cycle'] = cycler.cycler('color', colors) +_registered = False def _get_standard_kind(kind): @@ -95,6 +90,7 @@ def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, secondary_y=False, colormap=None, table=False, layout=None, **kwds): + self._setup() self.data = data self.by = by @@ -178,6 +174,20 @@ def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, self._validate_color_args() + def _setup(self): + global _registered + if not _registered: + from pandas.plotting import _converter + _converter.register() + + if _mpl_ge_1_5_0(): + # Compat with mp 1.5, which uses cycler. 
+ import cycler + colors = mpl_stylesheet.pop('axes.color_cycle') + mpl_stylesheet['axes.prop_cycle'] = cycler.cycler('color', colors) + + _registered = True + def _validate_color_args(self): if 'color' not in self.kwds and 'colors' in self.kwds: warnings.warn(("'colors' is being deprecated. Please use 'color'" From be3d13eeb923be9718e7de7b26b24e0df51d2bb4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Sep 2017 12:41:57 -0500 Subject: [PATCH 02/14] delay pytest --- pandas/util/_tester.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py index aeb4259a9edae..d18467f17ec5b 100644 --- a/pandas/util/_tester.py +++ b/pandas/util/_tester.py @@ -7,21 +7,19 @@ PKG = os.path.dirname(os.path.dirname(__file__)) -try: - import pytest -except ImportError: - def test(): +def test(extra_args=None): + try: + import pytest + except ImportError: raise ImportError("Need pytest>=3.0 to run tests") -else: - def test(extra_args=None): - cmd = ['--skip-slow', '--skip-network'] - if extra_args: - if not isinstance(extra_args, list): - extra_args = [extra_args] - cmd = extra_args - cmd += [PKG] - print("running: pytest {}".format(' '.join(cmd))) - sys.exit(pytest.main(cmd)) + cmd = ['--skip-slow', '--skip-network'] + if extra_args: + if not isinstance(extra_args, list): + extra_args = [extra_args] + cmd = extra_args + cmd += [PKG] + print("running: pytest {}".format(' '.join(cmd))) + sys.exit(pytest.main(cmd)) __all__ = ['test'] From cd2226c0018d06580771050d8dd9e737ec198799 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Sep 2017 13:27:56 -0500 Subject: [PATCH 03/14] Delay excel, probably broken --- pandas/core/config_init.py | 24 +++++++++++++++++------- pandas/io/common.py | 12 +++++------- pandas/io/excel.py | 4 +++- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 5652424a8f75b..09e80cb7bdee3 
100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -458,13 +458,23 @@ def _register_xlsx(engine, other): others=others) cf.register_option('xlsx.writer', engine, doc, validator=str) - try: - # better memory footprint - import xlsxwriter # noqa - _register_xlsx('xlsxwriter', 'openpyxl') - except ImportError: - # fallback - _register_xlsx('openpyxl', 'xlsxwriter') +_excel_registered = False + + +def _register_excel_engines(): + global _excel_registered + + if not _excel_registered: + with cf.config_prefix('io.excel'): + try: + # better memory footprint + import xlsxwriter # noqa + _register_xlsx('xlsxwriter', 'openpyxl') + except ImportError: + # fallback + _register_xlsx('openpyxl', 'xlsxwriter') + _excel_registered = True + # Set up the io.parquet specific configuration. parquet_engine_doc = """ diff --git a/pandas/io/common.py b/pandas/io/common.py index 69a7e69ea724b..3f04d34d37186 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -19,13 +19,6 @@ # gh-12665: Alias for now and remove later. CParserError = ParserError - -try: - from s3fs import S3File - need_text_wrapping = (BytesIO, S3File) -except ImportError: - need_text_wrapping = (BytesIO,) - # common NA values # no longer excluding inf representations # '1.#INF','-1.#INF', '1.#INF000000', @@ -322,6 +315,11 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, handles : list of file-like objects A list of file-like object that were openned in this function. 
""" + try: + from s3fs import S3File + need_text_wrapping = (BytesIO, S3File) + except ImportError: + need_text_wrapping = (BytesIO,) handles = list() f = path_or_buf diff --git a/pandas/io/excel.py b/pandas/io/excel.py index afecd76c498ef..a7a3b64a95a48 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -25,7 +25,7 @@ import pandas._libs.json as json from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, string_types, OrderedDict) -from pandas.core import config +from pandas.core import config, config_init from pandas.io.formats.printing import pprint_thing import pandas.compat as compat import pandas.compat.openpyxl_compat as openpyxl_compat @@ -690,6 +690,8 @@ class ExcelWriter(object): # ExcelWriter. def __new__(cls, path, engine=None, **kwargs): # only switch class if generic(ExcelWriter) + config_init._register_excel_engines() + if issubclass(cls, ExcelWriter): if engine is None: if isinstance(path, string_types): From 95d96fc0a1e5c740838d8f8eda4989795e0f6561 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 28 Sep 2017 14:16:27 -0500 Subject: [PATCH 04/14] Matplotlib cleanup --- pandas/plotting/_core.py | 9 +----- pandas/plotting/_style.py | 67 --------------------------------------- 2 files changed, 1 insertion(+), 75 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 8ced50c7df696..54fa12781542d 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -33,7 +33,7 @@ from pandas.plotting._compat import (_mpl_ge_1_3_1, _mpl_ge_1_5_0, _mpl_ge_2_0_0) -from pandas.plotting._style import (mpl_stylesheet, plot_params, +from pandas.plotting._style import (plot_params, _get_standard_colors) from pandas.plotting._tools import (_subplots, _flatten, table, _handle_shared_axes, _get_all_lines, @@ -179,13 +179,6 @@ def _setup(self): if not _registered: from pandas.plotting import _converter _converter.register() - - if _mpl_ge_1_5_0(): - # Compat with mp 1.5, which uses cycler. 
- import cycler - colors = mpl_stylesheet.pop('axes.color_cycle') - mpl_stylesheet['axes.prop_cycle'] = cycler.cycler('color', colors) - _registered = True def _validate_color_args(self): diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py index 8cb4e30e0d91c..f1d53da5f1396 100644 --- a/pandas/plotting/_style.py +++ b/pandas/plotting/_style.py @@ -14,73 +14,6 @@ from pandas.plotting._compat import _mpl_ge_2_0_0 -# Extracted from https://gist.github.com/huyng/816622 -# this is the rcParams set when setting display.with_mpl_style -# to True. -mpl_stylesheet = { - 'axes.axisbelow': True, - 'axes.color_cycle': ['#348ABD', - '#7A68A6', - '#A60628', - '#467821', - '#CF4457', - '#188487', - '#E24A33'], - 'axes.edgecolor': '#bcbcbc', - 'axes.facecolor': '#eeeeee', - 'axes.grid': True, - 'axes.labelcolor': '#555555', - 'axes.labelsize': 'large', - 'axes.linewidth': 1.0, - 'axes.titlesize': 'x-large', - 'figure.edgecolor': 'white', - 'figure.facecolor': 'white', - 'figure.figsize': (6.0, 4.0), - 'figure.subplot.hspace': 0.5, - 'font.family': 'monospace', - 'font.monospace': ['Andale Mono', - 'Nimbus Mono L', - 'Courier New', - 'Courier', - 'Fixed', - 'Terminal', - 'monospace'], - 'font.size': 10, - 'interactive': True, - 'keymap.all_axes': ['a'], - 'keymap.back': ['left', 'c', 'backspace'], - 'keymap.forward': ['right', 'v'], - 'keymap.fullscreen': ['f'], - 'keymap.grid': ['g'], - 'keymap.home': ['h', 'r', 'home'], - 'keymap.pan': ['p'], - 'keymap.save': ['s'], - 'keymap.xscale': ['L', 'k'], - 'keymap.yscale': ['l'], - 'keymap.zoom': ['o'], - 'legend.fancybox': True, - 'lines.antialiased': True, - 'lines.linewidth': 1.0, - 'patch.antialiased': True, - 'patch.edgecolor': '#EEEEEE', - 'patch.facecolor': '#348ABD', - 'patch.linewidth': 0.5, - 'toolbar': 'toolbar2', - 'xtick.color': '#555555', - 'xtick.direction': 'in', - 'xtick.major.pad': 6.0, - 'xtick.major.size': 0.0, - 'xtick.minor.pad': 6.0, - 'xtick.minor.size': 0.0, - 'ytick.color': '#555555', - 
'ytick.direction': 'in', - 'ytick.major.pad': 6.0, - 'ytick.major.size': 0.0, - 'ytick.minor.pad': 6.0, - 'ytick.minor.size': 0.0 -} - - def _get_standard_colors(num_colors=None, colormap=None, color_type='default', color=None): import matplotlib.pyplot as plt From bede1f9ecc987c75215cfa1102b633816b0cd70f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 28 Sep 2017 14:36:40 -0500 Subject: [PATCH 05/14] excel configuration --- pandas/core/config_init.py | 66 +++++++++++++++++--------------------- pandas/io/excel.py | 16 +++++++-- 2 files changed, 43 insertions(+), 39 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 09e80cb7bdee3..33531e80449d8 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -437,43 +437,35 @@ def use_inf_as_na_cb(key): writer_engine_doc = """ : string The default Excel writer engine for '{ext}' files. Available options: - '{default}' (the default){others}. -""" - -with cf.config_prefix('io.excel'): - # going forward, will be additional writers - for ext, options in [('xls', ['xlwt']), ('xlsm', ['openpyxl'])]: - default = options.pop(0) - if options: - options = " " + ", ".join(options) - else: - options = "" - doc = writer_engine_doc.format(ext=ext, default=default, - others=options) - cf.register_option(ext + '.writer', default, doc, validator=str) - - def _register_xlsx(engine, other): - others = ", '{other}'".format(other=other) - doc = writer_engine_doc.format(ext='xlsx', default=engine, - others=others) - cf.register_option('xlsx.writer', engine, doc, validator=str) - -_excel_registered = False - - -def _register_excel_engines(): - global _excel_registered - - if not _excel_registered: - with cf.config_prefix('io.excel'): - try: - # better memory footprint - import xlsxwriter # noqa - _register_xlsx('xlsxwriter', 'openpyxl') - except ImportError: - # fallback - _register_xlsx('openpyxl', 'xlsxwriter') - _excel_registered = True + auto, {others}. 
+""" + +_xls_options = ['xlwt'] +_xlsm_options = ['openpyxl'] +_xlsx_options = ['openpyxl', 'xlsxwriter'] + + +with cf.config_prefix("io.excel.xls"): + cf.register_option("writer", "auto", + writer_engine_doc.format( + ext='xls', + others=', '.join(_xls_options)), + validator=str) + +with cf.config_prefix("io.excel.xlsm"): + cf.register_option("writer", "auto", + writer_engine_doc.format( + ext='xlsm', + others=', '.join(_xlsm_options)), + validator=str) + + +with cf.config_prefix("io.excel.xlsx"): + cf.register_option("writer", "auto", + writer_engine_doc.format( + ext='xlsx', + others=', '.join(_xlsx_options)), + validator=str) # Set up the io.parquet specific configuration. diff --git a/pandas/io/excel.py b/pandas/io/excel.py index a7a3b64a95a48..9bcd24c0b660d 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -170,6 +170,16 @@ def register_writer(klass): _writer_extensions.append(ext) +def _get_default_writer(ext): + _default_writers = {'xlsx': 'openpyxl', 'xlsm': 'openpyxl', 'xls': 'xlwt'} + try: + import xlsxwriter # noqa + _default_writers['xlsx'] = 'xlsxwriter' + except ImportError: + pass + return _default_writers[ext] + + def get_writer(engine_name): if engine_name == 'openpyxl': try: @@ -690,10 +700,10 @@ class ExcelWriter(object): # ExcelWriter. 
def __new__(cls, path, engine=None, **kwargs): # only switch class if generic(ExcelWriter) - config_init._register_excel_engines() if issubclass(cls, ExcelWriter): - if engine is None: + if engine is None or (isinstance(engine, string_types) and + engine == 'auto'): if isinstance(path, string_types): ext = os.path.splitext(path)[-1][1:] else: @@ -702,6 +712,8 @@ def __new__(cls, path, engine=None, **kwargs): try: engine = config.get_option('io.excel.{ext}.writer' .format(ext=ext)) + if engine == 'auto': + engine = _get_default_writer(ext) except KeyError: error = ValueError("No engine for filetype: '{ext}'" .format(ext=ext)) From a1a15bb64249c83364d5ce39f8d134071ddcb6a1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 28 Sep 2017 15:55:00 -0500 Subject: [PATCH 06/14] PERF: delay numexpr --- pandas/core/computation/__init__.py | 23 ----------------------- pandas/core/computation/check.py | 22 ++++++++++++++++++++++ pandas/core/computation/eval.py | 8 ++++++-- pandas/core/computation/expressions.py | 2 +- pandas/core/frame.py | 7 +++++-- pandas/core/internals.py | 3 ++- pandas/core/ops.py | 10 ++++++++-- pandas/core/panel.py | 3 ++- pandas/tests/computation/test_compat.py | 4 ++-- pandas/tests/frame/test_query_eval.py | 2 +- pandas/util/testing.py | 8 +++++--- 11 files changed, 54 insertions(+), 38 deletions(-) create mode 100644 pandas/core/computation/check.py diff --git a/pandas/core/computation/__init__.py b/pandas/core/computation/__init__.py index e13faf890d1f8..e69de29bb2d1d 100644 --- a/pandas/core/computation/__init__.py +++ b/pandas/core/computation/__init__.py @@ -1,23 +0,0 @@ - -import warnings -from distutils.version import LooseVersion - -_NUMEXPR_INSTALLED = False -_MIN_NUMEXPR_VERSION = "2.4.6" - -try: - import numexpr as ne - ver = ne.__version__ - _NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION) - - if not _NUMEXPR_INSTALLED: - warnings.warn( - "The installed version of numexpr {ver} is not supported " - "in pandas and will be 
not be used\nThe minimum supported " - "version is {min_ver}\n".format( - ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning) - -except ImportError: # pragma: no cover - pass - -__all__ = ['_NUMEXPR_INSTALLED'] diff --git a/pandas/core/computation/check.py b/pandas/core/computation/check.py new file mode 100644 index 0000000000000..bb8cc74bad3c2 --- /dev/null +++ b/pandas/core/computation/check.py @@ -0,0 +1,22 @@ +import warnings +from distutils.version import LooseVersion + +_NUMEXPR_INSTALLED = False +_MIN_NUMEXPR_VERSION = "2.4.6" + +try: + import numexpr as ne + ver = ne.__version__ + _NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION) + + if not _NUMEXPR_INSTALLED: + warnings.warn( + "The installed version of numexpr {ver} is not supported " + "in pandas and will be not be used\nThe minimum supported " + "version is {min_ver}\n".format( + ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning) + +except ImportError: # pragma: no cover + pass + +__all__ = ['_NUMEXPR_INSTALLED'] diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index d391764794c1c..a5df6aea055ab 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -5,8 +5,6 @@ import tokenize from pandas.io.formats.printing import pprint_thing -from pandas.core.computation import _NUMEXPR_INSTALLED -from pandas.core.computation.expr import Expr, _parsers, tokenize_string from pandas.core.computation.scope import _ensure_scope from pandas.compat import string_types from pandas.core.computation.engines import _engines @@ -32,6 +30,7 @@ def _check_engine(engine): string engine """ + from pandas.core.computation.check import _NUMEXPR_INSTALLED if engine is None: if _NUMEXPR_INSTALLED: @@ -69,6 +68,8 @@ def _check_parser(parser): KeyError * If an invalid parser is passed """ + from pandas.core.computation.expr import _parsers + if parser not in _parsers: raise KeyError('Invalid parser {parser!r} passed, valid parsers are' ' 
{valid}'.format(parser=parser, valid=_parsers.keys())) @@ -129,6 +130,8 @@ def _convert_expression(expr): def _check_for_locals(expr, stack_level, parser): + from pandas.core.computation.expr import tokenize_string + at_top_of_stack = stack_level == 0 not_pandas_parser = parser != 'pandas' @@ -252,6 +255,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True, pandas.DataFrame.query pandas.DataFrame.eval """ + from pandas.core.computation.expr import Expr inplace = validate_bool_kwarg(inplace, "inplace") diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 8ddc625887a51..2196fb5917a44 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -9,7 +9,7 @@ import warnings import numpy as np from pandas.core.common import _values_from_object -from pandas.core.computation import _NUMEXPR_INSTALLED +from pandas.core.computation.check import _NUMEXPR_INSTALLED from pandas.core.config import get_option if _NUMEXPR_INSTALLED: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5d439f88bca15..01e83821d4524 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -76,9 +76,7 @@ create_block_manager_from_blocks) from pandas.core.series import Series from pandas.core.categorical import Categorical -import pandas.core.computation.expressions as expressions import pandas.core.algorithms as algorithms -from pandas.core.computation.eval import eval as _eval from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) from pandas import compat @@ -2296,6 +2294,8 @@ def eval(self, expr, inplace=False, **kwargs): >>> df.eval('a + b') >>> df.eval('c = a + b') """ + from pandas.core.computation.eval import eval as _eval + inplace = validate_bool_kwarg(inplace, 'inplace') resolvers = kwargs.pop('resolvers', None) kwargs['level'] = kwargs.pop('level', 0) + 1 @@ -3840,6 +3840,7 @@ def _combine_const(self, other, func, 
raise_on_error=True, try_cast=True): def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True): + import pandas.core.computation.expressions as expressions # unique if self.columns.is_unique: @@ -3992,6 +3993,7 @@ def combine_first(self, other): ------- combined : DataFrame """ + import pandas.core.computation.expressions as expressions def combiner(x, y, needs_i8_conversion=False): x_values = x.values if hasattr(x, 'values') else x @@ -4027,6 +4029,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, If True, will raise an error if the DataFrame and other both contain data in the same place. """ + import pandas.core.computation.expressions as expressions # TODO: Support other joins if join != 'left': # pragma: no cover raise NotImplementedError("Only left join is supported") diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 9e348819ce5a3..12ac7a5fd9f20 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -66,7 +66,6 @@ from pandas._libs.tslib import Timedelta from pandas._libs.lib import BlockPlacement -import pandas.core.computation.expressions as expressions from pandas.util._decorators import cache_readonly from pandas.util._validators import validate_bool_kwarg from pandas import compat @@ -1395,6 +1394,8 @@ def where(self, other, cond, align=True, raise_on_error=True, ------- a new block(s), the result of the func """ + import pandas.core.computation.expressions as expressions + values = self.values orig_other = other if transpose: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index d37acf48ed9c2..506b9267f32b4 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -16,7 +16,6 @@ from pandas import compat from pandas.util._decorators import Appender -import pandas.core.computation.expressions as expressions from pandas.compat import bind_method import pandas.core.missing as missing @@ -668,8 +667,9 @@ def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, 
default_axis=None, Wrapper function for Series arithmetic operations, to avoid code duplication. """ - def na_op(x, y): + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) @@ -1193,6 +1193,8 @@ def to_series(right): def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns', fill_zeros=None, **eval_kwargs): def na_op(x, y): + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) @@ -1349,6 +1351,8 @@ def _arith_method_PANEL(op, name, str_rep=None, fill_zeros=None, # copied from Series na_op above, but without unnecessary branch for # non-scalar def na_op(x, y): + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) @@ -1378,6 +1382,8 @@ def f(self, other): def _comp_method_PANEL(op, name, str_rep=None, masker=False): def na_op(x, y): + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index a3e35492ad9af..68733a3a8b94e 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -15,7 +15,6 @@ is_string_like, is_scalar) from pandas.core.dtypes.missing import notna -import pandas.core.computation.expressions as expressions import pandas.core.common as com import pandas.core.ops as ops import pandas.core.missing as missing @@ -1500,6 +1499,8 @@ def _add_aggregate_operations(cls, use_numexpr=True): def _panel_arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, **eval_kwargs): def na_op(x, y): + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, diff --git a/pandas/tests/computation/test_compat.py 
b/pandas/tests/computation/test_compat.py index ed569625177d3..af39ee9815313 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -5,13 +5,13 @@ from pandas.core.computation.engines import _engines import pandas.core.computation.expr as expr -from pandas.core.computation import _MIN_NUMEXPR_VERSION +from pandas.core.computation.check import _MIN_NUMEXPR_VERSION def test_compat(): # test we have compat with our version of nu - from pandas.core.computation import _NUMEXPR_INSTALLED + from pandas.core.computation.check import _NUMEXPR_INSTALLED try: import numexpr as ne ver = ne.__version__ diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index f0f1a2df27e93..a6c36792ef074 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -17,7 +17,7 @@ makeCustomDataframe as mkdf) import pandas.util.testing as tm -from pandas.core.computation import _NUMEXPR_INSTALLED +from pandas.core.computation.check import _NUMEXPR_INSTALLED from pandas.tests.frame.common import TestData diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c5f73ca0e885b..202c9473eea12 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -41,8 +41,6 @@ StringIO, PY3 ) -from pandas.core.computation import expressions as expr - from pandas import (bdate_range, CategoricalIndex, Categorical, IntervalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, RangeIndex, Index, MultiIndex, @@ -2660,7 +2658,11 @@ def __exit__(self, exc_type, exc_value, traceback): @contextmanager -def use_numexpr(use, min_elements=expr._MIN_ELEMENTS): +def use_numexpr(use, min_elements=None): + from pandas.core.computation import expressions as expr + if min_elements is None: + min_elements = expr._MIN_ELEMENTS + olduse = expr._USE_NUMEXPR oldmin = expr._MIN_ELEMENTS expr.set_use_numexpr(use) From 028bb8a7794fb7c9ed481c576a6fed0d490e55d8 Mon Sep 17 00:00:00 2001 From: Tom 
Augspurger Date: Thu, 28 Sep 2017 15:58:19 -0500 Subject: [PATCH 07/14] fixup! Matplotlib cleanup --- pandas/plotting/_core.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 54fa12781542d..211d9777e7515 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -43,6 +43,15 @@ _registered = False +def _setup(): + # delay the import of matplotlib until necessary + global _registered + if not _registered: + from pandas.plotting import _converter + _converter.register() + _registered = True + + def _get_standard_kind(kind): return {'density': 'kde'}.get(kind, kind) @@ -90,7 +99,7 @@ def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, secondary_y=False, colormap=None, table=False, layout=None, **kwds): - self._setup() + _setup() self.data = data self.by = by @@ -174,13 +183,6 @@ def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, self._validate_color_args() - def _setup(self): - global _registered - if not _registered: - from pandas.plotting import _converter - _converter.register() - _registered = True - def _validate_color_args(self): if 'color' not in self.kwds and 'colors' in self.kwds: warnings.warn(("'colors' is being deprecated.
Please use 'color'" @@ -2059,6 +2061,7 @@ def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds): import matplotlib.pyplot as plt + _setup() ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, grid=grid, rot=rot, figsize=figsize, layout=layout, return_type=return_type, **kwds) @@ -2154,7 +2157,7 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, kwds : other plotting keyword arguments To be passed to hist function """ - + _setup() if by is not None: axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid, figsize=figsize, sharex=sharex, sharey=sharey, @@ -2351,6 +2354,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) >>> boxplot_frame_groupby(grouped, subplots=False) """ + _setup() if subplots is True: naxes = len(grouped) fig, axes = _subplots(naxes=naxes, squeeze=False, From 85f8baa18878ae4aff9f6182ed02486312924586 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 28 Sep 2017 16:27:27 -0500 Subject: [PATCH 08/14] PERF: delay import of py.path, Pathlib --- pandas/io/common.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 3f04d34d37186..534c1e0671150 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -27,19 +27,6 @@ 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '' ]) -try: - import pathlib - _PATHLIB_INSTALLED = True -except ImportError: - _PATHLIB_INSTALLED = False - - -try: - from py.path import local as LocalPath - _PY_PATH_INSTALLED = True -except: - _PY_PATH_INSTALLED = False - if compat.PY3: from urllib.request import urlopen, pathname2url @@ -160,6 +147,18 @@ def _stringify_path(filepath_or_buffer): Any other object is passed through unchanged, which includes bytes, strings, buffers, or 
anything else that's not even path-like. """ + try: + import pathlib + _PATHLIB_INSTALLED = True + except ImportError: + _PATHLIB_INSTALLED = False + + try: + from py.path import local as LocalPath + _PY_PATH_INSTALLED = True + except ImportError: + _PY_PATH_INSTALLED = False + if hasattr(filepath_or_buffer, '__fspath__'): return filepath_or_buffer.__fspath__() if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path): From 748ea2d8aab440113e22e8abb5c9430b25b41b5c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 28 Sep 2017 16:37:50 -0500 Subject: [PATCH 09/14] Add a script to test imports --- scripts/check_imports.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 scripts/check_imports.py diff --git a/scripts/check_imports.py b/scripts/check_imports.py new file mode 100644 index 0000000000000..982e6ff67dc4a --- /dev/null +++ b/scripts/check_imports.py @@ -0,0 +1,27 @@ +""" +Check that certain modules are not loaded by `import pandas` +""" +import sys + + +blacklist = { + 'matplotlib', + 'numexpr', + 'xlsxwriter', + 'openpyxl', + 'xlwt', + 'numexpr', +} + + +def main(): + import pandas # noqa + + modules = set(x.split('.')[0] for x in sys.modules) + imported = modules & blacklist + if modules & blacklist: + sys.exit("Imported {}".format(imported)) + + +if __name__ == '__main__': + main() From 52295b85d87ad82273e9a60b00a0b4290167fa6f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 28 Sep 2017 16:40:38 -0500 Subject: [PATCH 10/14] CI: Check for accidental imports --- .travis.yml | 2 ++ {scripts => ci}/check_imports.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) rename {scripts => ci}/check_imports.py (76%) diff --git a/.travis.yml b/.travis.yml index 034e2a32bb75c..9a7e40237bbb4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -121,6 +121,8 @@ script: - ci/script_single.sh - ci/script_multi.sh - ci/lint.sh + - echo "checking imports" + - python ci/check_imports.py - echo "script 
done" after_success: diff --git a/scripts/check_imports.py b/ci/check_imports.py similarity index 76% rename from scripts/check_imports.py rename to ci/check_imports.py index 982e6ff67dc4a..cee13d70c0dce 100644 --- a/scripts/check_imports.py +++ b/ci/check_imports.py @@ -3,14 +3,22 @@ """ import sys - blacklist = { + 'bs4', + 'html5lib', + 'jinja2' + 'lxml', 'matplotlib', 'numexpr', - 'xlsxwriter', 'openpyxl', + 'py', + 'pytest', + 's3fs', + 'scipy', + 'tables', + 'xlrd', + 'xlsxwriter', 'xlwt', - 'numexpr', } From 067b561e6c30af492d058d5ec93116b4e5acbe03 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 28 Sep 2017 16:59:08 -0500 Subject: [PATCH 11/14] whatsnew --- doc/source/whatsnew/v0.21.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c8a0a6bff5cc7..ff2d6cffcd10d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -164,6 +164,7 @@ Other Enhancements - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`) - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names +- Improved the import time of pandas by about 2.25x (:issue:`16764`) .. 
_whatsnew_0210.api_breaking: From b9f6a14e5c06097f4ba92432f24e298f9a5db74d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 29 Sep 2017 06:03:55 -0500 Subject: [PATCH 12/14] Added release note --- doc/source/whatsnew/v0.21.0.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ff2d6cffcd10d..6b968a3f1ae32 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -560,6 +560,8 @@ Other API Changes - :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) - Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`) +- Pandas no longer registers matplotlib converters on import. The converters + will be registered and used when the first plot is drawn (:issue:`17710`) ..
_whatsnew_0210.deprecations: From 9bbd9c61f04e6125204a587b55599e699b8f6bd0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 29 Sep 2017 10:38:16 -0500 Subject: [PATCH 13/14] Fix script call --- .travis.yml | 2 +- ci/check_imports.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9a7e40237bbb4..fe1a2950dbf08 100644 --- a/.travis.yml +++ b/.travis.yml @@ -122,7 +122,7 @@ script: - ci/script_multi.sh - ci/lint.sh - echo "checking imports" - - python ci/check_imports.py + - source activate pandas && python ci/check_imports.py - echo "script done" after_success: diff --git a/ci/check_imports.py b/ci/check_imports.py index cee13d70c0dce..a83436e7d258c 100644 --- a/ci/check_imports.py +++ b/ci/check_imports.py @@ -6,6 +6,7 @@ blacklist = { 'bs4', 'html5lib', + 'ipython', 'jinja2' 'lxml', 'matplotlib', From 9d0f74a73aa26f8ba7a6e59e797dfa3638272eb5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 29 Sep 2017 12:12:16 -0500 Subject: [PATCH 14/14] pep8 --- pandas/io/excel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 9bcd24c0b660d..41e3b5283a532 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -25,7 +25,7 @@ import pandas._libs.json as json from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, string_types, OrderedDict) -from pandas.core import config, config_init +from pandas.core import config from pandas.io.formats.printing import pprint_thing import pandas.compat as compat import pandas.compat.openpyxl_compat as openpyxl_compat