diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index ba0558cff07eb..135fb55cfce50 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -499,6 +499,21 @@ as possible to avoid mass breakages. Additional standards are outlined on the `code style wiki page `_. +Optional dependencies +--------------------- + +Optional dependencies (e.g. matplotlib) should be imported with the private helper +``pandas.compat._optional.import_optional_dependency``. This ensures a +consistent error message when the dependency is not met. + +All methods using an optional dependency should include a test asserting that an +``ImportError`` is raised when the optional dependency is not found. This test +should be skipped if the library is present. + +All optional dependencies should be documented in +:ref:`install.optional_dependencies` and the minimum required version should be +set in the ``pandas.compat._optional.VERSIONS`` dict. + C (cpplint) ~~~~~~~~~~~ diff --git a/doc/source/install.rst b/doc/source/install.rst index 98443ede2e965..2b8352ff9a1a5 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -252,87 +252,69 @@ Recommended Dependencies Optional Dependencies ~~~~~~~~~~~~~~~~~~~~~ -* `Cython `__: Only necessary to build development - version. Version 0.28.2 or higher. -* `SciPy `__: miscellaneous statistical functions, Version 0.19.0 or higher -* `xarray `__: pandas like handling for > 2 dims. Version 0.8.2 or higher is recommended. -* `PyTables `__: necessary for HDF5-based storage, Version 3.4.2 or higher -* `pyarrow `__ (>= 0.9.0): necessary for feather-based storage. -* `Apache Parquet `__, either `pyarrow `__ (>= 0.9.0) or `fastparquet `__ (>= 0.2.1) for parquet-based storage. The `snappy `__ and `brotli `__ are available for compression support. -* `SQLAlchemy `__: for SQL database support. Version 1.1.4 or higher recommended. 
Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. Some common drivers are: - - * `psycopg2 `__: for PostgreSQL - * `pymysql `__: for MySQL. - * `SQLite `__: for SQLite, this is included in Python's standard library by default. - -* `matplotlib `__: for plotting, Version 2.2.2 or higher. -* For Excel I/O: - - * `xlrd/xlwt `__: Excel reading (xlrd), version 1.0.0 or higher required, and writing (xlwt) - * `openpyxl `__: openpyxl version 2.4.0 - for writing .xlsx files (xlrd >= 1.0.0) - * `XlsxWriter `__: Alternative Excel writer - -* `Jinja2 `__: Template engine for conditional HTML formatting. -* `s3fs `__: necessary for Amazon S3 access (s3fs >= 0.0.8). -* `blosc `__: for msgpack compression using ``blosc`` -* `gcsfs `__: necessary for Google Cloud Storage access (gcsfs >= 0.1.0). -* One of - `qtpy `__ (requires PyQt or PySide), - `PyQt5 `__, - `PyQt4 `__, - `xsel `__, or - `xclip `__: necessary to use - :func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation. -* `pandas-gbq - `__: - for Google BigQuery I/O. (pandas-gbq >= 0.8.0) - -* One of the following combinations of libraries is needed to use the - top-level :func:`~pandas.read_html` function: - - .. versionchanged:: 0.23.0 - - .. note:: - - If using BeautifulSoup4 a minimum version of 4.4.1 is required - - * `BeautifulSoup4`_ and `html5lib`_ (Any recent version of `html5lib`_ is - okay.) - * `BeautifulSoup4`_ and `lxml`_ - * `BeautifulSoup4`_ and `html5lib`_ and `lxml`_ - * Only `lxml`_, although see :ref:`HTML Table Parsing ` - for reasons as to why you should probably **not** take this approach. - - .. warning:: - - * if you install `BeautifulSoup4`_ you must install either - `lxml`_ or `html5lib`_ or both. - :func:`~pandas.read_html` will **not** work with *only* - `BeautifulSoup4`_ installed. 
- * You are highly encouraged to read :ref:`HTML Table Parsing gotchas `. - It explains issues surrounding the installation and - usage of the above three libraries. - - .. note:: - - * if you're on a system with ``apt-get`` you can do - - .. code-block:: sh - - sudo apt-get build-dep python-lxml - - to get the necessary dependencies for installation of `lxml`_. This - will prevent further headaches down the line. - +Pandas has many optional dependencies that are only used for specific methods. +For example, :func:`pandas.read_hdf` requires the ``pytables`` package. If the +optional dependency is not installed, pandas will raise an ``ImportError`` when +the method requiring that dependency is called. + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +BeautifulSoup4 4.4.1 HTML parser for read_html (see :ref:`note `) +Jinja2 Conditional formatting with DataFrame.style +PyQt4 Clipboard I/O +PyQt5 Clipboard I/O +PyTables 3.4.2 HDF5-based reading / writing +SQLAlchemy 1.1.4 SQL support for databases other than sqlite +SciPy 0.19.0 Miscellaneous statistical functions +XlsxWriter Excel writing +blosc Compression for msgpack +fastparquet 0.2.1 Parquet reading / writing +gcsfs 0.1.0 Google Cloud Storage access +html5lib HTML parser for read_html (see :ref:`note `) +lxml HTML parser for read_html (see :ref:`note `) +matplotlib 2.2.2 Visualization +openpyxl 2.4.0 Reading / writing for xlsx files +pandas-gbq 0.8.0 Google Big Query access +psycopg2 PostgreSQL engine for sqlalchemy +pyarrow 0.9.0 Parquet and feather reading / writing +pymysql MySQL engine for sqlalchemy +qtpy Clipboard I/O +s3fs 0.0.8 Amazon S3 access +xarray 0.8.2 pandas-like API for N-dimensional data +xclip Clipboard I/O on linux +xlrd 1.0.0 Excel reading +xlwt 2.4.0 Excel writing +xsel Clipboard I/O on linux 
+zlib Compression for msgpack +========================= ================== ============================================================= + +.. _optional_html: + +Optional Dependencies for Parsing HTML +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +One of the following combinations of libraries is needed to use the +top-level :func:`~pandas.read_html` function: + +.. versionchanged:: 0.23.0 + +* `BeautifulSoup4`_ and `html5lib`_ +* `BeautifulSoup4`_ and `lxml`_ +* `BeautifulSoup4`_ and `html5lib`_ and `lxml`_ +* Only `lxml`_, although see :ref:`HTML Table Parsing ` + for reasons as to why you should probably **not** take this approach. + +.. warning:: + + * if you install `BeautifulSoup4`_ you must install either + `lxml`_ or `html5lib`_ or both. + :func:`~pandas.read_html` will **not** work with *only* + `BeautifulSoup4`_ installed. + * You are highly encouraged to read :ref:`HTML Table Parsing gotchas `. + It explains issues surrounding the installation and + usage of the above three libraries. .. _html5lib: https://github.com/html5lib/html5lib-python .. _BeautifulSoup4: http://www.crummy.com/software/BeautifulSoup .. _lxml: http://lxml.de - -.. note:: - - Without the optional dependencies, many useful features will not - work. Hence, it is highly recommended that you install these. A packaged - distribution like `Anaconda `__, `ActivePython `__ (version 2.7 or 3.5), or `Enthought Canopy - `__ may be worth considering. diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py new file mode 100644 index 0000000000000..a3c0e6691dd22 --- /dev/null +++ b/pandas/compat/_optional.py @@ -0,0 +1,115 @@ +import distutils.version +import importlib +import types +from typing import Optional +import warnings + +# Update install.rst when updating versions! 
+ +VERSIONS = { + "bs4": "4.4.1", + "bottleneck": "1.2.1", + "fastparquet": "0.2.1", + "gcsfs": "0.1.0", + "matplotlib": "2.2.2", + "numexpr": "2.6.2", + "openpyxl": "2.4.0", + "pandas_gbq": "0.8.0", + "pyarrow": "0.9.0", + "tables": "3.4.2", + "s3fs": "0.0.8", + "scipy": "0.19.0", + "sqlalchemy": "1.1.4", + "xarray": "0.8.2", + "xlrd": "1.0.0", + "xlwt": "2.4.0", +} + +message = ( + "Missing optional dependency '{name}'. {extra} " + "Use pip or conda to install {name}." +) +version_message = ( + "Pandas requires version '{minimum_version}' or newer of '{name}' " + "(version '{actual_version}' currently installed)." +) + + +def _get_version(module: types.ModuleType) -> str: + version = getattr(module, '__version__', None) + if version is None: + # xlrd uses a capitalized attribute name + version = getattr(module, '__VERSION__', None) + + if version is None: + raise ImportError( + "Can't determine version for {}".format(module.__name__) + ) + return version + + +def import_optional_dependency( + name: str, + extra: str = "", + raise_on_missing: bool = True, + on_version: str = "raise", +) -> Optional[types.ModuleType]: + """ + Import an optional dependency. + + By default, if a dependency is missing an ImportError with a nice + message will be raised. If a dependency is present, but too old, + we raise. + + Parameters + ---------- + name : str + The module name. This should be top-level only, so that the + version may be checked. + extra : str + Additional text to include in the ImportError message. + raise_on_missing : bool, default True + Whether to raise if the optional dependency is not found. + When False and the module is not present, None is returned. + on_version : str {'raise', 'warn', 'ignore'} + What to do when a dependency's version is too old. + + * raise : Raise an ImportError + * warn : Warn that the version is too old. Returns None + * ignore: Return the module, even if the version is too old. 
+ It's expected that users validate the version locally when + using ``on_version="ignore"`` (see. ``io/html.py``) + + Returns + ------- + maybe_module : Optional[ModuleType] + The imported module, when found and the version is correct. + None is returned when the package is not found and `raise_on_missing` + is False, or when the package's version is too old and `on_version` + is ``'warn'``. + """ + try: + module = importlib.import_module(name) + except ImportError: + if raise_on_missing: + raise ImportError(message.format(name=name, extra=extra)) from None + else: + return None + + minimum_version = VERSIONS.get(name) + if minimum_version: + version = _get_version(module) + if distutils.version.LooseVersion(version) < minimum_version: + assert on_version in {"warn", "raise", "ignore"} + msg = version_message.format( + minimum_version=minimum_version, + name=name, + actual_version=version, + ) + if on_version == "warn": + warnings.warn(msg, UserWarning) + return None + elif on_version == "raise": + raise ImportError(msg) + + return module diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 926ed6a829a6d..ca6e2c04f5a69 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -15,6 +15,7 @@ from pandas._libs.sparse import BlockIndex, IntIndex, SparseIndex from pandas._libs.tslibs import NaT import pandas.compat as compat +from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.errors import PerformanceWarning @@ -2205,10 +2206,8 @@ def to_coo(self): float32. By numpy.find_common_type convention, mixing int64 and and uint64 will result in a float64 dtype. 
""" - try: - from scipy.sparse import coo_matrix - except ImportError: - raise ImportError('Scipy is not installed') + import_optional_dependency("scipy") + from scipy.sparse import coo_matrix dtype = find_common_type(self._parent.dtypes) if isinstance(dtype, SparseDtype): diff --git a/pandas/core/computation/check.py b/pandas/core/computation/check.py index c9b68661fd596..fc6b9a2522824 100644 --- a/pandas/core/computation/check.py +++ b/pandas/core/computation/check.py @@ -1,24 +1,11 @@ -from distutils.version import LooseVersion -import warnings - -_NUMEXPR_INSTALLED = False -_MIN_NUMEXPR_VERSION = "2.6.2" -_NUMEXPR_VERSION = None - -try: - import numexpr as ne - ver = LooseVersion(ne.__version__) - _NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION) - _NUMEXPR_VERSION = ver - - if not _NUMEXPR_INSTALLED: - warnings.warn( - "The installed version of numexpr {ver} is not supported " - "in pandas and will be not be used\nThe minimum supported " - "version is {min_ver}\n".format( - ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning) - -except ImportError: # pragma: no cover - pass +from pandas.compat._optional import import_optional_dependency + +ne = import_optional_dependency("numexpr", raise_on_missing=False, + on_version="warn") +_NUMEXPR_INSTALLED = ne is not None +if _NUMEXPR_INSTALLED: + _NUMEXPR_VERSION = ne.__version__ +else: + _NUMEXPR_VERSION = None __all__ = ['_NUMEXPR_INSTALLED', '_NUMEXPR_VERSION'] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 865eab9d71eff..903fd7ffe706a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -16,6 +16,7 @@ from pandas._libs import Timestamp, iNaT, properties from pandas.compat import set_function_name +from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import ( @@ -2750,15 +2751,7 @@ class (index) object 'bird' 'bird' 'mammal' 'mammal' Data 
variables: speed (date, animal) int64 350 18 361 15 """ - try: - import xarray - except ImportError: - # Give a nice error message - raise ImportError("the xarray library is not installed\n" - "you can install via conda\n" - "conda install xarray\n" - "or via pip\n" - "pip install xarray\n") + xarray = import_optional_dependency("xarray") if self.ndim == 1: return xarray.DataArray.from_series(self) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index b54418e9d58c3..cdb3b77567829 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -6,6 +6,7 @@ import numpy as np from pandas._libs import algos, lib +from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array from pandas.core.dtypes.common import ( @@ -246,13 +247,9 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, Returns an array interpolated at new_x. Add any new methods to the list in _clean_interp_method. """ - try: - from scipy import interpolate - # TODO: Why is DatetimeIndex being imported here? 
- from pandas import DatetimeIndex # noqa - except ImportError: - raise ImportError('{method} interpolation requires SciPy' - .format(method=method)) + extra = '{method} interpolation requires SciPy.'.format(method=method) + import_optional_dependency('scipy', extra=extra) + from scipy import interpolate new_x = np.asarray(new_x) @@ -275,12 +272,7 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, raise ImportError("Your version of Scipy does not support " "PCHIP interpolation.") elif method == 'akima': - try: - from scipy.interpolate import Akima1DInterpolator # noqa - alt_methods['akima'] = _akima_interpolate - except ImportError: - raise ImportError("Your version of Scipy does not support " - "Akima interpolation.") + alt_methods['akima'] = _akima_interpolate interp1d_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial'] @@ -392,11 +384,8 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): """ from scipy import interpolate - try: - P = interpolate.Akima1DInterpolator(xi, yi, axis=axis) - except TypeError: - # Scipy earlier than 0.17.0 missing axis - P = interpolate.Akima1DInterpolator(xi, yi) + P = interpolate.Akima1DInterpolator(xi, yi, axis=axis) + if der == 0: return P(x) elif interpolate._isscalar(der): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 56698ff3d9c35..7923e463c7719 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,15 +1,14 @@ -from distutils.version import LooseVersion import functools import itertools import operator from typing import Any, Optional, Tuple, Union -import warnings import numpy as np from pandas._config import get_option from pandas._libs import iNaT, lib, tslibs +from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask from pandas.core.dtypes.common import ( @@ -22,26 +21,10 @@ import pandas.core.common as com -_BOTTLENECK_INSTALLED = False -_MIN_BOTTLENECK_VERSION = '1.2.1' 
- -try: - import bottleneck as bn - ver = bn.__version__ - _BOTTLENECK_INSTALLED = (LooseVersion(ver) >= - LooseVersion(_MIN_BOTTLENECK_VERSION)) - - if not _BOTTLENECK_INSTALLED: - warnings.warn( - "The installed version of bottleneck {ver} is not supported " - "in pandas and will be not be used\nThe minimum supported " - "version is {min_ver}\n".format( - ver=ver, min_ver=_MIN_BOTTLENECK_VERSION), UserWarning) - -except ImportError: # pragma: no cover - pass - - +bn = import_optional_dependency("bottleneck", + raise_on_missing=False, + on_version="warn") +_BOTTLENECK_INSTALLED = bn is not None _USE_BOTTLENECK = False diff --git a/pandas/core/window.py b/pandas/core/window.py index ab1f49aec2ad1..2b3cc4f0bf00a 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -11,6 +11,7 @@ import numpy as np import pandas._libs.window as libwindow +from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -595,11 +596,11 @@ def validate(self): elif is_integer(window): if window <= 0: raise ValueError("window must be > 0 ") - try: - import scipy.signal as sig - except ImportError: - raise ImportError('Please install scipy to generate window ' - 'weight') + import_optional_dependency( + "scipy", + extra="Scipy is required to generate window weight." + ) + import scipy.signal as sig if not isinstance(self.win_type, str): raise ValueError('Invalid win_type {0}'.format(self.win_type)) diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index ccf9a4e91e961..286efea9f120e 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -1,5 +1,7 @@ import warnings +from pandas.compat._optional import import_optional_dependency + from pandas.core.dtypes.common import is_integer, is_list_like _writers = {} @@ -36,11 +38,11 @@ def _get_default_writer(ext): The default engine for the extension. 
""" _default_writers = {'xlsx': 'openpyxl', 'xlsm': 'openpyxl', 'xls': 'xlwt'} - try: - import xlsxwriter # noqa + xlsxwriter = import_optional_dependency("xlsxwriter", + raise_on_missing=False, + on_version="warn") + if xlsxwriter: _default_writers['xlsx'] = 'xlsxwriter' - except ImportError: - pass return _default_writers[ext] diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 18e751274dab9..fcc432dc7a5ad 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -1,8 +1,9 @@ from datetime import time -from distutils.version import LooseVersion import numpy as np +from pandas.compat._optional import import_optional_dependency + from pandas.io.excel._base import _BaseExcelReader @@ -17,16 +18,7 @@ def __init__(self, filepath_or_buffer): Object to be parsed. """ err_msg = "Install xlrd >= 1.0.0 for Excel support" - - try: - import xlrd - except ImportError: - raise ImportError(err_msg) - else: - if xlrd.__VERSION__ < LooseVersion("1.0.0"): - raise ImportError(err_msg + - ". 
Current version " + xlrd.__VERSION__) - + import_optional_dependency("xlrd", extra=err_msg) super().__init__(filepath_or_buffer) @property diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 9d60349faaffe..93252f3a09ceb 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -2,6 +2,7 @@ from distutils.version import LooseVersion +from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import deprecate_kwarg from pandas import DataFrame, Int64Index, RangeIndex @@ -9,30 +10,6 @@ from pandas.io.common import _stringify_path -def _try_import(): - # since pandas is a dependency of pyarrow - # we need to import on first use - try: - import pyarrow - from pyarrow import feather - except ImportError: - # give a nice error message - raise ImportError("pyarrow is not installed\n\n" - "you can install via conda\n" - "conda install pyarrow -c conda-forge\n" - "or via pip\n" - "pip install -U pyarrow\n") - - if LooseVersion(pyarrow.__version__) < LooseVersion('0.9.0'): - raise ImportError("pyarrow >= 0.9.0 required for feather support\n\n" - "you can install via conda\n" - "conda install pyarrow -c conda-forge" - "or via pip\n" - "pip install -U pyarrow\n") - - return feather, pyarrow - - def to_feather(df, path): """ Write a DataFrame to the feather-format @@ -43,11 +20,14 @@ def to_feather(df, path): path : string file path, or file-like object """ + import_optional_dependency("pyarrow") + from pyarrow import feather + path = _stringify_path(path) + if not isinstance(df, DataFrame): raise ValueError("feather only support IO with DataFrames") - feather = _try_import()[0] valid_types = {'string', 'unicode'} # validate index @@ -110,8 +90,9 @@ def read_feather(path, columns=None, use_threads=True): ------- type of object stored in file """ + pyarrow = import_optional_dependency("pyarrow") + from pyarrow import feather - feather, pyarrow = _try_import() path = _stringify_path(path) if 
LooseVersion(pyarrow.__version__) < LooseVersion('0.11.0'): diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 1e80677e1a597..0d9b5fe4314a3 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -14,6 +14,7 @@ from pandas._config import get_option +from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import Appender from pandas.core.dtypes.common import is_float, is_string_like @@ -25,14 +26,9 @@ from pandas.core.generic import _shared_docs from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice -try: - from jinja2 import ( - PackageLoader, Environment, ChoiceLoader, FileSystemLoader - ) -except ImportError: - raise ImportError("pandas.Styler requires jinja2. " - "Please install with `conda install Jinja2`\n" - "or `pip install Jinja2`") +jinja2 = import_optional_dependency( + "jinja2", extra="DataFrame.style requires jinja2." +) try: @@ -75,7 +71,7 @@ class Styler: Attributes ---------- - env : Jinja2 Environment + env : Jinja2 jinja2.Environment template : Jinja2 Template loader : Jinja2 Loader @@ -112,8 +108,8 @@ class Styler: * Blank cells include ``blank`` * Data cells include ``data`` """ - loader = PackageLoader("pandas", "io/formats/templates") - env = Environment( + loader = jinja2.PackageLoader("pandas", "io/formats/templates") + env = jinja2.Environment( loader=loader, trim_blocks=True, ) @@ -1231,13 +1227,13 @@ def from_custom_template(cls, searchpath, name): MyStyler : subclass of Styler Has the correct ``env`` and ``template`` class attributes set. 
""" - loader = ChoiceLoader([ - FileSystemLoader(searchpath), + loader = jinja2.ChoiceLoader([ + jinja2.FileSystemLoader(searchpath), cls.loader, ]) class MyStyler(cls): - env = Environment(loader=loader) + env = jinja2.Environment(loader=loader) template = env.get_template(name) return MyStyler diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 3a6d538b5e616..a9eff003f2249 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -1,23 +1,18 @@ """ Google BigQuery support """ +from pandas.compat._optional import import_optional_dependency def _try_import(): # since pandas is a dependency of pandas-gbq # we need to import on first use - try: - import pandas_gbq - except ImportError: - - # give a nice error message - raise ImportError("Load data from Google BigQuery\n" - "\n" - "the pandas-gbq package is not installed\n" - "see the docs: https://pandas-gbq.readthedocs.io\n" - "\n" - "you can install via pip or conda:\n" - "pip install pandas-gbq\n" - "conda install pandas-gbq -c conda-forge\n") - + msg = ( + "pandas-gbq is required to load data from Google BigQuery. " + "See the docs: https://pandas-gbq.readthedocs.io." 
+ ) + pandas_gbq = import_optional_dependency( + "pandas_gbq", + extra=msg, + ) return pandas_gbq diff --git a/pandas/io/gcs.py b/pandas/io/gcs.py index 89dade27ad543..310a1f9f398e9 100644 --- a/pandas/io/gcs.py +++ b/pandas/io/gcs.py @@ -1,8 +1,10 @@ """ GCS support for remote file interactivity """ -try: - import gcsfs -except ImportError: - raise ImportError("The gcsfs library is required to handle GCS files") +from pandas.compat._optional import import_optional_dependency + +gcsfs = import_optional_dependency( + "gcsfs", + extra="The gcsfs library is required to handle GCS files" +) def get_filepath_or_buffer(filepath_or_buffer, encoding=None, diff --git a/pandas/io/html.py b/pandas/io/html.py index cbdc513cfbbe3..2e2327a35f2c7 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -10,6 +10,7 @@ import re from pandas.compat import raise_with_traceback +from pandas.compat._optional import import_optional_dependency from pandas.errors import AbstractMethodError, EmptyDataError from pandas.core.dtypes.common import is_list_like @@ -35,24 +36,17 @@ def _importers(): return global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB + bs4 = import_optional_dependency("bs4", raise_on_missing=False, + on_version="ignore") + _HAS_BS4 = bs4 is not None - try: - import bs4 # noqa - _HAS_BS4 = True - except ImportError: - pass - - try: - import lxml # noqa - _HAS_LXML = True - except ImportError: - pass - - try: - import html5lib # noqa - _HAS_HTML5LIB = True - except ImportError: - pass + lxml = import_optional_dependency("lxml", raise_on_missing=False, + on_version="ignore") + _HAS_LXML = lxml is not None + + html5lib = import_optional_dependency("html5lib", raise_on_missing=False, + on_version="ignore") + _HAS_HTML5LIB = html5lib is not None _IMPORTS = True diff --git a/pandas/io/packers.py b/pandas/io/packers.py index ead0fbd263ebf..e3d45548e4978 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -41,12 +41,12 @@ from datetime import date, datetime, timedelta from io 
import BytesIO import os -from textwrap import dedent import warnings from dateutil.parser import parse import numpy as np +from pandas.compat._optional import import_optional_dependency from pandas.errors import PerformanceWarning from pandas.util._move import ( BadMove as _BadMove, move_into_mutable_buffer as _move_into_mutable_buffer) @@ -69,47 +69,6 @@ from pandas.io.common import _stringify_path, get_filepath_or_buffer from pandas.io.msgpack import ExtType, Packer as _Packer, Unpacker as _Unpacker -# check which compression libs we have installed -try: - import zlib - - def _check_zlib(): - pass -except ImportError: - def _check_zlib(): - raise ImportError('zlib is not installed') - -_check_zlib.__doc__ = dedent( - """\ - Check if zlib is installed. - - Raises - ------ - ImportError - Raised when zlib is not installed. - """, -) - -try: - import blosc - - def _check_blosc(): - pass -except ImportError: - def _check_blosc(): - raise ImportError('blosc is not installed') - -_check_blosc.__doc__ = dedent( - """\ - Check if blosc is installed. - - Raises - ------ - ImportError - Raised when blosc is not installed. - """, -) - # until we can pass this into our conversion functions, # this is pretty hacky compressor = None @@ -274,7 +233,10 @@ def convert(values): v = values.ravel() if compressor == 'zlib': - _check_zlib() + zlib = import_optional_dependency( + "zlib", + extra="zlib is required when `compress='zlib'`." + ) # return string arrays like they are if dtype == np.object_: @@ -285,7 +247,10 @@ def convert(values): return ExtType(0, zlib.compress(v)) elif compressor == 'blosc': - _check_blosc() + blosc = import_optional_dependency( + "blosc", + extra="blosc is required when `compress='blosc'`." 
+ ) # return string arrays like they are if dtype == np.object_: @@ -319,10 +284,16 @@ def unconvert(values, dtype, compress=None): if compress: if compress == 'zlib': - _check_zlib() + zlib = import_optional_dependency( + "zlib", + extra="zlib is required when `compress='zlib'`." + ) decompress = zlib.decompress elif compress == 'blosc': - _check_blosc() + blosc = import_optional_dependency( + "blosc", + extra="blosc is required when `compress='blosc'`." + ) decompress = blosc.decompress else: raise ValueError("compress must be one of 'zlib' or 'blosc'") diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 6f3d70836af47..9a846d1c7845c 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -1,8 +1,8 @@ """ parquet compat """ -from distutils.version import LooseVersion from warnings import catch_warnings +from pandas.compat._optional import import_optional_dependency from pandas.errors import AbstractMethodError from pandas import DataFrame, get_option @@ -75,28 +75,11 @@ def read(self, path, columns=None, **kwargs): class PyArrowImpl(BaseImpl): def __init__(self): - # since pandas is a dependency of pyarrow - # we need to import on first use - try: - import pyarrow - import pyarrow.parquet - except ImportError: - raise ImportError( - "pyarrow is required for parquet support\n\n" - "you can install via conda\n" - "conda install pyarrow -c conda-forge\n" - "\nor via pip\n" - "pip install -U pyarrow\n" - ) - if LooseVersion(pyarrow.__version__) < '0.9.0': - raise ImportError( - "pyarrow >= 0.9.0 is required for parquet support\n\n" - "you can install via conda\n" - "conda install pyarrow -c conda-forge\n" - "\nor via pip\n" - "pip install -U pyarrow\n" - ) - + pyarrow = import_optional_dependency( + "pyarrow", + extra="pyarrow is required for parquet support." 
+ ) + import pyarrow.parquet self.api = pyarrow def write(self, df, path, compression='snappy', @@ -140,25 +123,10 @@ class FastParquetImpl(BaseImpl): def __init__(self): # since pandas is a dependency of fastparquet # we need to import on first use - try: - import fastparquet - except ImportError: - raise ImportError( - "fastparquet is required for parquet support\n\n" - "you can install via conda\n" - "conda install fastparquet -c conda-forge\n" - "\nor via pip\n" - "pip install -U fastparquet" - ) - if LooseVersion(fastparquet.__version__) < '0.2.1': - raise ImportError( - "fastparquet >= 0.2.1 is required for parquet " - "support\n\n" - "you can install via conda\n" - "conda install fastparquet -c conda-forge\n" - "\nor via pip\n" - "pip install -U fastparquet" - ) + fastparquet = import_optional_dependency( + "fastparquet", + extra="fastparquet is required for parquet support." + ) self.api = fastparquet def write(self, df, path, compression='snappy', index=None, diff --git a/pandas/io/s3.py b/pandas/io/s3.py index 607eae27021c3..61fd984789f78 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -1,12 +1,13 @@ """ s3 support for remote file interactivity """ -try: - import s3fs - from botocore.exceptions import NoCredentialsError -except ImportError: - raise ImportError("The s3fs library is required to handle s3 files") - from urllib.parse import urlparse as parse_url +from pandas.compat._optional import import_optional_dependency + +s3fs = import_optional_dependency( + "s3fs", + extra="The s3fs package is required to handle s3 files." 
+) + def _strip_schema(url): """Returns the url without the s3:// part""" @@ -16,6 +17,7 @@ def _strip_schema(url): def get_filepath_or_buffer(filepath_or_buffer, encoding=None, compression=None, mode=None): + from botocore.exceptions import NoCredentialsError if mode is None: mode = 'rb' diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py index 7cc373d06cfe1..3b01851bd39ca 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -2,8 +2,9 @@ import pytest +from pandas.compat._optional import VERSIONS + import pandas as pd -from pandas.core.computation.check import _MIN_NUMEXPR_VERSION from pandas.core.computation.engines import _engines import pandas.core.computation.expr as expr @@ -15,7 +16,7 @@ def test_compat(): try: import numexpr as ne ver = ne.__version__ - if LooseVersion(ver) < LooseVersion(_MIN_NUMEXPR_VERSION): + if LooseVersion(ver) < LooseVersion(VERSIONS['numexpr']): assert not _NUMEXPR_INSTALLED else: assert _NUMEXPR_INSTALLED @@ -38,7 +39,7 @@ def testit(): pytest.skip("no numexpr") else: if (LooseVersion(ne.__version__) < - LooseVersion(_MIN_NUMEXPR_VERSION)): + LooseVersion(VERSIONS['numexpr'])): with pytest.raises(ImportError): testit() else: diff --git a/pandas/tests/test_optional_dependency.py b/pandas/tests/test_optional_dependency.py new file mode 100644 index 0000000000000..3916bedb8e44b --- /dev/null +++ b/pandas/tests/test_optional_dependency.py @@ -0,0 +1,52 @@ +import sys +import types + +import pytest + +from pandas.compat._optional import VERSIONS, import_optional_dependency + +import pandas.util.testing as tm + + +def test_import_optional(): + match = "Missing .*notapackage.* pip .* conda .* notapackage" + with pytest.raises(ImportError, match=match): + import_optional_dependency("notapackage") + + result = import_optional_dependency("notapackage", raise_on_missing=False) + assert result is None + + +def test_xlrd_version_fallback(): + 
pytest.importorskip('xlrd') + import_optional_dependency("xlrd") + + +def test_bad_version(): + name = 'fakemodule' + module = types.ModuleType(name) + module.__version__ = "0.9.0" + sys.modules[name] = module + VERSIONS[name] = '1.0.0' + + match = "Pandas requires .*1.0.0.* of .fakemodule.*'0.9.0'" + with pytest.raises(ImportError, match=match): + import_optional_dependency("fakemodule") + + with tm.assert_produces_warning(UserWarning): + result = import_optional_dependency("fakemodule", on_version="warn") + assert result is None + + module.__version__ = "1.0.0" # exact match is OK + result = import_optional_dependency("fakemodule") + assert result is module + + +def test_no_version_raises(): + name = 'fakemodule' + module = types.ModuleType(name) + sys.modules[name] = module + VERSIONS[name] = '1.0.0' + + with pytest.raises(ImportError, match="Can't determine .* fakemodule"): + import_optional_dependency(name)