pandas-dev · WillAyd · Jun 12, 2019 · Jun 7, 2019 · Jun 12, 2019 · Jun 12, 2019
diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst
@@ -499,6 +499,21 @@ as possible to avoid mass breakages.
 Additional standards are outlined on the `code style wiki
 page <https://github.com/pandas-dev/pandas/wiki/Code-Style-and-Conventions>`_.
 
+Optional dependencies
+---------------------
+
+Optional dependencies (e.g. matplotlib) should be imported with the private helper
+``pandas.compat._optional.import_optional_dependency``. This ensures a
+consistent error message when the dependency is not met.
+
+All methods using an optional dependency should include a test asserting that an
+``ImportError`` is raised when the optional dependency is not found. This test
+should be skipped if the library is present.
+
+All optional dependencies should be documented in
+:ref:`install.optional_dependencies` and the minimum required version should be
+set in the ``pandas.compat._optional.VERSIONS`` dict.
+
 C (cpplint)
 ~~~~~~~~~~~
 

diff --git a/doc/source/install.rst b/doc/source/install.rst
@@ -252,87 +252,69 @@ Recommended Dependencies
 Optional Dependencies
 ~~~~~~~~~~~~~~~~~~~~~
 
-* `Cython <http://www.cython.org>`__: Only necessary to build development
-  version. Version 0.28.2 or higher.
-* `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions, Version 0.19.0 or higher
-* `xarray <http://xarray.pydata.org>`__: pandas like handling for > 2 dims. Version 0.8.2 or higher is recommended.
-* `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage, Version 3.4.2 or higher
-* `pyarrow <http://arrow.apache.org/docs/python/>`__ (>= 0.9.0): necessary for feather-based storage.
-* `Apache Parquet <https://parquet.apache.org/>`__, either `pyarrow <http://arrow.apache.org/docs/python/>`__ (>= 0.9.0) or `fastparquet <https://fastparquet.readthedocs.io/en/latest>`__ (>= 0.2.1) for parquet-based storage. The `snappy <https://pypi.org/project/python-snappy>`__ and `brotli <https://pypi.org/project/brotlipy>`__ are available for compression support.
-* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 1.1.4 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:
-
-    * `psycopg2 <http://initd.org/psycopg/>`__: for PostgreSQL
-    * `pymysql <https://github.com/PyMySQL/PyMySQL>`__: for MySQL.
-    * `SQLite <https://docs.python.org/3/library/sqlite3.html>`__: for SQLite, this is included in Python's standard library by default.
-
-* `matplotlib <http://matplotlib.org/>`__: for plotting, Version 2.2.2 or higher.
-* For Excel I/O:
-
-    * `xlrd/xlwt <http://www.python-excel.org/>`__: Excel reading (xlrd), version 1.0.0 or higher required, and writing (xlwt)
-    * `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__: openpyxl version 2.4.0
-      for writing .xlsx files (xlrd >= 1.0.0)
-    * `XlsxWriter <https://pypi.org/project/XlsxWriter>`__: Alternative Excel writer
-
-* `Jinja2 <http://jinja.pocoo.org/>`__: Template engine for conditional HTML formatting.
-* `s3fs <http://s3fs.readthedocs.io/>`__: necessary for Amazon S3 access (s3fs >= 0.0.8).
-* `blosc <https://pypi.org/project/blosc>`__: for msgpack compression using ``blosc``
-* `gcsfs <http://gcsfs.readthedocs.io/>`__: necessary for Google Cloud Storage access (gcsfs >= 0.1.0).
-* One of
-  `qtpy  <https://github.com/spyder-ide/qtpy>`__ (requires PyQt or PySide),
-  `PyQt5 <https://www.riverbankcomputing.com/software/pyqt/download5>`__,
-  `PyQt4 <http://www.riverbankcomputing.com/software/pyqt/download>`__,
-  `xsel <http://www.vergenet.net/~conrad/software/xsel/>`__, or
-  `xclip <https://github.com/astrand/xclip/>`__: necessary to use
-  :func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation.
-* `pandas-gbq
-  <https://pandas-gbq.readthedocs.io/en/latest/install.html#dependencies>`__:
-  for Google BigQuery I/O. (pandas-gbq >= 0.8.0)
-
-* One of the following combinations of libraries is needed to use the
-  top-level :func:`~pandas.read_html` function:
-
-  .. versionchanged:: 0.23.0
-
-  .. note::
-
-     If using BeautifulSoup4 a minimum version of 4.4.1 is required
-
-  * `BeautifulSoup4`_ and `html5lib`_ (Any recent version of `html5lib`_ is
-    okay.)
-  * `BeautifulSoup4`_ and `lxml`_
-  * `BeautifulSoup4`_ and `html5lib`_ and `lxml`_
-  * Only `lxml`_, although see :ref:`HTML Table Parsing <io.html.gotchas>`
-    for reasons as to why you should probably **not** take this approach.
-
-  .. warning::
-
-     * if you install `BeautifulSoup4`_ you must install either
-       `lxml`_ or `html5lib`_ or both.
-       :func:`~pandas.read_html` will **not** work with *only*
-       `BeautifulSoup4`_ installed.
-     * You are highly encouraged to read :ref:`HTML Table Parsing gotchas <io.html.gotchas>`.
-       It explains issues surrounding the installation and
-       usage of the above three libraries.
-
-  .. note::
-
-     * if you're on a system with ``apt-get`` you can do
-
-       .. code-block:: sh
-
-          sudo apt-get build-dep python-lxml
-
-       to get the necessary dependencies for installation of `lxml`_. This
-       will prevent further headaches down the line.
-
+Pandas has many optional dependencies that are only used for specific methods.
+For example, :func:`pandas.read_hdf` requires the ``pytables`` package. If the
+optional dependency is not installed, pandas will raise an ``ImportError`` when
+the method requiring that dependency is called.
+
+========================= ================== =============================================================
+Dependency                Minimum Version    Notes
+========================= ================== =============================================================
+BeautifulSoup4            4.4.1              HTML parser for read_html (see :ref:`note <optional_html>`)
+Jinja2                                       Conditional formatting with DataFrame.style
+PyQt4                                        Clipboard I/O
+PyQt5                                        Clipboard I/O
+PyTables                  3.4.2              HDF5-based reading / writing
+SQLAlchemy                1.1.4              SQL support for databases other than sqlite
+SciPy                     0.19.0             Miscellaneous statistical functions
+XlsxWriter                                   Excel writing
+blosc                                        Compression for msgpack
+fastparquet               0.2.1              Parquet reading / writing
+gcsfs                     0.1.0              Google Cloud Storage access
+html5lib                                     HTML parser for read_html (see :ref:`note <optional_html>`)
+lxml                                         HTML parser for read_html (see :ref:`note <optional_html>`)
+matplotlib                2.2.2              Visualization
+openpyxl                  2.4.0              Reading / writing for xlsx files
+pandas-gbq                0.8.0              Google BigQuery access
+psycopg2                                     PostgreSQL engine for sqlalchemy
+pyarrow                   0.9.0              Parquet and feather reading / writing
+pymysql                                      MySQL engine for sqlalchemy
+qtpy                                         Clipboard I/O
+s3fs                      0.0.8              Amazon S3 access
+xarray                    0.8.2              pandas-like API for N-dimensional data
+xclip                                        Clipboard I/O on Linux
+xlrd                      1.0.0              Excel reading
+xlwt                      1.2.0              Excel writing
+xsel                                         Clipboard I/O on Linux
+zlib                                         Compression for msgpack
+========================= ================== =============================================================
+
+.. _optional_html:
+
+Optional Dependencies for Parsing HTML
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+One of the following combinations of libraries is needed to use the
+top-level :func:`~pandas.read_html` function:
+
+.. versionchanged:: 0.23.0
+
+* `BeautifulSoup4`_ and `html5lib`_
+* `BeautifulSoup4`_ and `lxml`_
+* `BeautifulSoup4`_ and `html5lib`_ and `lxml`_
+* Only `lxml`_, although see :ref:`HTML Table Parsing <io.html.gotchas>`
+  for reasons as to why you should probably **not** take this approach.
+
+.. warning::
+
+    * if you install `BeautifulSoup4`_ you must install either
+      `lxml`_ or `html5lib`_ or both.
+      :func:`~pandas.read_html` will **not** work with *only*
+      `BeautifulSoup4`_ installed.
+    * You are highly encouraged to read :ref:`HTML Table Parsing gotchas <io.html.gotchas>`.
+      It explains issues surrounding the installation and
+      usage of the above three libraries.
 
 .. _html5lib: https://github.com/html5lib/html5lib-python
 .. _BeautifulSoup4: http://www.crummy.com/software/BeautifulSoup
 .. _lxml: http://lxml.de
-
-.. note::
-
-   Without the optional dependencies, many useful features will not
-   work. Hence, it is highly recommended that you install these. A packaged
-   distribution like `Anaconda <http://docs.continuum.io/anaconda/>`__, `ActivePython <https://www.activestate.com/activepython/downloads>`__  (version 2.7 or 3.5), or `Enthought Canopy
-   <http://enthought.com/products/canopy>`__ may be worth considering.
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
@@ -0,0 +1,115 @@
+import distutils.version
+import importlib
+import types
+from typing import Optional
+import warnings
+
+# Minimum supported version of each optional dependency. Keys must be the
+# *importable* top-level module name, because ``import_optional_dependency``
+# looks a module up here with the same string it hands to ``importlib``.
+# Update install.rst when updating versions!
+
+VERSIONS = {
+    "bs4": "4.4.1",
+    "bottleneck": "1.2.1",
+    "fastparquet": "0.2.1",
+    "gcsfs": "0.1.0",
+    "matplotlib": "2.2.2",
+    "numexpr": "2.6.2",
+    "openpyxl": "2.4.0",
+    "pandas_gbq": "0.8.0",
+    "pyarrow": "0.9.0",
+    # Kept for backward compatibility only; PyTables is imported as
+    # ``tables``, so this key is never hit by the import helper.
+    "pytables": "3.4.2",
+    "s3fs": "0.0.8",
+    "scipy": "0.19.0",
+    "sqlalchemy": "1.1.4",
+    "tables": "3.4.2",
+    "xarray": "0.8.2",
+    "xlrd": "1.0.0",
+    # xlwt releases are 1.x; a "2.4.0" floor (presumably copied from
+    # openpyxl) would reject every installed version.
+    "xlwt": "1.2.0",
+}
+
+# Template for the ImportError raised when a dependency is not installed.
+message = (
+    "Missing optional dependency '{name}'. {extra} "
+    "Use pip or conda to install {name}."
+)
+# Template for the error / warning issued when a dependency is too old.
+version_message = (
+    "Pandas requires version '{minimum_version}' or newer of '{name}' "
+    "(version '{actual_version}' currently installed)."
+)
+
+
+def _get_version(module: types.ModuleType) -> str:
+    """
+    Return the version advertised by an imported module.
+
+    ``__version__`` is consulted first and ``__VERSION__`` second (xlrd
+    uses the capitalized spelling). An ImportError is raised when neither
+    attribute is set.
+    """
+    for attr in ("__version__", "__VERSION__"):
+        found = getattr(module, attr, None)
+        if found is not None:
+            return found
+
+    raise ImportError(
+        "Can't determine version for {}".format(module.__name__)
+    )
+
+
+def import_optional_dependency(
+    name: str,
+    extra: str = "",
+    raise_on_missing: bool = True,
+    on_version: str = "raise",
+) -> Optional[types.ModuleType]:
+    """
+    Import an optional dependency.
+
+    By default, if a dependency is missing an ImportError with a nice
+    message will be raised. If a dependency is present, but older than the
+    minimum recorded in ``VERSIONS``, we raise as well.
+
+    Parameters
+    ----------
+    name : str
+        The module name. This should be top-level only, so that the
+        version may be checked.
+    extra : str, default ""
+        Additional text to include in the ImportError message.
+    raise_on_missing : bool, default True
+        Whether to raise if the optional dependency is not found.
+        When False and the module is not present, None is returned.
+    on_version : {'raise', 'warn', 'ignore'}, default 'raise'
+        What to do when a dependency's version is too old.
+
+        * raise : Raise an ImportError
+        * warn : Emit a UserWarning that the version is too old and
+          return None
+        * ignore : Return the module, even if the version is too old.
+          It's expected that users validate the version locally when
+          using ``on_version="ignore"`` (see ``io/html.py``)
+
+    Returns
+    -------
+    maybe_module : Optional[ModuleType]
+        The imported module, when found and the version is correct.
+        None is returned when the package is not found and `raise_on_missing`
+        is False, or when the package's version is too old and `on_version`
+        is ``'warn'``.
+
+    Raises
+    ------
+    ValueError
+        If `on_version` is not one of the accepted values.
+    ImportError
+        If the module is missing and `raise_on_missing` is True, if its
+        version cannot be determined, or if it is too old and `on_version`
+        is ``'raise'``.
+    """
+    # Validate eagerly with a real exception: an ``assert`` is stripped under
+    # ``python -O`` and, placed in the "too old" branch, would let a typo
+    # silently behave like "ignore" whenever the version happens to be fine.
+    if on_version not in {"warn", "raise", "ignore"}:
+        raise ValueError(
+            "on_version must be one of 'raise', 'warn' or 'ignore', "
+            "got {!r}".format(on_version)
+        )
+
+    try:
+        module = importlib.import_module(name)
+    except ImportError:
+        if raise_on_missing:
+            # ``from None`` hides the original traceback so users only see
+            # the install hint.
+            raise ImportError(message.format(name=name, extra=extra)) from None
+        return None
+
+    minimum_version = VERSIONS.get(name)
+    if not minimum_version:
+        # No minimum recorded for this module: nothing to check.
+        return module
+
+    version = _get_version(module)
+    too_old = distutils.version.LooseVersion(version) < minimum_version
+    if not too_old or on_version == "ignore":
+        return module
+
+    msg = version_message.format(
+        minimum_version=minimum_version,
+        name=name,
+        actual_version=version,
+    )
+    if on_version == "warn":
+        warnings.warn(msg, UserWarning)
+        return None
+    raise ImportError(msg)
diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
@@ -15,6 +15,7 @@
 from pandas._libs.sparse import BlockIndex, IntIndex, SparseIndex
 from pandas._libs.tslibs import NaT
 import pandas.compat as compat
+from pandas.compat._optional import import_optional_dependency
 from pandas.compat.numpy import function as nv
 from pandas.errors import PerformanceWarning
 
@@ -2205,10 +2206,8 @@ def to_coo(self):
         float32. By numpy.find_common_type convention, mixing int64 and
         and uint64 will result in a float64 dtype.
         """
-        try:
-            from scipy.sparse import coo_matrix
-        except ImportError:
-            raise ImportError('Scipy is not installed')
+        import_optional_dependency("scipy")
+        from scipy.sparse import coo_matrix
 
         dtype = find_common_type(self._parent.dtypes)
         if isinstance(dtype, SparseDtype):

diff --git a/pandas/core/computation/check.py b/pandas/core/computation/check.py
@@ -1,24 +1,11 @@
-from distutils.version import LooseVersion
-import warnings
-
-_NUMEXPR_INSTALLED = False
-_MIN_NUMEXPR_VERSION = "2.6.2"
-_NUMEXPR_VERSION = None
-
-try:
-    import numexpr as ne
-    ver = LooseVersion(ne.__version__)
-    _NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION)
-    _NUMEXPR_VERSION = ver
-
-    if not _NUMEXPR_INSTALLED:
-        warnings.warn(
-            "The installed version of numexpr {ver} is not supported "
-            "in pandas and will be not be used\nThe minimum supported "
-            "version is {min_ver}\n".format(
-                ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)
-
-except ImportError:  # pragma: no cover
-    pass
+from pandas.compat._optional import import_optional_dependency
+
+# ``ne`` is None when numexpr is missing (raise_on_missing=False) or older
+# than the supported minimum (on_version="warn" warns and returns None).
+ne = import_optional_dependency(
+    "numexpr", raise_on_missing=False, on_version="warn"
+)
+_NUMEXPR_INSTALLED = ne is not None
+_NUMEXPR_VERSION = ne.__version__ if _NUMEXPR_INSTALLED else None
 
 __all__ = ['_NUMEXPR_INSTALLED', '_NUMEXPR_VERSION']
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -16,6 +16,7 @@
 
 from pandas._libs import Timestamp, iNaT, properties
 from pandas.compat import set_function_name
+from pandas.compat._optional import import_optional_dependency
 from pandas.compat.numpy import function as nv
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import (
@@ -2750,15 +2751,7 @@ class      (index) object 'bird' 'bird' 'mammal' 'mammal'
         Data variables:
             speed    (date, animal) int64 350 18 361 15
         """
-        try:
-            import xarray
-        except ImportError:
-            # Give a nice error message
-            raise ImportError("the xarray library is not installed\n"
-                              "you can install via conda\n"
-                              "conda install xarray\n"
-                              "or via pip\n"
-                              "pip install xarray\n")
+        xarray = import_optional_dependency("xarray")
 
         if self.ndim == 1:
             return xarray.DataArray.from_series(self)