From 6717aa06dcaa1950ffb46fef454f5df9404209bd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 13 Feb 2015 17:51:09 -0500 Subject: [PATCH 1/3] import msgpack deps only as needed (GH9482) --- pandas/io/packers.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 7da86565b51cd..a8ad8c058a2b4 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -61,18 +61,30 @@ import pandas.core.internals as internals from pandas.msgpack import Unpacker as _Unpacker, Packer as _Packer -import zlib - -try: - import blosc - _BLOSC = True -except: - _BLOSC = False # until we can pass this into our conversion functions, # this is pretty hacky compressor = None +_IMPORTS = False +_BLOSC = False + +def _importers(): + # import things we need + # but make this done on a first use basis + + global _IMPORTS + if _IMPORTS: + return + + _IMPORTS = True + global _BLOSC + import zlib + try: + import blosc + _BLOSC = True + except: + pass def to_msgpack(path_or_buf, *args, **kwargs): """ @@ -91,6 +103,7 @@ def to_msgpack(path_or_buf, *args, **kwargs): compress : type of compressor (zlib or blosc), default to None (no compression) """ + _importers() global compressor compressor = kwargs.pop('compress', None) append = kwargs.pop('append', None) @@ -133,6 +146,7 @@ def read_msgpack(path_or_buf, iterator=False, **kwargs): obj : type of object stored in file """ + _importers() path_or_buf, _ = get_filepath_or_buffer(path_or_buf) if iterator: return Iterator(path_or_buf) From be1bbfe5dae5365b5a702d894e06203f76317548 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 13 Feb 2015 18:01:22 -0500 Subject: [PATCH 2/3] make gbq imports on-demand --- pandas/io/gbq.py | 96 ++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 39 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 572a8be5c65e8..225adaf6803a6 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -13,7 +13,7 @@ from pandas.tools.merge import concat from pandas.core.common import PandasError - +_IMPORTS = False _GOOGLE_API_CLIENT_INSTALLED = False _GOOGLE_API_CLIENT_VALID_VERSION = False _GOOGLE_FLAGS_INSTALLED = False @@ -21,52 +21,66 @@ _HTTPLIB2_INSTALLED = False _SETUPTOOLS_INSTALLED = False -if not compat.PY3: +def _importers(): + # import things we need + # but make this done on a first use basis - try: - import pkg_resources - _SETUPTOOLS_INSTALLED = True - except ImportError: - _SETUPTOOLS_INSTALLED = False - - if _SETUPTOOLS_INSTALLED: - try: - from apiclient.discovery import build - from apiclient.http import MediaFileUpload - from apiclient.errors import HttpError + global _IMPORTS + if _IMPORTS: + return - from oauth2client.client import OAuth2WebServerFlow - from oauth2client.client import AccessTokenRefreshError - from oauth2client.client import flow_from_clientsecrets - from oauth2client.file import Storage - from oauth2client.tools import run - _GOOGLE_API_CLIENT_INSTALLED=True - _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version + _IMPORTS = True - if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2.0': - _GOOGLE_API_CLIENT_VALID_VERSION = True + if not compat.PY3: + global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \ + _GOOGLE_FLAGS_INSTALLED, _GOOGLE_FLAGS_VALID_VERSION, \ + _HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED + + try: + import pkg_resources + _SETUPTOOLS_INSTALLED = True except ImportError: - _GOOGLE_API_CLIENT_INSTALLED = False + _SETUPTOOLS_INSTALLED = False + if _SETUPTOOLS_INSTALLED: + try: + from apiclient.discovery import build + from apiclient.http import MediaFileUpload + from apiclient.errors import HttpError - try: - import gflags as flags - _GOOGLE_FLAGS_INSTALLED = True + from oauth2client.client import OAuth2WebServerFlow + from oauth2client.client import AccessTokenRefreshError + from oauth2client.client import flow_from_clientsecrets + from oauth2client.file import Storage + from oauth2client.tools import run + _GOOGLE_API_CLIENT_INSTALLED=True + _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version - _GOOGLE_FLAGS_VERSION = pkg_resources.get_distribution('python-gflags').version + if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2.0': + _GOOGLE_API_CLIENT_VALID_VERSION = True - if LooseVersion(_GOOGLE_FLAGS_VERSION) >= '2.0': - _GOOGLE_FLAGS_VALID_VERSION = True + except ImportError: + _GOOGLE_API_CLIENT_INSTALLED = False - except ImportError: - _GOOGLE_FLAGS_INSTALLED = False - try: - import httplib2 - _HTTPLIB2_INSTALLED = True - except ImportError: - _HTTPLIB2_INSTALLED = False + try: + import gflags as flags + _GOOGLE_FLAGS_INSTALLED = True + + _GOOGLE_FLAGS_VERSION = pkg_resources.get_distribution('python-gflags').version + + if LooseVersion(_GOOGLE_FLAGS_VERSION) >= '2.0': + _GOOGLE_FLAGS_VALID_VERSION = True + + except ImportError: + _GOOGLE_FLAGS_INSTALLED = False + + try: + import httplib2 + _HTTPLIB2_INSTALLED = True + except ImportError: + _HTTPLIB2_INSTALLED = False logger = logging.getLogger('pandas.io.gbq') @@ -118,8 +132,10 @@ class InvalidColumnOrder(PandasError, IOError): """ pass -class GbqConnector: +class GbqConnector(object): + def __init__(self, project_id, reauth=False): + self.project_id = project_id self.reauth = reauth self.credentials = self.get_credentials() @@ -298,6 +314,8 @@ def _parse_entry(field_value, field_type): return field_value def _test_imports(): + + _importers() _GOOGLE_API_CLIENT_INSTALLED _GOOGLE_API_CLIENT_VALID_VERSION _GOOGLE_FLAGS_INSTALLED @@ -410,8 +428,8 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000, the defined table schema and column types. For simplicity, this method uses the Google BigQuery streaming API. The to_gbq method chunks data into a default chunk size of 10,000. Failures return the complete error - response which can be quite long depending on the size of the insert. - There are several important limitations of the Google streaming API + response which can be quite long depending on the size of the insert. + There are several important limitations of the Google streaming API which are detailed at: https://developers.google.com/bigquery/streaming-data-into-bigquery. From cbb6fe8ca3754ecdbe3d1a9b99d175cce53a4917 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 13 Feb 2015 18:08:56 -0500 Subject: [PATCH 3/3] import io/html.py as needed --- pandas/io/html.py | 49 ++++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 13318203bec67..9f5c10ce128d2 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -20,29 +20,40 @@ from pandas.core import common as com from pandas import Series +_IMPORTS = False +_HAS_BS4 = False +_HAS_LXML = False +_HAS_HTML5LIB = False -try: - import bs4 -except ImportError: - _HAS_BS4 = False -else: - _HAS_BS4 = True +def _importers(): + # import things we need + # but make this done on a first use basis + global _IMPORTS + if _IMPORTS: + return -try: - import lxml -except ImportError: - _HAS_LXML = False -else: - _HAS_LXML = True + _IMPORTS = True + global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB -try: - import html5lib -except ImportError: - _HAS_HTML5LIB = False -else: - _HAS_HTML5LIB = True + try: + import bs4 + _HAS_BS4 = True + except ImportError: + pass + + try: + import lxml + _HAS_LXML = True + except ImportError: + pass + + try: + import html5lib + _HAS_HTML5LIB = True + except ImportError: + pass ############# @@ -651,6 +662,7 @@ def _parser_dispatch(flavor): raise ImportError("html5lib not found, please install it") if not _HAS_BS4: raise ImportError("BeautifulSoup4 (bs4) not found, please install it") + import bs4 if bs4.__version__ == LooseVersion('4.2.0'): raise ValueError("You're using a version" " of BeautifulSoup4 (4.2.0) that has been" @@ -839,6 +851,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, -------- pandas.read_csv """ + _importers() if infer_types is not None: warnings.warn("infer_types has no effect since 0.15", FutureWarning)