From 55764ea667b14ba858fc8eed14bfe4d8ecc296df Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 16 Oct 2015 16:15:04 -0400 Subject: [PATCH] BUG: Issue in the gbq module when authenticating on remote servers #8489 --- doc/source/io.rst | 24 ++++++++++++----- doc/source/whatsnew/v0.17.1.txt | 3 +++ pandas/io/gbq.py | 47 +++++++++++++++++++++++++++------ pandas/io/tests/test_gbq.py | 35 ++++++++++++++++++------ 4 files changed, 86 insertions(+), 23 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 014daa3f68dbb..e04d2ae569d23 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4059,6 +4059,7 @@ The key functions are: .. autosummary:: :toctree: generated/ + authorize read_gbq to_gbq @@ -4066,6 +4067,22 @@ The key functions are: .. _io.bigquery_reader: +Authorization +''''''''''''' + +Authorization is required in order to use the BigQuery API. You must call the +:func:`~pandas.io.gbq.authorize` function to start the authorization process. In general, +this is as simple as following the prompts in a browser. A code will be provided to complete +the process. A credentials file will be saved to disk so that you only need to authorize once +as long as the credentials have not been revoked. Additional information on the authentication +can be found `here <https://developers.google.com/api-client-library/python/guide/aaa_oauth>`__. + +To begin the authorization process, use the :func:`~pandas.io.gbq.authorize` function + +.. code-block:: python + + gbq.authorize() + Querying '''''''' @@ -4080,13 +4097,6 @@ into a DataFrame using the :func:`~pandas.io.gbq.read_gbq` function. data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', projectid) -You will then be authenticated to the specified BigQuery account -via Google's Oauth2 mechanism. In general, this is as simple as following the -prompts in a browser window which will be opened for you. Should the browser not -be available, or fail to launch, a code will be provided to complete the process -manually. 
Additional information on the authentication mechanism can be found -`here <https://developers.google.com/api-client-library/python/guide/aaa_oauth>`__. - You can define which column from BigQuery to use as an index in the destination DataFrame as well as a preferred column order as follows: diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 6d4b61bb97f22..7657b74a3bb78 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -23,6 +23,8 @@ Enhancements .. _whatsnew_0171.enhancements.other: - Improve the error message in :func:`pandas.io.gbq.to_gbq` when a streaming insert fails (:issue:`11285`) +- Added :func:`pandas.io.gbq.authorize` to allow users to authenticate with Google BigQuery. + See the :ref:`docs <io.bigquery>` for more details (:issue:`11141`). Other Enhancements ^^^^^^^^^^^^^^^^^^ @@ -97,3 +99,4 @@ Bug Fixes - Fixed a bug that prevented the construction of an empty series of dtype ``datetime64[ns, tz]`` (:issue:`11245`). - Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`) +- Resolve the issue where authentication on remote servers fails silently when using the gbq module. 
(:issue:`11141`) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index e7241036b94c4..d68466316586b 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -15,6 +15,8 @@ from pandas.util.decorators import deprecate from pandas.compat import lzip, bytes_to_str +CREDENTIALS_FILE = 'bigquery_credentials.dat' + def _check_google_client_version(): try: @@ -109,7 +111,7 @@ class TableCreationError(PandasError, ValueError): class GbqConnector(object): - def __init__(self, project_id, reauth=False): + def __init__(self, project_id=None, reauth=False): self.test_google_api_imports() self.project_id = project_id self.reauth = reauth @@ -128,23 +130,44 @@ def test_google_api_imports(self): except ImportError as e: raise ImportError("Missing module required for Google BigQuery support: {0}".format(str(e))) - def get_credentials(self): + def authorize(self): from oauth2client.client import OAuth2WebServerFlow from oauth2client.file import Storage - from oauth2client.tools import run_flow, argparser _check_google_client_version() + storage = Storage(CREDENTIALS_FILE) flow = OAuth2WebServerFlow(client_id='495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com', client_secret='kOc9wMptUtxkcIFbtZCcrEAc', scope='https://www.googleapis.com/auth/bigquery', redirect_uri='urn:ietf:wg:oauth:2.0:oob') + print('Please visit the following url to obtain an authorization code: {0}'.format(flow.step1_get_authorize_url())) + + authorization_prompt_message = 'Enter authorization code and press enter: ' + + if compat.PY3: + code = eval(input(authorization_prompt_message)) + else: + code = raw_input(authorization_prompt_message) - storage = Storage('bigquery_credentials.dat') + code.strip() + storage.put(flow.step2_exchange(code)) credentials = storage.get() - if credentials is None or credentials.invalid or self.reauth: - credentials = run_flow(flow, storage, argparser.parse_args([])) + return credentials + + def get_credentials(self): + from oauth2client.file import Storage + 
+ _check_google_client_version() + + credentials = Storage(CREDENTIALS_FILE).get() + + if self.reauth: + credentials = self.authorize() + + if credentials is None or credentials.invalid: + raise AccessDenied("The credentials are missing or invalid. Please run gbq.authorize().") return credentials @@ -215,8 +238,8 @@ def run_query(self, query, verbose=True): try: query_reply = job_collection.insert(projectId=self.project_id, body=job_data).execute() except AccessTokenRefreshError: - raise AccessDenied("The credentials have been revoked or expired, please re-run the application " - "to re-authorize") + raise AccessDenied("The credentials have been revoked or expired, please run gbq.authorize() " + "to re-authorize.") except HttpError as ex: self.process_http_error(ex) @@ -518,6 +541,12 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000, connector.load_data(dataframe, dataset_id, table_id, chunksize, verbose) +def authorize(): + """ Allows users to create the credentials file required for BigQuery authorization """ + + GbqConnector(reauth=True) + + def generate_bq_schema(df, default_type='STRING'): # deprecation TimeSeries, #11121 @@ -526,6 +555,7 @@ def generate_bq_schema(df, default_type='STRING'): return _generate_bq_schema(df, default_type=default_type) + def _generate_bq_schema(df, default_type='STRING'): """ Given a passed df, generate the associated Google BigQuery schema. 
@@ -554,6 +584,7 @@ def _generate_bq_schema(df, default_type='STRING'): return {'fields': fields} + class _Table(GbqConnector): def __init__(self, project_id, dataset_id, reauth=False): diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index cc1e901d8f119..f3df2bfa0959b 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -29,7 +29,7 @@ _SETUPTOOLS_INSTALLED = False -def _test_imports(): +def validate_imports(): global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \ _HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED @@ -83,13 +83,22 @@ def _test_imports(): raise ImportError("pandas requires httplib2 for Google BigQuery support") -def test_requirements(): +def validate_requirements(): try: - _test_imports() + validate_imports() except (ImportError, NotImplementedError) as import_exception: raise nose.SkipTest(import_exception) +def validate_authorization(): + try: + gbq.GbqConnector(PROJECT_ID) + except gbq.AccessDenied: + gbq.authorize() + except ImportError as import_exception: + raise nose.SkipTest(import_exception) + + def clean_gbq_environment(): dataset = gbq._Dataset(PROJECT_ID) @@ -126,12 +135,20 @@ def test_generate_bq_schema_deprecated(): gbq.generate_bq_schema(df) class TestGBQConnectorIntegration(tm.TestCase): - def setUp(self): - test_requirements() + + @classmethod + def setUpClass(cls): + # - GLOBAL CLASS FIXTURES - + # put here any instruction you want to execute only *ONCE* *BEFORE* executing *ALL* tests + # described below. 
if not PROJECT_ID: raise nose.SkipTest("Cannot run integration tests without a project id") + validate_requirements() + validate_authorization() + + def setUp(self): self.sut = gbq.GbqConnector(PROJECT_ID) def test_should_be_able_to_make_a_connector(self): @@ -157,7 +174,7 @@ def test_should_be_able_to_get_results_from_query(self): class TestReadGBQUnitTests(tm.TestCase): def setUp(self): - test_requirements() + validate_requirements() def test_should_return_bigquery_integers_as_python_floats(self): result = gbq._parse_entry(1, 'INTEGER') @@ -201,6 +218,7 @@ def test_that_parse_data_works_properly(self): class TestReadGBQIntegration(tm.TestCase): + @classmethod def setUpClass(cls): # - GLOBAL CLASS FIXTURES - @@ -210,7 +228,7 @@ def setUpClass(cls): if not PROJECT_ID: raise nose.SkipTest("Cannot run integration tests without a project id") - test_requirements() + validate_requirements() def setUp(self): # - PER-TEST FIXTURES - @@ -373,7 +391,8 @@ def setUpClass(cls): if not PROJECT_ID: raise nose.SkipTest("Cannot run integration tests without a project id") - test_requirements() + validate_requirements() + validate_authorization() clean_gbq_environment() gbq._Dataset(PROJECT_ID).create(DATASET_ID + "1")