From 2ba878839ccf6d317e0c9003026bf2a6bfcd479a Mon Sep 17 00:00:00 2001 From: ichuang Date: Mon, 20 Oct 2014 20:52:36 -0400 Subject: [PATCH 01/10] ENH allow bigquery connector to optionally use gcloud credentials gbq fix allowing credentials file path to be specified ENH: allow bigquery connector to optionally use gcloud credentials --- pandas/io/gbq.py | 57 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 20c1e9f591081..00d5039dc7402 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -38,6 +38,7 @@ from oauth2client.client import OAuth2WebServerFlow from oauth2client.client import AccessTokenRefreshError from oauth2client.client import flow_from_clientsecrets + from oauth2client.client import Credentials from oauth2client.file import Storage from oauth2client.tools import run _GOOGLE_API_CLIENT_INSTALLED=True @@ -72,6 +73,13 @@ logger = logging.getLogger('pandas.io.gbq') logger.setLevel(logging.ERROR) +class MissingOauthCredentials(PandasError, IOError): + """ + Raised when Google BigQuery authentication credentials + file is missing, but was needed. + """ + pass + class InvalidPageToken(PandasError, IOError): """ Raised when Google BigQuery fails to return, @@ -119,11 +127,12 @@ class InvalidColumnOrder(PandasError, IOError): pass class GbqConnector: - def __init__(self, project_id, reauth=False): - self.project_id = project_id - self.reauth = reauth - self.credentials = self.get_credentials() - self.service = self.get_service(self.credentials) + def __init__(self, project_id, reauth=False, gcloud_credentials=None): + self.project_id = project_id + self.reauth = reauth + self.gcloud_credentials = gcloud_credentials + self.credentials = self.get_credentials() + self.service = self.get_service(self.credentials) def get_credentials(self): flow = OAuth2WebServerFlow(client_id='495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com', @@ -131,8 +140,21 @@ def get_credentials(self): scope='https://www.googleapis.com/auth/bigquery', redirect_uri='urn:ietf:wg:oauth:2.0:oob') - storage = Storage('bigquery_credentials.dat') - credentials = storage.get() + gcfp = self.gcloud_credentials # a bit of mangling since this is dual-typed, str or bool + if self.gcloud_credentials == True: + gcfp = '' + + if self.gcloud_credentials is not None: + import json + import os + credfn = os.path.expanduser(gcfp or '~/.config/gcloud/credentials') + if not os.path.exists(credfn): + raise MissingOauthCredentials("Required google cloud authentication credentials file {0} missing.".format(credfn)) + gcloud_cred = json.loads(open(credfn).read())['data'][0]['credential'] + credentials = Credentials.new_from_json(json.dumps(gcloud_cred)) + else: + storage = Storage('bigquery_credentials.dat') + credentials = storage.get() if credentials is None or credentials.invalid or self.reauth: credentials = run(flow, storage) @@ -328,7 +350,8 @@ def _test_imports(): if not _HTTPLIB2_INSTALLED: raise ImportError("pandas requires httplib2 for Google BigQuery support") -def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False): +def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False, + gcloud_credentials=None): """Load data from Google BigQuery. 
THIS IS AN EXPERIMENTAL LIBRARY @@ -353,6 +376,12 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa reauth : boolean (default False) Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. + gcloud_credentials: boolean or str (default None) + Use oauth2 credentials from gcloud auth login. This is useful + if pandas is being run in an ipython notebook, and the user + has pre-existing authentication tokens. + Set to True to use the default path, ~/.config/gcloud/credentials. + Else provide an explicit path to file to use for credentials. Returns ------- @@ -366,7 +395,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa if not project_id: raise TypeError("Missing required parameter: project_id") - connector = GbqConnector(project_id, reauth = reauth) + connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials) schema, pages = connector.run_query(query) dataframe_list = [] while len(pages) > 0: @@ -401,7 +430,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa return final_df def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000, - verbose=True, reauth=False): + verbose=True, reauth=False, gcloud_credentials=None): """Write a DataFrame to a Google BigQuery table. THIS IS AN EXPERIMENTAL LIBRARY @@ -430,6 +459,12 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000, reauth : boolean (default False) Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. + gcloud_credentials: boolean or str (default None) + Use oauth2 credentials from gcloud auth login. This is useful + if pandas is being run in an ipython notebook, and the user + has pre-existing authentication tokens. + Set to True to use the default path, ~/.config/gcloud/credentials. + Else provide an explicit path to file to use for credentials. """ _test_imports() @@ -440,7 +475,7 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000, if not '.' in destination_table: raise NotFoundException("Invalid Table Name. 
Should be of the form 'datasetId.tableId' ") - connector = GbqConnector(project_id, reauth = reauth) + connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials) dataset_id, table_id = destination_table.rsplit('.',1) connector.load_data(dataframe, dataset_id, table_id, chunksize, verbose) From 6c44bbca5049d885173be79b99ec30bc4b818529 Mon Sep 17 00:00:00 2001 From: ichuang Date: Mon, 20 Oct 2014 23:05:28 -0400 Subject: [PATCH 02/10] move gcfp into if clause --- pandas/io/gbq.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 00d5039dc7402..3b64aa7e20d83 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -140,14 +140,14 @@ def get_credentials(self): scope='https://www.googleapis.com/auth/bigquery', redirect_uri='urn:ietf:wg:oauth:2.0:oob') - gcfp = self.gcloud_credentials # a bit of mangling since this is dual-typed, str or bool - if self.gcloud_credentials == True: - gcfp = '' if self.gcloud_credentials is not None: import json import os - credfn = os.path.expanduser(gcfp or '~/.config/gcloud/credentials') + gcfp = self.gcloud_credentials # a bit of mangling since this is dual-typed, str or bool + if self.gcloud_credentials == True: + gcfp = '~/.config/gcloud/credentials' + credfn = os.path.expanduser(gcfp) if not os.path.exists(credfn): raise MissingOauthCredentials("Required google cloud authentication credentials file {0} missing.".format(credfn)) gcloud_cred = json.loads(open(credfn).read())['data'][0]['credential'] From 36f38764db4e3d9d1a70d264122013cc680195d5 Mon Sep 17 00:00:00 2001 From: ichuang Date: Wed, 22 Oct 2014 21:50:47 -0400 Subject: [PATCH 03/10] formatting for proper sphinx rendering --- pandas/io/gbq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 3b64aa7e20d83..b989a4f0bc53b 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -376,7 +376,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa reauth : boolean (default False) Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. - gcloud_credentials: boolean or str (default None) + gcloud_credentials : boolean or str (default None) Use oauth2 credentials from gcloud auth login. This is useful if pandas is being run in an ipython notebook, and the user has pre-existing authentication tokens. @@ -459,7 +459,7 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000, reauth : boolean (default False) Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. - gcloud_credentials: boolean or str (default None) + gcloud_credentials : boolean or str (default None) Use oauth2 credentials from gcloud auth login. This is useful if pandas is being run in an ipython notebook, and the user has pre-existing authentication tokens. From 42f4b482e52bf7e8571ba0650f5fe450abcd6f8e Mon Sep 17 00:00:00 2001 From: Sean Schaefer Date: Mon, 10 Nov 2014 20:42:13 -0700 Subject: [PATCH 04/10] Add unit tests for gcloud authentication code. 
--- pandas/io/gbq.py | 1 + pandas/io/tests/data/gcloud_credentials | 62 +++++++++++++++++++++++++ pandas/io/tests/test_gbq.py | 44 ++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 pandas/io/tests/data/gcloud_credentials diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index b989a4f0bc53b..c813e05611f64 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -152,6 +152,7 @@ def get_credentials(self): raise MissingOauthCredentials("Required google cloud authentication credentials file {0} missing.".format(credfn)) gcloud_cred = json.loads(open(credfn).read())['data'][0]['credential'] credentials = Credentials.new_from_json(json.dumps(gcloud_cred)) + return credentials else: storage = Storage('bigquery_credentials.dat') credentials = storage.get() diff --git a/pandas/io/tests/data/gcloud_credentials b/pandas/io/tests/data/gcloud_credentials new file mode 100644 index 0000000000000..d0bd0381048a8 --- /dev/null +++ b/pandas/io/tests/data/gcloud_credentials @@ -0,0 +1,62 @@ +{ + "data": [ + { + "credential": { + "_class": "OAuth2Credentials", + "_module": "oauth2client.client", + "access_token": "ya29.xXx", + "client_id": "1112223456.apps.googleusercontent.com", + "client_secret": "aBc467", + "id_token": { + "at_hash": "OlAp__aV", + "aud": "1112223456.apps.googleusercontent.com", + "azp": "1112223456.apps.googleusercontent.com", + "cid": "1112223456.apps.googleusercontent.com", + "email": "luv-python-pandas@somemail.com", + "email_verified": true, + "exp": 1414558238, + "iat": 1414554338, + "id": "113403125016275849302", + "iss": "accounts.google.com", + "sub": "113403125016229475663", + "token_hash": "OlAp__aV", + "verified_email": true + }, + "invalid": @INVALID@, + "refresh_token": "1/asf87bbEGsb78", + "revoke_uri": "https://accounts.google.com/o/oauth2/revoke", + "token_expiry": "2014-10-29T04:50:38Z", + "token_response": { + "access_token": "ya29.bYsadfiU8542B5", + "expires_in": 3600, + "id_token": { + "at_hash": "OlAp__aV", + "aud": "11112233456.apps.googleusercontent.com", + "azp": "11112223456.apps.googleusercontent.com", + "cid": "11112223456.apps.googleusercontent.com", + "email": "luv-python-pandas@somemail.com", + "email_verified": true, + "exp": 1414558238, + "iat": 1414554338, + "id": "11340312501621345098732", + "iss": "accounts.google.com", + "sub": "1134031250162435660892", + "token_hash": "OlAp__aV", + "verified_email": true + }, + "refresh_token": "1/6v6asdf6NrR92", + "token_type": "Bearer" + }, + "token_uri": "https://accounts.google.com/o/oauth2/token", + "user_agent": "Cloud SDK Command Line Tool" + }, + "key": { + "account": "luv-python-pandas@somemail.com", + "clientId": "11112223456.apps.googleusercontent.com", + "scope": "https://www.googleapis.com/auth/appengine.admin https://www.googleapis.com/auth/bigquery https://www.googleapis.com/auth/compute https://www.googleapis.com/auth/devstorage.full_control https://www.googleapis.com/auth/userinfo.email https://www.googleapis.com/auth/ndev.cloudman https://www.googleapis.com/auth/cloud-platform https://www.googleapis.com/auth/sqlservice.admin https://www.googleapis.com/auth/prediction https://www.googleapis.com/auth/projecthosting", + "type": "google-cloud-sdk" + } + } + ], + "file_version": 1 +} diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 0f595f75bc66f..4761bb05ea3b2 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -9,6 +9,7 @@ import sys import platform from time import sleep +from tempfile import NamedTemporaryFile import numpy as np 
@@ -36,6 +37,27 @@ def test_requirements(): raise nose.SkipTest(import_exception) class TestGBQConnectorIntegration(tm.TestCase): + + @classmethod + def setUpClass(cls): + with open(os.path.join(tm.get_data_path(), 'gcloud_credentials'), 'r') as fin: + creds_json = fin.read() + + creds_json_invalid = creds_json.replace('@INVALID@', '"true"') + creds_json_valid = creds_json.replace('@INVALID@', '"false"') + + cls.creds_file_valid = NamedTemporaryFile() + cls.creds_file_valid.write(creds_json_valid.encode('UTF-8')) + cls.creds_file_valid.flush() + + cls.creds_file_invalid = NamedTemporaryFile() + cls.creds_file_invalid.write(creds_json_invalid.encode('UTF-8')) + cls.creds_file_invalid.flush() + + cls.non_creds_file = NamedTemporaryFile() + cls.non_creds_file.write('{"token": "50414e444153204556455259574845524521"}'.encode('UTF-8')) + cls.non_creds_file.flush() + def setUp(self): test_requirements() @@ -64,6 +86,28 @@ def test_should_be_able_to_get_results_from_query(self): schema, pages = self.sut.run_query('SELECT 1') self.assertTrue(pages is not None) + def test_should_raise_exception_with_invalid_gcloud_creds_path(self): + with tm.assertRaises(gbq.MissingOauthCredentials): + gbq.GbqConnector(PROJECT_ID, gcloud_credentials='missing_file') + + def test_should_fail_with_invalid_gcloud_credentials(self): + credentials = gbq.GbqConnector(PROJECT_ID, gcloud_credentials=self.creds_file_invalid.name).credentials + self.assertEqual(credentials.invalid, "true") + + def test_should_be_able_to_get_valid_gcloud_credentials(self): + credentials = gbq.GbqConnector(PROJECT_ID, gcloud_credentials=self.creds_file_valid.name).credentials + self.assertEqual(credentials.invalid, "false") + + def test_should_fail_if_gcloud_credentials_incorrectly_formatted(self): + with tm.assertRaises(KeyError): + gbq.GbqConnector(PROJECT_ID, gcloud_credentials=self.non_creds_file.name) + + @classmethod + def tearDownClass(cls): + cls.creds_file_valid.close() + cls.creds_file_invalid.close() + cls.non_creds_file.close() + class TestReadGBQUnitTests(tm.TestCase): def setUp(self): test_requirements() From b0ace12b469fe7626e4aa6cc9acf88cf898889cf Mon Sep 17 00:00:00 2001 From: ichuang Date: Sat, 15 Nov 2014 14:05:05 -0500 Subject: [PATCH 05/10] move import os to top for pep8 --- pandas/io/gbq.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index c813e05611f64..215e6827dae75 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -1,4 +1,5 @@ from datetime import datetime +import os import json import logging import sys @@ -142,8 +143,6 @@ def get_credentials(self): if self.gcloud_credentials is not None: - import json - import os gcfp = self.gcloud_credentials # a bit of mangling since this is dual-typed, str or bool if self.gcloud_credentials == True: gcfp = '~/.config/gcloud/credentials' From b023a569b0e192e9211cfe3be532b7c0ba54c4ac Mon Sep 17 00:00:00 2001 From: Sean Schaefer Date: Tue, 18 Nov 2014 22:48:29 -0700 Subject: [PATCH 06/10] Change LooseVersion check for Google Python Client from 1.2.0 to 1.2 --- pandas/io/gbq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index c813e05611f64..2d8f8393fc299 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -44,7 +44,7 @@ _GOOGLE_API_CLIENT_INSTALLED=True _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version - if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2.0': + if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2': 
_GOOGLE_API_CLIENT_VALID_VERSION = True except ImportError: From 4e0b0ef8f15bd4cae737fba8cc44378b255e7639 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Thu, 15 Jan 2015 15:09:06 -0800 Subject: [PATCH 07/10] BUG: fix common.is_hashable for NumPy scalars on Python 3 Fixes GH9276 This now relies entirely on the result of calling ``hash`` on the argument. Note: I had to change the test for old style classes on Python 2 -- these are now considered hashable by `is_hashable`, because they don't fail `hash`. CC aevri --- pandas/core/common.py | 12 ++++++------ pandas/tests/test_common.py | 7 ++----- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index f8f5928ca7d51..d2c0406d87310 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2537,13 +2537,13 @@ def is_hashable(arg): >>> is_hashable(a) False """ - # don't consider anything not collections.Hashable, so as not to broaden - # the definition of hashable beyond that. For example, old-style classes - # are not collections.Hashable but they won't fail hash(). - if not isinstance(arg, collections.Hashable): - return False + # unfortunately, we can't use isinstance(arg, collections.Hashable), which + # can be faster than calling hash, because numpy scalars on Python 3 fail + # this test + + # reconsider this decision once this numpy bug is fixed: + # https://github.com/numpy/numpy/issues/5562 - # narrow the definition of hashable if hash(arg) fails in practice try: hash(arg) except TypeError: diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 36d6c39586d97..3d232878fb15d 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -424,7 +424,7 @@ def __hash__(self): raise TypeError("Not hashable") hashable = ( - 1, 'a', tuple(), (1,), HashableClass(), + 1, 3.14, np.float64(3.14), 'a', tuple(), (1,), HashableClass(), ) not_hashable = ( [], UnhashableClass1(), @@ -434,13 +434,10 @@ def __hash__(self): ) for i in hashable: - assert isinstance(i, collections.Hashable) assert com.is_hashable(i) for i in not_hashable: - assert not isinstance(i, collections.Hashable) assert not com.is_hashable(i) for i in abc_hashable_not_really_hashable: - assert isinstance(i, collections.Hashable) assert not com.is_hashable(i) # numpy.array is no longer collections.Hashable as of @@ -455,7 +452,7 @@ class OldStyleClass(): pass c = OldStyleClass() assert not isinstance(c, collections.Hashable) - assert not com.is_hashable(c) + assert com.is_hashable(c) hash(c) # this will not raise From d4c4c3776804d1ed524fd97cff2809b3587669fa Mon Sep 17 00:00:00 2001 From: ichuang Date: Mon, 20 Oct 2014 20:52:36 -0400 Subject: [PATCH 08/10] ENH allow bigquery connector to optionally use gcloud credentials. --- doc/source/io.rst | 11 ++++- pandas/io/gbq.py | 58 ++++++++++++++++++----- pandas/io/tests/data/gcloud_credentials | 62 +++++++++++++++++++++++++ pandas/io/tests/test_gbq.py | 44 ++++++++++++++++++ 4 files changed, 162 insertions(+), 13 deletions(-) create mode 100644 pandas/io/tests/data/gcloud_credentials diff --git a/doc/source/io.rst b/doc/source/io.rst index f8fe6fc8a4c3a..ed43b8d731260 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3643,9 +3643,18 @@ You will then be authenticated to the specified BigQuery account via Google's Oauth2 mechanism. In general, this is as simple as following the prompts in a browser window which will be opened for you. 
Should the browser not be available, or fail to launch, a code will be provided to complete the process -manually. Additional information on the authentication mechanism can be found +manually. Additional information on this authentication mechanism can be found `here `__ +Alternatively, you can use a headless authentication mechanism via the Google Cloud SDK. More +information on installing the SDK and authenticating is available `here `__ + +Once you have your authentication credentials setup, you can use this approach by including the gcloud_credentials parameter. It will accept either a boolean True (in which case it uses the SDK's default credentials path), or string filepath to the credentials file: + +.. code-block:: python + + data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', project_id = projectid, gcloud_credentials = True) + You can define which column from BigQuery to use as an index in the destination DataFrame as well as a preferred column order as follows: diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 572a8be5c65e8..81329b695d3ed 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -1,4 +1,5 @@ from datetime import datetime +import os import json import logging import sys @@ -38,12 +39,13 @@ from oauth2client.client import OAuth2WebServerFlow from oauth2client.client import AccessTokenRefreshError from oauth2client.client import flow_from_clientsecrets + from oauth2client.client import Credentials from oauth2client.file import Storage from oauth2client.tools import run _GOOGLE_API_CLIENT_INSTALLED=True _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version - if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2.0': + if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2': _GOOGLE_API_CLIENT_VALID_VERSION = True except ImportError: @@ -72,6 +74,13 @@ logger = logging.getLogger('pandas.io.gbq') logger.setLevel(logging.ERROR) +class MissingOauthCredentials(PandasError, IOError): + """ + Raised when Google BigQuery authentication credentials + file is missing, but was needed. 
+ """ + pass + class InvalidPageToken(PandasError, IOError): """ Raised when Google BigQuery fails to return, @@ -119,11 +128,12 @@ class InvalidColumnOrder(PandasError, IOError): pass class GbqConnector: - def __init__(self, project_id, reauth=False): - self.project_id = project_id - self.reauth = reauth - self.credentials = self.get_credentials() - self.service = self.get_service(self.credentials) + def __init__(self, project_id, reauth=False, gcloud_credentials=None): + self.project_id = project_id + self.reauth = reauth + self.gcloud_credentials = gcloud_credentials + self.credentials = self.get_credentials() + self.service = self.get_service(self.credentials) def get_credentials(self): flow = OAuth2WebServerFlow(client_id='495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com', @@ -131,8 +141,19 @@ def get_credentials(self): scope='https://www.googleapis.com/auth/bigquery', redirect_uri='urn:ietf:wg:oauth:2.0:oob') - storage = Storage('bigquery_credentials.dat') - credentials = storage.get() + if self.gcloud_credentials is not None: + gcfp = self.gcloud_credentials # a bit of mangling since this is dual-typed, str or bool + if self.gcloud_credentials == True: + gcfp = '~/.config/gcloud/credentials' + credfn = os.path.expanduser(gcfp) + if not os.path.exists(credfn): + raise MissingOauthCredentials("Required google cloud authentication credentials file {0} missing.".format(credfn)) + gcloud_cred = json.loads(open(credfn).read())['data'][0]['credential'] + credentials = Credentials.new_from_json(json.dumps(gcloud_cred)) + return credentials + else: + storage = Storage('bigquery_credentials.dat') + credentials = storage.get() if credentials is None or credentials.invalid or self.reauth: credentials = run(flow, storage) @@ -328,7 +349,8 @@ def _test_imports(): if not _HTTPLIB2_INSTALLED: raise ImportError("pandas requires httplib2 for Google BigQuery support") -def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False): +def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False, + gcloud_credentials=None): """Load data from Google BigQuery. THIS IS AN EXPERIMENTAL LIBRARY @@ -353,6 +375,12 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa reauth : boolean (default False) Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. + gcloud_credentials : boolean or str (default None) + Use oauth2 credentials from gcloud auth login. This is useful + if pandas is being run in an ipython notebook, and the user + has pre-existing authentication tokens. + Set to True to use the default path, ~/.config/gcloud/credentials. + Else provide an explicit path to file to use for credentials. Returns ------- @@ -366,7 +394,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa if not project_id: raise TypeError("Missing required parameter: project_id") - connector = GbqConnector(project_id, reauth = reauth) + connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials) schema, pages = connector.run_query(query) dataframe_list = [] while len(pages) > 0: @@ -401,7 +429,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa return final_df def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000, - verbose=True, reauth=False): + verbose=True, reauth=False, gcloud_credentials=None): """Write a DataFrame to a Google BigQuery table. 
THIS IS AN EXPERIMENTAL LIBRARY @@ -430,6 +458,12 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000, reauth : boolean (default False) Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. + gcloud_credentials : boolean or str (default None) + Use oauth2 credentials from gcloud auth login. This is useful + if pandas is being run in an ipython notebook, and the user + has pre-existing authentication tokens. + Set to True to use the default path, ~/.config/gcloud/credentials. + Else provide an explicit path to file to use for credentials. """ _test_imports() @@ -440,7 +474,7 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000, if not '.' in destination_table: raise NotFoundException("Invalid Table Name. Should be of the form 'datasetId.tableId' ") - connector = GbqConnector(project_id, reauth = reauth) + connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials) dataset_id, table_id = destination_table.rsplit('.',1) connector.load_data(dataframe, dataset_id, table_id, chunksize, verbose) diff --git a/pandas/io/tests/data/gcloud_credentials b/pandas/io/tests/data/gcloud_credentials new file mode 100644 index 0000000000000..d0bd0381048a8 --- /dev/null +++ b/pandas/io/tests/data/gcloud_credentials @@ -0,0 +1,62 @@ +{ + "data": [ + { + "credential": { + "_class": "OAuth2Credentials", + "_module": "oauth2client.client", + "access_token": "ya29.xXx", + "client_id": "1112223456.apps.googleusercontent.com", + "client_secret": "aBc467", + "id_token": { + "at_hash": "OlAp__aV", + "aud": "1112223456.apps.googleusercontent.com", + "azp": "1112223456.apps.googleusercontent.com", + "cid": "1112223456.apps.googleusercontent.com", + "email": "luv-python-pandas@somemail.com", + "email_verified": true, + "exp": 1414558238, + "iat": 1414554338, + "id": "113403125016275849302", + "iss": "accounts.google.com", + "sub": "113403125016229475663", + "token_hash": "OlAp__aV", + "verified_email": true + }, + "invalid": @INVALID@, + "refresh_token": "1/asf87bbEGsb78", + "revoke_uri": "https://accounts.google.com/o/oauth2/revoke", + "token_expiry": "2014-10-29T04:50:38Z", + "token_response": { + "access_token": "ya29.bYsadfiU8542B5", + "expires_in": 3600, + "id_token": { + "at_hash": "OlAp__aV", + "aud": "11112233456.apps.googleusercontent.com", + "azp": "11112223456.apps.googleusercontent.com", + "cid": "11112223456.apps.googleusercontent.com", + "email": "luv-python-pandas@somemail.com", + "email_verified": true, + "exp": 1414558238, + "iat": 1414554338, + "id": "11340312501621345098732", + "iss": "accounts.google.com", + "sub": "1134031250162435660892", + "token_hash": "OlAp__aV", + "verified_email": true + }, + "refresh_token": "1/6v6asdf6NrR92", + "token_type": "Bearer" + }, + "token_uri": "https://accounts.google.com/o/oauth2/token", + "user_agent": "Cloud SDK Command Line Tool" + }, + "key": { + "account": "luv-python-pandas@somemail.com", + "clientId": "11112223456.apps.googleusercontent.com", + "scope": "https://www.googleapis.com/auth/appengine.admin https://www.googleapis.com/auth/bigquery https://www.googleapis.com/auth/compute https://www.googleapis.com/auth/devstorage.full_control https://www.googleapis.com/auth/userinfo.email https://www.googleapis.com/auth/ndev.cloudman https://www.googleapis.com/auth/cloud-platform https://www.googleapis.com/auth/sqlservice.admin https://www.googleapis.com/auth/prediction https://www.googleapis.com/auth/projecthosting", + "type": 
"google-cloud-sdk" + } + } + ], + "file_version": 1 +} diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 2f79cc8ba1826..97e4d76a2eb20 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -9,6 +9,7 @@ import sys import platform from time import sleep +from tempfile import NamedTemporaryFile import numpy as np @@ -36,6 +37,27 @@ def test_requirements(): raise nose.SkipTest(import_exception) class TestGBQConnectorIntegration(tm.TestCase): + + @classmethod + def setUpClass(cls): + with open(os.path.join(tm.get_data_path(), 'gcloud_credentials'), 'r') as fin: + creds_json = fin.read() + + creds_json_invalid = creds_json.replace('@INVALID@', '"true"') + creds_json_valid = creds_json.replace('@INVALID@', '"false"') + + cls.creds_file_valid = NamedTemporaryFile() + cls.creds_file_valid.write(creds_json_valid.encode('UTF-8')) + cls.creds_file_valid.flush() + + cls.creds_file_invalid = NamedTemporaryFile() + cls.creds_file_invalid.write(creds_json_invalid.encode('UTF-8')) + cls.creds_file_invalid.flush() + + cls.non_creds_file = NamedTemporaryFile() + cls.non_creds_file.write('{"token": "50414e444153204556455259574845524521"}'.encode('UTF-8')) + cls.non_creds_file.flush() + def setUp(self): test_requirements() @@ -64,6 +86,28 @@ def test_should_be_able_to_get_results_from_query(self): schema, pages = self.sut.run_query('SELECT 1') self.assertTrue(pages is not None) + def test_should_raise_exception_with_invalid_gcloud_creds_path(self): + with tm.assertRaises(gbq.MissingOauthCredentials): + gbq.GbqConnector(PROJECT_ID, gcloud_credentials='missing_file') + + def test_should_fail_with_invalid_gcloud_credentials(self): + credentials = gbq.GbqConnector(PROJECT_ID, gcloud_credentials=self.creds_file_invalid.name).credentials + self.assertEqual(credentials.invalid, "true") + + def test_should_be_able_to_get_valid_gcloud_credentials(self): + credentials = gbq.GbqConnector(PROJECT_ID, gcloud_credentials=self.creds_file_valid.name).credentials + self.assertEqual(credentials.invalid, "false") + + def test_should_fail_if_gcloud_credentials_incorrectly_formatted(self): + with tm.assertRaises(KeyError): + gbq.GbqConnector(PROJECT_ID, gcloud_credentials=self.non_creds_file.name) + + @classmethod + def tearDownClass(cls): + cls.creds_file_valid.close() + cls.creds_file_invalid.close() + cls.non_creds_file.close() + class TestReadGBQUnitTests(tm.TestCase): def setUp(self): test_requirements() From 2a9f77e11b4f1108ea7da8af9868cba2d9340817 Mon Sep 17 00:00:00 2001 From: Sean Schaefer Date: Mon, 23 Feb 2015 22:41:55 -0700 Subject: [PATCH 09/10] Add exception printing for import error. --- pandas/io/gbq.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 81329b695d3ed..9cc7d5ac6ff15 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -5,6 +5,7 @@ import sys from time import sleep import uuid +import traceback import numpy as np @@ -49,6 +50,7 @@ _GOOGLE_API_CLIENT_VALID_VERSION = True except ImportError: + traceback.format_exc() _GOOGLE_API_CLIENT_INSTALLED = False From b31574a9ec6bccee427ed08d5b5b995cc6753439 Mon Sep 17 00:00:00 2001 From: Sean Schaefer Date: Mon, 23 Feb 2015 22:51:55 -0700 Subject: [PATCH 10/10] Adding space to run travis builds again... 
--- pandas/io/gbq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 9cc7d5ac6ff15..f921c027513e2 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -35,7 +35,7 @@ try: from apiclient.discovery import build from apiclient.http import MediaFileUpload - from apiclient.errors import HttpError + from apiclient.errors import HttpError from oauth2client.client import OAuth2WebServerFlow from oauth2client.client import AccessTokenRefreshError
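
Usage sketch (illustrative only, not part of the commits above): with these patches applied, a user who has already authenticated via `gcloud auth login` can reuse those tokens instead of going through the interactive browser OAuth flow. The project id and query below are placeholders.

    import pandas as pd

    # Reuse the Google Cloud SDK credentials at the default path,
    # ~/.config/gcloud/credentials ("my-project" is a placeholder project id).
    df = pd.read_gbq('SELECT 1', project_id='my-project', gcloud_credentials=True)

    # Or point at an explicit credentials file instead of the default path.
    df = pd.read_gbq('SELECT 1', project_id='my-project',
                     gcloud_credentials='/path/to/gcloud/credentials')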