diff --git a/doc/source/io.rst b/doc/source/io.rst index 7917e6b4cdfce..477e1a7d5026a 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4475,6 +4475,15 @@ Additional information on service accounts can be found You will need to install an additional dependency: `oauth2client <https://github.com/google/oauth2client>`__. +Authentication via ``application default credentials`` is also possible. This is only valid +if the parameter ``private_key`` is not provided. This method also requires that +the credentials can be fetched from the environment the code is running in. +Otherwise, the OAuth2 client-side authentication is used. +Additional information is available on +`application default credentials <https://developers.google.com/identity/protocols/application-default-credentials>`__. + +.. versionadded:: 0.19.0 + .. note:: The `'private_key'` parameter can be set to either the file path of the service account key diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index f93e8f4240787..fa1b21116cb4f 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -361,6 +361,8 @@ Google BigQuery Enhancements Other enhancements ^^^^^^^^^^^^^^^^^^ +- The ``.get_credentials()`` method of ``GbqConnector`` can now first try to fetch `the application default credentials <https://developers.google.com/identity/protocols/application-default-credentials>`__. See the :ref:`docs <io.bigquery_authentication>` for more details (:issue:`13577`). + - The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. 
The default behaviour remains to raising a ``NonExistentTimeError`` (:issue:`13057`) - ``pd.to_numeric()`` now accepts a ``downcast`` parameter, which will downcast the data if possible to smallest specified numerical dtype (:issue:`13352`) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 6288fdb609962..326e32c84ebe6 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -160,7 +160,60 @@ def get_credentials(self): if self.private_key: return self.get_service_account_credentials() else: - return self.get_user_account_credentials() + # Try to retrieve Application Default Credentials + credentials = self.get_application_default_credentials() + if not credentials: + credentials = self.get_user_account_credentials() + return credentials + + def get_application_default_credentials(self): + """ + This method tries to retrieve the "application default credentials". + This could be useful for running code on Google Cloud Platform. + + .. versionadded:: 0.19.0 + + Parameters + ---------- + None + + Returns + ------- + - GoogleCredentials, + If the application default credentials can be retrieved + from the environment. The retrieved credentials should also + have access to the project (self.project_id) on BigQuery. + - OR None, + If application default credentials can not be retrieved + from the environment. Or, the retrieved credentials do not + have access to the project (self.project_id) on BigQuery. 
+ """ + import httplib2 + try: + from googleapiclient.discovery import build + except ImportError: + from apiclient.discovery import build + try: + from oauth2client.client import GoogleCredentials + except ImportError: + return None + + try: + credentials = GoogleCredentials.get_application_default() + except Exception: + return None + + http = httplib2.Http() + try: + http = credentials.authorize(http) + bigquery_service = build('bigquery', 'v2', http=http) + # Check if the application has rights to the BigQuery project + jobs = bigquery_service.jobs() + job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} + jobs.insert(projectId=self.project_id, body=job_data).execute() + return credentials + except Exception: + return None def get_user_account_credentials(self): from oauth2client.client import OAuth2WebServerFlow @@ -578,10 +631,16 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, https://developers.google.com/api-client-library/python/apis/bigquery/v2 Authentication to the Google BigQuery service is via OAuth 2.0. - By default user account credentials are used. You will be asked to - grant permissions for product name 'pandas GBQ'. It is also posible - to authenticate via service account credentials by using - private_key parameter. + - If "private_key" is not provided: + By default "application default credentials" are used. + + .. versionadded:: 0.19.0 + + If default application credentials are not found or are restrictive, + user account credentials are used. In this case, you will be asked to + grant permissions for product name 'pandas GBQ'. + - If "private_key" is provided: + Service account credentials will be used to authenticate. Parameters ---------- @@ -689,10 +748,16 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000, https://developers.google.com/api-client-library/python/apis/bigquery/v2 Authentication to the Google BigQuery service is via OAuth 2.0. - By default user account credentials are used. 
You will be asked to - grant permissions for product name 'pandas GBQ'. It is also posible - to authenticate via service account credentials by using - private_key parameter. + - If "private_key" is not provided: + By default "application default credentials" are used. + + .. versionadded:: 0.19.0 + + If default application credentials are not found or are restrictive, + user account credentials are used. In this case, you will be asked to + grant permissions for product name 'pandas GBQ'. + - If "private_key" is provided: + Service account credentials will be used to authenticate. Parameters ---------- diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 0d8512ffb5524..4b71192c907f8 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -151,6 +151,30 @@ def test_requirements(): raise nose.SkipTest(import_exception) +def _check_if_can_get_correct_default_credentials(): + # Checks if "Application Default Credentials" can be fetched + # from the environment the tests are running in. 
+ # See Issue #13577 + test_requirements() + import httplib2 + try: + from googleapiclient.discovery import build + except ImportError: + from apiclient.discovery import build + try: + from oauth2client.client import GoogleCredentials + credentials = GoogleCredentials.get_application_default() + http = httplib2.Http() + http = credentials.authorize(http) + bigquery_service = build('bigquery', 'v2', http=http) + jobs = bigquery_service.jobs() + job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} + jobs.insert(projectId=PROJECT_ID, body=job_data).execute() + return True + except Exception: + return False + + def clean_gbq_environment(private_key=None): dataset = gbq._Dataset(PROJECT_ID, private_key=private_key) @@ -217,6 +241,21 @@ def test_should_be_able_to_get_results_from_query(self): schema, pages = self.sut.run_query('SELECT 1') self.assertTrue(pages is not None) + def test_get_application_default_credentials_does_not_throw_error(self): + if _check_if_can_get_correct_default_credentials(): + raise nose.SkipTest("Can get default_credentials " + "from the environment!") + credentials = self.sut.get_application_default_credentials() + self.assertIsNone(credentials) + + def test_get_application_default_credentials_returns_credentials(self): + if not _check_if_can_get_correct_default_credentials(): + raise nose.SkipTest("Cannot get default_credentials " + "from the environment!") + from oauth2client.client import GoogleCredentials + credentials = self.sut.get_application_default_credentials() + self.assertTrue(isinstance(credentials, GoogleCredentials)) + class TestGBQConnectorServiceAccountKeyPathIntegration(tm.TestCase): def setUp(self):