Skip to content

Commit 2ba8788

Browse files
committed
ENH allow bigquery connector to optionally use gcloud credentials
gbq: fix allowing a credentials file path to be specified. ENH: allow bigquery connector to optionally use gcloud credentials.
1 parent 403f38d commit 2ba8788

File tree

1 file changed

+46
-11
lines changed

1 file changed

+46
-11
lines changed

pandas/io/gbq.py

+46-11
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from oauth2client.client import OAuth2WebServerFlow
3939
from oauth2client.client import AccessTokenRefreshError
4040
from oauth2client.client import flow_from_clientsecrets
41+
from oauth2client.client import Credentials
4142
from oauth2client.file import Storage
4243
from oauth2client.tools import run
4344
_GOOGLE_API_CLIENT_INSTALLED=True
@@ -72,6 +73,13 @@
7273
logger = logging.getLogger('pandas.io.gbq')
7374
logger.setLevel(logging.ERROR)
7475

76+
class MissingOauthCredentials(PandasError, IOError):
77+
"""
78+
Raised when Google BigQuery authentication credentials
79+
file is missing, but was needed.
80+
"""
81+
pass
82+
7583
class InvalidPageToken(PandasError, IOError):
7684
"""
7785
Raised when Google BigQuery fails to return,
@@ -119,20 +127,34 @@ class InvalidColumnOrder(PandasError, IOError):
119127
pass
120128

121129
class GbqConnector:
122-
def __init__(self, project_id, reauth=False):
123-
self.project_id = project_id
124-
self.reauth = reauth
125-
self.credentials = self.get_credentials()
126-
self.service = self.get_service(self.credentials)
130+
def __init__(self, project_id, reauth=False, gcloud_credentials=None):
131+
self.project_id = project_id
132+
self.reauth = reauth
133+
self.gcloud_credentials = gcloud_credentials
134+
self.credentials = self.get_credentials()
135+
self.service = self.get_service(self.credentials)
127136

128137
def get_credentials(self):
129138
flow = OAuth2WebServerFlow(client_id='495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com',
130139
client_secret='kOc9wMptUtxkcIFbtZCcrEAc',
131140
scope='https://www.googleapis.com/auth/bigquery',
132141
redirect_uri='urn:ietf:wg:oauth:2.0:oob')
133142

134-
storage = Storage('bigquery_credentials.dat')
135-
credentials = storage.get()
143+
gcfp = self.gcloud_credentials # a bit of mangling since this is dual-typed, str or bool
144+
if self.gcloud_credentials == True:
145+
gcfp = ''
146+
147+
if self.gcloud_credentials is not None:
148+
import json
149+
import os
150+
credfn = os.path.expanduser(gcfp or '~/.config/gcloud/credentials')
151+
if not os.path.exists(credfn):
152+
raise MissingOauthCredentials("Required google cloud authentication credentials file {0} missing.".format(credfn))
153+
gcloud_cred = json.loads(open(credfn).read())['data'][0]['credential']
154+
credentials = Credentials.new_from_json(json.dumps(gcloud_cred))
155+
else:
156+
storage = Storage('bigquery_credentials.dat')
157+
credentials = storage.get()
136158

137159
if credentials is None or credentials.invalid or self.reauth:
138160
credentials = run(flow, storage)
@@ -328,7 +350,8 @@ def _test_imports():
328350
if not _HTTPLIB2_INSTALLED:
329351
raise ImportError("pandas requires httplib2 for Google BigQuery support")
330352

331-
def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False):
353+
def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False,
354+
gcloud_credentials=None):
332355
"""Load data from Google BigQuery.
333356
334357
THIS IS AN EXPERIMENTAL LIBRARY
@@ -353,6 +376,12 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
353376
reauth : boolean (default False)
354377
Force Google BigQuery to reauthenticate the user. This is useful
355378
if multiple accounts are used.
379+
gcloud_credentials : boolean or str (default None)
380+
Use oauth2 credentials from gcloud auth login. This is useful
381+
if pandas is being run in an ipython notebook, and the user
382+
has pre-existing authentication tokens.
383+
Set to True to use the default path, ~/.config/gcloud/credentials.
384+
Otherwise, provide an explicit path to the credentials file to use.
356385
357386
Returns
358387
-------
@@ -366,7 +395,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
366395
if not project_id:
367396
raise TypeError("Missing required parameter: project_id")
368397

369-
connector = GbqConnector(project_id, reauth = reauth)
398+
connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials)
370399
schema, pages = connector.run_query(query)
371400
dataframe_list = []
372401
while len(pages) > 0:
@@ -401,7 +430,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
401430
return final_df
402431

403432
def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
404-
verbose=True, reauth=False):
433+
verbose=True, reauth=False, gcloud_credentials=None):
405434
"""Write a DataFrame to a Google BigQuery table.
406435
407436
THIS IS AN EXPERIMENTAL LIBRARY
@@ -430,6 +459,12 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
430459
reauth : boolean (default False)
431460
Force Google BigQuery to reauthenticate the user. This is useful
432461
if multiple accounts are used.
462+
gcloud_credentials : boolean or str (default None)
463+
Use oauth2 credentials from gcloud auth login. This is useful
464+
if pandas is being run in an ipython notebook, and the user
465+
has pre-existing authentication tokens.
466+
Set to True to use the default path, ~/.config/gcloud/credentials.
467+
Otherwise, provide an explicit path to the credentials file to use.
433468
434469
"""
435470
_test_imports()
@@ -440,7 +475,7 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
440475
if not '.' in destination_table:
441476
raise NotFoundException("Invalid Table Name. Should be of the form 'datasetId.tableId' ")
442477

443-
connector = GbqConnector(project_id, reauth = reauth)
478+
connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials)
444479
dataset_id, table_id = destination_table.rsplit('.',1)
445480

446481
connector.load_data(dataframe, dataset_id, table_id, chunksize, verbose)

0 commit comments

Comments
 (0)