Skip to content

ENH: improve bigquery connector to optionally allow use of gcloud credentials #8590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 46 additions & 11 deletions pandas/io/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from oauth2client.client import OAuth2WebServerFlow
from oauth2client.client import AccessTokenRefreshError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.client import Credentials
from oauth2client.file import Storage
from oauth2client.tools import run
_GOOGLE_API_CLIENT_INSTALLED=True
Expand Down Expand Up @@ -72,6 +73,13 @@
logger = logging.getLogger('pandas.io.gbq')
logger.setLevel(logging.ERROR)

class MissingOauthCredentials(PandasError, IOError):
"""
Raised when Google BigQuery authentication credentials
file is missing, but was needed.
"""
pass

class InvalidPageToken(PandasError, IOError):
"""
Raised when Google BigQuery fails to return,
Expand Down Expand Up @@ -119,20 +127,34 @@ class InvalidColumnOrder(PandasError, IOError):
pass

class GbqConnector:
def __init__(self, project_id, reauth=False):
self.project_id = project_id
self.reauth = reauth
self.credentials = self.get_credentials()
self.service = self.get_service(self.credentials)
def __init__(self, project_id, reauth=False, gcloud_credentials=None):
self.project_id = project_id
self.reauth = reauth
self.gcloud_credentials = gcloud_credentials
self.credentials = self.get_credentials()
self.service = self.get_service(self.credentials)

def get_credentials(self):
flow = OAuth2WebServerFlow(client_id='495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com',
client_secret='kOc9wMptUtxkcIFbtZCcrEAc',
scope='https://www.googleapis.com/auth/bigquery',
redirect_uri='urn:ietf:wg:oauth:2.0:oob')

storage = Storage('bigquery_credentials.dat')
credentials = storage.get()
gcfp = self.gcloud_credentials # a bit of mangling since this is dual-typed, str or bool
if self.gcloud_credentials == True:
gcfp = ''
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

u can move this into the next section no?
then just do

if gcloud_credentials is True:
gcloud_credentials = default config path

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, done


if self.gcloud_credentials is not None:
import json
import os
credfn = os.path.expanduser(gcfp or '~/.config/gcloud/credentials')
if not os.path.exists(credfn):
raise MissingOauthCredentials("Required google cloud authentication credentials file {0} missing.".format(credfn))
gcloud_cred = json.loads(open(credfn).read())['data'][0]['credential']
credentials = Credentials.new_from_json(json.dumps(gcloud_cred))
else:
storage = Storage('bigquery_credentials.dat')
credentials = storage.get()

if credentials is None or credentials.invalid or self.reauth:
credentials = run(flow, storage)
Expand Down Expand Up @@ -328,7 +350,8 @@ def _test_imports():
if not _HTTPLIB2_INSTALLED:
raise ImportError("pandas requires httplib2 for Google BigQuery support")

def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False):
def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False,
gcloud_credentials=None):
"""Load data from Google BigQuery.

THIS IS AN EXPERIMENTAL LIBRARY
Expand All @@ -353,6 +376,12 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
reauth : boolean (default False)
Force Google BigQuery to reauthenticate the user. This is useful
if multiple accounts are used.
gcloud_credentials: boolean or str (default None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor comment: you need to use a space before the colon (so arg : type instead of arg: type) for sphinx to render it correctly

Use oauth2 credentials from gcloud auth login. This is useful
if pandas is being run in an ipython notebook, and the user
has pre-existing authentication tokens.
Set to True to use the default path, ~/.config/gcloud/credentials.
Else provide an explicit path to file to use for credentials.

Returns
-------
Expand All @@ -366,7 +395,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
if not project_id:
raise TypeError("Missing required parameter: project_id")

connector = GbqConnector(project_id, reauth = reauth)
connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials)
schema, pages = connector.run_query(query)
dataframe_list = []
while len(pages) > 0:
Expand Down Expand Up @@ -401,7 +430,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
return final_df

def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
verbose=True, reauth=False):
verbose=True, reauth=False, gcloud_credentials=None):
"""Write a DataFrame to a Google BigQuery table.

THIS IS AN EXPERIMENTAL LIBRARY
Expand Down Expand Up @@ -430,6 +459,12 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
reauth : boolean (default False)
Force Google BigQuery to reauthenticate the user. This is useful
if multiple accounts are used.
gcloud_credentials: boolean or str (default None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

Use oauth2 credentials from gcloud auth login. This is useful
if pandas is being run in an ipython notebook, and the user
has pre-existing authentication tokens.
Set to True to use the default path, ~/.config/gcloud/credentials.
Else provide an explicit path to file to use for credentials.

"""
_test_imports()
Expand All @@ -440,7 +475,7 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
if not '.' in destination_table:
raise NotFoundException("Invalid Table Name. Should be of the form 'datasetId.tableId' ")

connector = GbqConnector(project_id, reauth = reauth)
connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials)
dataset_id, table_id = destination_table.rsplit('.',1)

connector.load_data(dataframe, dataset_id, table_id, chunksize, verbose)