-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: improve bigquery connector to optionally allow use of gcloud credentials #8590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from 1 commit
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
2ba8788
ENH allow bigquery connector to optionally use gcloud credentials
ichuang 6c44bbc
move gcfp into if clause
ichuang 36f3876
formatting for proper sphinx rendering
ichuang 42f4b48
Add unit tests for gcloud authentication code.
sean-schaefer b0ace12
move import os to top for pep8
ichuang b023a56
Change LooseVersion check for Google Python Client from 1.2.0 to 1.2
sean-schaefer 7ca9160
Merge branch 'master' of https://github.com/ichuang/pandas
sean-schaefer 4e0b0ef
BUG: fix common.is_hashable for NumPy scalars on Python 3
shoyer eb1c4e3
Merge pull request #9473 from shoyer/fix-is_hashable
jreback 97e26c8
Merge pull request #9500 from jreback/clean
jreback d4c4c37
ENH allow bigquery connector to optionally use gcloud credentials.
ichuang e15c752
Merge branch 'master' of https://github.com/ichuang/pandas
sean-schaefer 2a9f77e
Add exception printing for import error.
sean-schaefer b31574a
Adding space to run travis builds again...
sean-schaefer File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ | |
from oauth2client.client import OAuth2WebServerFlow | ||
from oauth2client.client import AccessTokenRefreshError | ||
from oauth2client.client import flow_from_clientsecrets | ||
from oauth2client.client import Credentials | ||
from oauth2client.file import Storage | ||
from oauth2client.tools import run | ||
_GOOGLE_API_CLIENT_INSTALLED=True | ||
|
@@ -72,6 +73,13 @@ | |
logger = logging.getLogger('pandas.io.gbq') | ||
logger.setLevel(logging.ERROR) | ||
|
||
class MissingOauthCredentials(PandasError, IOError): | ||
""" | ||
Raised when Google BigQuery authentication credentials | ||
file is missing, but was needed. | ||
""" | ||
pass | ||
|
||
class InvalidPageToken(PandasError, IOError): | ||
""" | ||
Raised when Google BigQuery fails to return, | ||
|
@@ -119,20 +127,34 @@ class InvalidColumnOrder(PandasError, IOError): | |
pass | ||
|
||
class GbqConnector: | ||
def __init__(self, project_id, reauth=False): | ||
self.project_id = project_id | ||
self.reauth = reauth | ||
self.credentials = self.get_credentials() | ||
self.service = self.get_service(self.credentials) | ||
def __init__(self, project_id, reauth=False, gcloud_credentials=None): | ||
self.project_id = project_id | ||
self.reauth = reauth | ||
self.gcloud_credentials = gcloud_credentials | ||
self.credentials = self.get_credentials() | ||
self.service = self.get_service(self.credentials) | ||
|
||
def get_credentials(self): | ||
flow = OAuth2WebServerFlow(client_id='495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com', | ||
client_secret='kOc9wMptUtxkcIFbtZCcrEAc', | ||
scope='https://www.googleapis.com/auth/bigquery', | ||
redirect_uri='urn:ietf:wg:oauth:2.0:oob') | ||
|
||
storage = Storage('bigquery_credentials.dat') | ||
credentials = storage.get() | ||
gcfp = self.gcloud_credentials # a bit of mangling since this is dual-typed, str or bool | ||
if self.gcloud_credentials == True: | ||
gcfp = '' | ||
|
||
if self.gcloud_credentials is not None: | ||
import json | ||
import os | ||
credfn = os.path.expanduser(gcfp or '~/.config/gcloud/credentials') | ||
if not os.path.exists(credfn): | ||
raise MissingOauthCredentials("Required google cloud authentication credentials file {0} missing.".format(credfn)) | ||
gcloud_cred = json.loads(open(credfn).read())['data'][0]['credential'] | ||
credentials = Credentials.new_from_json(json.dumps(gcloud_cred)) | ||
else: | ||
storage = Storage('bigquery_credentials.dat') | ||
credentials = storage.get() | ||
|
||
if credentials is None or credentials.invalid or self.reauth: | ||
credentials = run(flow, storage) | ||
|
@@ -328,7 +350,8 @@ def _test_imports(): | |
if not _HTTPLIB2_INSTALLED: | ||
raise ImportError("pandas requires httplib2 for Google BigQuery support") | ||
|
||
def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False): | ||
def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False, | ||
gcloud_credentials=None): | ||
"""Load data from Google BigQuery. | ||
|
||
THIS IS AN EXPERIMENTAL LIBRARY | ||
|
@@ -353,6 +376,12 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa | |
reauth : boolean (default False) | ||
Force Google BigQuery to reauthenticate the user. This is useful | ||
if multiple accounts are used. | ||
gcloud_credentials: boolean or str (default None) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Minor comment: you need to use a space before the colon (so `gcloud_credentials : boolean or str`). |
||
Use oauth2 credentials from gcloud auth login. This is useful | ||
if pandas is being run in an ipython notebook, and the user | ||
has pre-existing authentication tokens. | ||
Set to True to use the default path, ~/.config/gcloud/credentials. | ||
Else provide an explicit path to file to use for credentials. | ||
|
||
Returns | ||
------- | ||
|
@@ -366,7 +395,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa | |
if not project_id: | ||
raise TypeError("Missing required parameter: project_id") | ||
|
||
connector = GbqConnector(project_id, reauth = reauth) | ||
connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials) | ||
schema, pages = connector.run_query(query) | ||
dataframe_list = [] | ||
while len(pages) > 0: | ||
|
@@ -401,7 +430,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa | |
return final_df | ||
|
||
def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000, | ||
verbose=True, reauth=False): | ||
verbose=True, reauth=False, gcloud_credentials=None): | ||
"""Write a DataFrame to a Google BigQuery table. | ||
|
||
THIS IS AN EXPERIMENTAL LIBRARY | ||
|
@@ -430,6 +459,12 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000, | |
reauth : boolean (default False) | ||
Force Google BigQuery to reauthenticate the user. This is useful | ||
if multiple accounts are used. | ||
gcloud_credentials: boolean or str (default None) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here: use a space before the colon (`gcloud_credentials : boolean or str`). |
||
Use oauth2 credentials from gcloud auth login. This is useful | ||
if pandas is being run in an ipython notebook, and the user | ||
has pre-existing authentication tokens. | ||
Set to True to use the default path, ~/.config/gcloud/credentials. | ||
Else provide an explicit path to file to use for credentials. | ||
|
||
""" | ||
_test_imports() | ||
|
@@ -440,7 +475,7 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000, | |
if not '.' in destination_table: | ||
raise NotFoundException("Invalid Table Name. Should be of the form 'datasetId.tableId' ") | ||
|
||
connector = GbqConnector(project_id, reauth = reauth) | ||
connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials) | ||
dataset_id, table_id = destination_table.rsplit('.',1) | ||
|
||
connector.load_data(dataframe, dataset_id, table_id, chunksize, verbose) |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can move this into the next section, no?
Then just do:
    if gcloud_credentials is True:
        gcloud_credentials = default config path
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep, done