diff --git a/doc/source/io.rst b/doc/source/io.rst
index f8fe6fc8a4c3a..ed43b8d731260 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -3643,9 +3643,18 @@ You will then be authenticated to the specified BigQuery account
via Google's Oauth2 mechanism. In general, this is as simple as following the
prompts in a browser window which will be opened for you. Should the browser not
be available, or fail to launch, a code will be provided to complete the process
-manually. Additional information on the authentication mechanism can be found
+manually. Additional information on this authentication mechanism can be found
`here `__
+Alternatively, you can use a headless authentication mechanism via the Google Cloud SDK. More
+information on installing the SDK and authenticating with it is available `here `__.
+
+Once you have your authentication credentials set up, you can use this approach by passing the
+``gcloud_credentials`` parameter. It accepts either the boolean ``True`` (in which case the SDK's
+default credentials path, ``~/.config/gcloud/credentials``, is used) or a string filepath to the
+credentials file:
+
+.. code-block:: python
+
+   data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table',
+                            project_id=projectid,
+                            gcloud_credentials=True)
+
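+The same approach works when writing data: ``to_gbq`` accepts an identical ``gcloud_credentials``
+argument. Below is a minimal sketch of passing an explicit path instead of ``True``; the filepath
+shown is purely illustrative:
+
+.. code-block:: python
+
+   # illustrative path; point it at wherever `gcloud auth login` stored your credentials
+   data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table',
+                            project_id=projectid,
+                            gcloud_credentials='/path/to/gcloud/credentials')
+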
You can define which column from BigQuery to use as an index in the
destination DataFrame as well as a preferred column order as follows:
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 937dc421e3926..7ab88edd77d4b 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -2573,13 +2573,13 @@ def is_hashable(arg):
>>> is_hashable(a)
False
"""
- # don't consider anything not collections.Hashable, so as not to broaden
- # the definition of hashable beyond that. For example, old-style classes
- # are not collections.Hashable but they won't fail hash().
- if not isinstance(arg, collections.Hashable):
- return False
+ # unfortunately, we can't use isinstance(arg, collections.Hashable), which
+ # can be faster than calling hash, because numpy scalars on Python 3 fail
+ # this test
+
+ # reconsider this decision once this numpy bug is fixed:
+ # https://github.com/numpy/numpy/issues/5562
- # narrow the definition of hashable if hash(arg) fails in practice
try:
hash(arg)
except TypeError:
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index 572a8be5c65e8..f921c027513e2 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -1,9 +1,11 @@
from datetime import datetime
+import os
import json
import logging
import sys
from time import sleep
import uuid
+import traceback
import numpy as np
@@ -33,20 +35,22 @@
try:
from apiclient.discovery import build
from apiclient.http import MediaFileUpload
- from apiclient.errors import HttpError
+ from apiclient.errors import HttpError
from oauth2client.client import OAuth2WebServerFlow
from oauth2client.client import AccessTokenRefreshError
from oauth2client.client import flow_from_clientsecrets
+ from oauth2client.client import Credentials
from oauth2client.file import Storage
from oauth2client.tools import run
_GOOGLE_API_CLIENT_INSTALLED=True
_GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version
- if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2.0':
+ if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2':
_GOOGLE_API_CLIENT_VALID_VERSION = True
except ImportError:
+ traceback.format_exc()
_GOOGLE_API_CLIENT_INSTALLED = False
@@ -72,6 +76,13 @@
logger = logging.getLogger('pandas.io.gbq')
logger.setLevel(logging.ERROR)
+class MissingOauthCredentials(PandasError, IOError):
+ """
+ Raised when Google BigQuery authentication credentials
+ file is missing, but was needed.
+ """
+ pass
+
class InvalidPageToken(PandasError, IOError):
"""
Raised when Google BigQuery fails to return,
@@ -119,11 +130,12 @@ class InvalidColumnOrder(PandasError, IOError):
pass
class GbqConnector:
- def __init__(self, project_id, reauth=False):
- self.project_id = project_id
- self.reauth = reauth
- self.credentials = self.get_credentials()
- self.service = self.get_service(self.credentials)
+ def __init__(self, project_id, reauth=False, gcloud_credentials=None):
+ self.project_id = project_id
+ self.reauth = reauth
+ self.gcloud_credentials = gcloud_credentials
+ self.credentials = self.get_credentials()
+ self.service = self.get_service(self.credentials)
def get_credentials(self):
flow = OAuth2WebServerFlow(client_id='495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com',
@@ -131,8 +143,19 @@ def get_credentials(self):
scope='https://www.googleapis.com/auth/bigquery',
redirect_uri='urn:ietf:wg:oauth:2.0:oob')
- storage = Storage('bigquery_credentials.dat')
- credentials = storage.get()
+        if self.gcloud_credentials is not None:
+            # dual-typed argument: True means "use the SDK's default path",
+            # a str is an explicit path to the credentials file
+            gcfp = self.gcloud_credentials
+            if gcfp is True:
+                gcfp = '~/.config/gcloud/credentials'
+            credfn = os.path.expanduser(gcfp)
+            if not os.path.exists(credfn):
+                msg = ("Required Google Cloud authentication credentials "
+                       "file {0} missing.").format(credfn)
+                raise MissingOauthCredentials(msg)
+            with open(credfn) as fin:
+                gcloud_cred = json.load(fin)['data'][0]['credential']
+            credentials = Credentials.new_from_json(json.dumps(gcloud_cred))
+            return credentials
+        else:
+            storage = Storage('bigquery_credentials.dat')
+            credentials = storage.get()
if credentials is None or credentials.invalid or self.reauth:
credentials = run(flow, storage)
@@ -328,7 +351,8 @@ def _test_imports():
if not _HTTPLIB2_INSTALLED:
raise ImportError("pandas requires httplib2 for Google BigQuery support")
-def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False):
+def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False,
+ gcloud_credentials=None):
"""Load data from Google BigQuery.
THIS IS AN EXPERIMENTAL LIBRARY
@@ -353,6 +377,12 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
reauth : boolean (default False)
Force Google BigQuery to reauthenticate the user. This is useful
if multiple accounts are used.
+    gcloud_credentials : boolean or str (default None)
+        Use oauth2 credentials from ``gcloud auth login``. This is useful
+        if pandas is being run in an IPython notebook and the user
+        already has authentication tokens from the Google Cloud SDK.
+        Set to True to use the default path, ~/.config/gcloud/credentials.
+        Otherwise, provide an explicit filepath to the credentials file.
Returns
-------
@@ -366,7 +396,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
if not project_id:
raise TypeError("Missing required parameter: project_id")
- connector = GbqConnector(project_id, reauth = reauth)
+ connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials)
schema, pages = connector.run_query(query)
dataframe_list = []
while len(pages) > 0:
@@ -401,7 +431,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
return final_df
def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
- verbose=True, reauth=False):
+ verbose=True, reauth=False, gcloud_credentials=None):
"""Write a DataFrame to a Google BigQuery table.
THIS IS AN EXPERIMENTAL LIBRARY
@@ -430,6 +460,12 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
reauth : boolean (default False)
Force Google BigQuery to reauthenticate the user. This is useful
if multiple accounts are used.
+    gcloud_credentials : boolean or str (default None)
+        Use oauth2 credentials from ``gcloud auth login``. This is useful
+        if pandas is being run in an IPython notebook and the user
+        already has authentication tokens from the Google Cloud SDK.
+        Set to True to use the default path, ~/.config/gcloud/credentials.
+        Otherwise, provide an explicit filepath to the credentials file.
"""
_test_imports()
@@ -440,7 +476,7 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
if not '.' in destination_table:
raise NotFoundException("Invalid Table Name. Should be of the form 'datasetId.tableId' ")
- connector = GbqConnector(project_id, reauth = reauth)
+ connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials)
dataset_id, table_id = destination_table.rsplit('.',1)
connector.load_data(dataframe, dataset_id, table_id, chunksize, verbose)
diff --git a/pandas/io/tests/data/gcloud_credentials b/pandas/io/tests/data/gcloud_credentials
new file mode 100644
index 0000000000000..d0bd0381048a8
--- /dev/null
+++ b/pandas/io/tests/data/gcloud_credentials
@@ -0,0 +1,62 @@
+{
+ "data": [
+ {
+ "credential": {
+ "_class": "OAuth2Credentials",
+ "_module": "oauth2client.client",
+ "access_token": "ya29.xXx",
+ "client_id": "1112223456.apps.googleusercontent.com",
+ "client_secret": "aBc467",
+ "id_token": {
+ "at_hash": "OlAp__aV",
+ "aud": "1112223456.apps.googleusercontent.com",
+ "azp": "1112223456.apps.googleusercontent.com",
+ "cid": "1112223456.apps.googleusercontent.com",
+ "email": "luv-python-pandas@somemail.com",
+ "email_verified": true,
+ "exp": 1414558238,
+ "iat": 1414554338,
+ "id": "113403125016275849302",
+ "iss": "accounts.google.com",
+ "sub": "113403125016229475663",
+ "token_hash": "OlAp__aV",
+ "verified_email": true
+ },
+ "invalid": @INVALID@,
+ "refresh_token": "1/asf87bbEGsb78",
+ "revoke_uri": "https://accounts.google.com/o/oauth2/revoke",
+ "token_expiry": "2014-10-29T04:50:38Z",
+ "token_response": {
+ "access_token": "ya29.bYsadfiU8542B5",
+ "expires_in": 3600,
+ "id_token": {
+ "at_hash": "OlAp__aV",
+ "aud": "11112233456.apps.googleusercontent.com",
+ "azp": "11112223456.apps.googleusercontent.com",
+ "cid": "11112223456.apps.googleusercontent.com",
+ "email": "luv-python-pandas@somemail.com",
+ "email_verified": true,
+ "exp": 1414558238,
+ "iat": 1414554338,
+ "id": "11340312501621345098732",
+ "iss": "accounts.google.com",
+ "sub": "1134031250162435660892",
+ "token_hash": "OlAp__aV",
+ "verified_email": true
+ },
+ "refresh_token": "1/6v6asdf6NrR92",
+ "token_type": "Bearer"
+ },
+ "token_uri": "https://accounts.google.com/o/oauth2/token",
+ "user_agent": "Cloud SDK Command Line Tool"
+ },
+ "key": {
+ "account": "luv-python-pandas@somemail.com",
+ "clientId": "11112223456.apps.googleusercontent.com",
+ "scope": "https://www.googleapis.com/auth/appengine.admin https://www.googleapis.com/auth/bigquery https://www.googleapis.com/auth/compute https://www.googleapis.com/auth/devstorage.full_control https://www.googleapis.com/auth/userinfo.email https://www.googleapis.com/auth/ndev.cloudman https://www.googleapis.com/auth/cloud-platform https://www.googleapis.com/auth/sqlservice.admin https://www.googleapis.com/auth/prediction https://www.googleapis.com/auth/projecthosting",
+ "type": "google-cloud-sdk"
+ }
+ }
+ ],
+ "file_version": 1
+}
diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py
index 2f79cc8ba1826..97e4d76a2eb20 100644
--- a/pandas/io/tests/test_gbq.py
+++ b/pandas/io/tests/test_gbq.py
@@ -9,6 +9,7 @@
import sys
import platform
from time import sleep
+from tempfile import NamedTemporaryFile
import numpy as np
@@ -36,6 +37,27 @@ def test_requirements():
raise nose.SkipTest(import_exception)
class TestGBQConnectorIntegration(tm.TestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ with open(os.path.join(tm.get_data_path(), 'gcloud_credentials'), 'r') as fin:
+ creds_json = fin.read()
+
+ creds_json_invalid = creds_json.replace('@INVALID@', '"true"')
+ creds_json_valid = creds_json.replace('@INVALID@', '"false"')
+
+ cls.creds_file_valid = NamedTemporaryFile()
+ cls.creds_file_valid.write(creds_json_valid.encode('UTF-8'))
+ cls.creds_file_valid.flush()
+
+ cls.creds_file_invalid = NamedTemporaryFile()
+ cls.creds_file_invalid.write(creds_json_invalid.encode('UTF-8'))
+ cls.creds_file_invalid.flush()
+
+ cls.non_creds_file = NamedTemporaryFile()
+ cls.non_creds_file.write('{"token": "50414e444153204556455259574845524521"}'.encode('UTF-8'))
+ cls.non_creds_file.flush()
+
def setUp(self):
test_requirements()
@@ -64,6 +86,28 @@ def test_should_be_able_to_get_results_from_query(self):
schema, pages = self.sut.run_query('SELECT 1')
self.assertTrue(pages is not None)
+ def test_should_raise_exception_with_invalid_gcloud_creds_path(self):
+ with tm.assertRaises(gbq.MissingOauthCredentials):
+ gbq.GbqConnector(PROJECT_ID, gcloud_credentials='missing_file')
+
+ def test_should_fail_with_invalid_gcloud_credentials(self):
+ credentials = gbq.GbqConnector(PROJECT_ID, gcloud_credentials=self.creds_file_invalid.name).credentials
+ self.assertEqual(credentials.invalid, "true")
+
+ def test_should_be_able_to_get_valid_gcloud_credentials(self):
+ credentials = gbq.GbqConnector(PROJECT_ID, gcloud_credentials=self.creds_file_valid.name).credentials
+ self.assertEqual(credentials.invalid, "false")
+
+ def test_should_fail_if_gcloud_credentials_incorrectly_formatted(self):
+ with tm.assertRaises(KeyError):
+ gbq.GbqConnector(PROJECT_ID, gcloud_credentials=self.non_creds_file.name)
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.creds_file_valid.close()
+ cls.creds_file_invalid.close()
+ cls.non_creds_file.close()
+
class TestReadGBQUnitTests(tm.TestCase):
def setUp(self):
test_requirements()
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index d8ce98350627d..d0ae7c9988c8d 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -424,7 +424,7 @@ def __hash__(self):
raise TypeError("Not hashable")
hashable = (
- 1, 'a', tuple(), (1,), HashableClass(),
+ 1, 3.14, np.float64(3.14), 'a', tuple(), (1,), HashableClass(),
)
not_hashable = (
[], UnhashableClass1(),
@@ -434,13 +434,10 @@ def __hash__(self):
)
for i in hashable:
- assert isinstance(i, collections.Hashable)
assert com.is_hashable(i)
for i in not_hashable:
- assert not isinstance(i, collections.Hashable)
assert not com.is_hashable(i)
for i in abc_hashable_not_really_hashable:
- assert isinstance(i, collections.Hashable)
assert not com.is_hashable(i)
# numpy.array is no longer collections.Hashable as of
@@ -455,7 +452,7 @@ class OldStyleClass():
pass
c = OldStyleClass()
assert not isinstance(c, collections.Hashable)
- assert not com.is_hashable(c)
+ assert com.is_hashable(c)
hash(c) # this will not raise