Skip to content

ENH: improve bigquery connector to optionally allow use of gcloud credentials #8590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3643,9 +3643,18 @@ You will then be authenticated to the specified BigQuery account
via Google's Oauth2 mechanism. In general, this is as simple as following the
prompts in a browser window which will be opened for you. Should the browser not
be available, or fail to launch, a code will be provided to complete the process
manually. Additional information on the authentication mechanism can be found
manually. Additional information on this authentication mechanism can be found
`here <https://developers.google.com/accounts/docs/OAuth2#clientside/>`__

Alternatively, you can use a headless authentication mechanism via the Google Cloud SDK. More
information on installing the SDK and authenticating is available `here <https://cloud.google.com/sdk/gcloud/>`__

Once you have set up your authentication credentials, you can use this approach by passing the ``gcloud_credentials`` parameter. It accepts either the boolean ``True`` (in which case the SDK's default credentials path is used) or a string file path to the credentials file:

.. code-block:: python

   data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', project_id=projectid, gcloud_credentials=True)

You can define which column from BigQuery to use as an index in the
destination DataFrame as well as a preferred column order as follows:

Expand Down
12 changes: 6 additions & 6 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2573,13 +2573,13 @@ def is_hashable(arg):
>>> is_hashable(a)
False
"""
# don't consider anything not collections.Hashable, so as not to broaden
# the definition of hashable beyond that. For example, old-style classes
# are not collections.Hashable but they won't fail hash().
if not isinstance(arg, collections.Hashable):
return False
# unfortunately, we can't use isinstance(arg, collections.Hashable), which
# can be faster than calling hash, because numpy scalars on Python 3 fail
# this test

# reconsider this decision once this numpy bug is fixed:
# https://github.com/numpy/numpy/issues/5562

# narrow the definition of hashable if hash(arg) fails in practice
try:
hash(arg)
except TypeError:
Expand Down
62 changes: 49 additions & 13 deletions pandas/io/gbq.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from datetime import datetime
import os
import json
import logging
import sys
from time import sleep
import uuid
import traceback

import numpy as np

Expand Down Expand Up @@ -33,20 +35,22 @@
try:
from apiclient.discovery import build
from apiclient.http import MediaFileUpload
from apiclient.errors import HttpError
from apiclient.errors import HttpError

from oauth2client.client import OAuth2WebServerFlow
from oauth2client.client import AccessTokenRefreshError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.client import Credentials
from oauth2client.file import Storage
from oauth2client.tools import run
_GOOGLE_API_CLIENT_INSTALLED=True
_GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version

if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2.0':
if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2':
_GOOGLE_API_CLIENT_VALID_VERSION = True

except ImportError:
traceback.format_exc()
_GOOGLE_API_CLIENT_INSTALLED = False


Expand All @@ -72,6 +76,13 @@
logger = logging.getLogger('pandas.io.gbq')
logger.setLevel(logging.ERROR)

class MissingOauthCredentials(PandasError, IOError):
    """
    Raised when a Google BigQuery authentication credentials
    file was required (``gcloud_credentials`` was supplied)
    but could not be found at the given path.
    """
    pass

class InvalidPageToken(PandasError, IOError):
"""
Raised when Google BigQuery fails to return,
Expand Down Expand Up @@ -119,20 +130,32 @@ class InvalidColumnOrder(PandasError, IOError):
pass

class GbqConnector:
def __init__(self, project_id, reauth=False):
self.project_id = project_id
self.reauth = reauth
self.credentials = self.get_credentials()
self.service = self.get_service(self.credentials)
def __init__(self, project_id, reauth=False, gcloud_credentials=None):
    # project_id : Google BigQuery project to run queries against.
    # reauth : force the OAuth2 flow to re-run even when stored
    #     credentials exist (useful with multiple accounts).
    # gcloud_credentials : dual-typed. True loads credentials from the
    #     gcloud SDK's default path; a str is an explicit path to a
    #     credentials file; None (default) uses the interactive
    #     OAuth2WebServerFlow.
    self.project_id = project_id
    self.reauth = reauth
    self.gcloud_credentials = gcloud_credentials
    # Authenticate eagerly and build the BigQuery service handle so a
    # connector instance is ready to run queries as soon as constructed.
    self.credentials = self.get_credentials()
    self.service = self.get_service(self.credentials)

def get_credentials(self):
flow = OAuth2WebServerFlow(client_id='495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com',
client_secret='kOc9wMptUtxkcIFbtZCcrEAc',
scope='https://www.googleapis.com/auth/bigquery',
redirect_uri='urn:ietf:wg:oauth:2.0:oob')

storage = Storage('bigquery_credentials.dat')
credentials = storage.get()
if self.gcloud_credentials is not None:
gcfp = self.gcloud_credentials # a bit of mangling since this is dual-typed, str or bool
if self.gcloud_credentials == True:
gcfp = '~/.config/gcloud/credentials'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not safe on windows. I don't recall exactly what you need to do, but I think you have to use the HOME env variable, then use os.join to construct paths.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't the expanduser on line 150 take care of that? From https://docs.python.org/2/library/os.path.html#os.path.expanduser:

"On Unix and Windows, return the argument with an initial component of ~ or ~user replaced by that user‘s home directory."

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, yeah prob does. ok

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ichuang Can we have those imports moved to the top to conform with PEP8?

credfn = os.path.expanduser(gcfp)
if not os.path.exists(credfn):
raise MissingOauthCredentials("Required google cloud authentication credentials file {0} missing.".format(credfn))
gcloud_cred = json.loads(open(credfn).read())['data'][0]['credential']
credentials = Credentials.new_from_json(json.dumps(gcloud_cred))
return credentials
else:
storage = Storage('bigquery_credentials.dat')
credentials = storage.get()

if credentials is None or credentials.invalid or self.reauth:
credentials = run(flow, storage)
Expand Down Expand Up @@ -328,7 +351,8 @@ def _test_imports():
if not _HTTPLIB2_INSTALLED:
raise ImportError("pandas requires httplib2 for Google BigQuery support")

def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False):
def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=False,
gcloud_credentials=None):
"""Load data from Google BigQuery.

THIS IS AN EXPERIMENTAL LIBRARY
Expand All @@ -353,6 +377,12 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
reauth : boolean (default False)
Force Google BigQuery to reauthenticate the user. This is useful
if multiple accounts are used.
gcloud_credentials : boolean or str (default None)
Use oauth2 credentials from gcloud auth login. This is useful
if pandas is being run in an ipython notebook, and the user
has pre-existing authentication tokens.
Set to True to use the default path, ~/.config/gcloud/credentials.
Else provide an explicit path to file to use for credentials.

Returns
-------
Expand All @@ -366,7 +396,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
if not project_id:
raise TypeError("Missing required parameter: project_id")

connector = GbqConnector(project_id, reauth = reauth)
connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials)
schema, pages = connector.run_query(query)
dataframe_list = []
while len(pages) > 0:
Expand Down Expand Up @@ -401,7 +431,7 @@ def read_gbq(query, project_id = None, index_col=None, col_order=None, reauth=Fa
return final_df

def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
verbose=True, reauth=False):
verbose=True, reauth=False, gcloud_credentials=None):
"""Write a DataFrame to a Google BigQuery table.

THIS IS AN EXPERIMENTAL LIBRARY
Expand Down Expand Up @@ -430,6 +460,12 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
reauth : boolean (default False)
Force Google BigQuery to reauthenticate the user. This is useful
if multiple accounts are used.
gcloud_credentials : boolean or str (default None)
Use oauth2 credentials from gcloud auth login. This is useful
if pandas is being run in an ipython notebook, and the user
has pre-existing authentication tokens.
Set to True to use the default path, ~/.config/gcloud/credentials.
Else provide an explicit path to file to use for credentials.

"""
_test_imports()
Expand All @@ -440,7 +476,7 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=10000,
if not '.' in destination_table:
raise NotFoundException("Invalid Table Name. Should be of the form 'datasetId.tableId' ")

connector = GbqConnector(project_id, reauth = reauth)
connector = GbqConnector(project_id, reauth = reauth, gcloud_credentials = gcloud_credentials)
dataset_id, table_id = destination_table.rsplit('.',1)

connector.load_data(dataframe, dataset_id, table_id, chunksize, verbose)
Expand Down
62 changes: 62 additions & 0 deletions pandas/io/tests/data/gcloud_credentials
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"data": [
{
"credential": {
"_class": "OAuth2Credentials",
"_module": "oauth2client.client",
"access_token": "ya29.xXx",
"client_id": "1112223456.apps.googleusercontent.com",
"client_secret": "aBc467",
"id_token": {
"at_hash": "OlAp__aV",
"aud": "1112223456.apps.googleusercontent.com",
"azp": "1112223456.apps.googleusercontent.com",
"cid": "1112223456.apps.googleusercontent.com",
"email": "[email protected]",
"email_verified": true,
"exp": 1414558238,
"iat": 1414554338,
"id": "113403125016275849302",
"iss": "accounts.google.com",
"sub": "113403125016229475663",
"token_hash": "OlAp__aV",
"verified_email": true
},
"invalid": @INVALID@,
"refresh_token": "1/asf87bbEGsb78",
"revoke_uri": "https://accounts.google.com/o/oauth2/revoke",
"token_expiry": "2014-10-29T04:50:38Z",
"token_response": {
"access_token": "ya29.bYsadfiU8542B5",
"expires_in": 3600,
"id_token": {
"at_hash": "OlAp__aV",
"aud": "11112233456.apps.googleusercontent.com",
"azp": "11112223456.apps.googleusercontent.com",
"cid": "11112223456.apps.googleusercontent.com",
"email": "[email protected]",
"email_verified": true,
"exp": 1414558238,
"iat": 1414554338,
"id": "11340312501621345098732",
"iss": "accounts.google.com",
"sub": "1134031250162435660892",
"token_hash": "OlAp__aV",
"verified_email": true
},
"refresh_token": "1/6v6asdf6NrR92",
"token_type": "Bearer"
},
"token_uri": "https://accounts.google.com/o/oauth2/token",
"user_agent": "Cloud SDK Command Line Tool"
},
"key": {
"account": "[email protected]",
"clientId": "11112223456.apps.googleusercontent.com",
"scope": "https://www.googleapis.com/auth/appengine.admin https://www.googleapis.com/auth/bigquery https://www.googleapis.com/auth/compute https://www.googleapis.com/auth/devstorage.full_control https://www.googleapis.com/auth/userinfo.email https://www.googleapis.com/auth/ndev.cloudman https://www.googleapis.com/auth/cloud-platform https://www.googleapis.com/auth/sqlservice.admin https://www.googleapis.com/auth/prediction https://www.googleapis.com/auth/projecthosting",
"type": "google-cloud-sdk"
}
}
],
"file_version": 1
}
44 changes: 44 additions & 0 deletions pandas/io/tests/test_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import sys
import platform
from time import sleep
from tempfile import NamedTemporaryFile

import numpy as np

Expand Down Expand Up @@ -36,6 +37,27 @@ def test_requirements():
raise nose.SkipTest(import_exception)

class TestGBQConnectorIntegration(tm.TestCase):

@classmethod
def setUpClass(cls):
    """Materialize temporary gcloud credential fixtures from the bundled template."""
    def _fixture(content):
        # Keep the handle open: closing a NamedTemporaryFile deletes it,
        # and the tests need the file on disk for their whole run.
        handle = NamedTemporaryFile()
        handle.write(content.encode('UTF-8'))
        handle.flush()
        return handle

    template_path = os.path.join(tm.get_data_path(), 'gcloud_credentials')
    with open(template_path, 'r') as fin:
        template = fin.read()

    # The template carries an @INVALID@ placeholder for the credential's
    # "invalid" field; stamp it per-fixture.
    cls.creds_file_valid = _fixture(template.replace('@INVALID@', '"false"'))
    cls.creds_file_invalid = _fixture(template.replace('@INVALID@', '"true"'))
    # Well-formed JSON that lacks the expected credential structure.
    cls.non_creds_file = _fixture('{"token": "50414e444153204556455259574845524521"}')

def setUp(self):
    # Skips the test (raises nose.SkipTest) when the Google API client
    # stack is missing or too old — see test_requirements above.
    test_requirements()

Expand Down Expand Up @@ -64,6 +86,28 @@ def test_should_be_able_to_get_results_from_query(self):
schema, pages = self.sut.run_query('SELECT 1')
self.assertTrue(pages is not None)

def test_should_raise_exception_with_invalid_gcloud_creds_path(self):
    # A nonexistent credentials path must surface MissingOauthCredentials
    # rather than falling back to the interactive OAuth2 flow.
    with tm.assertRaises(gbq.MissingOauthCredentials):
        gbq.GbqConnector(PROJECT_ID, gcloud_credentials='missing_file')

def test_should_fail_with_invalid_gcloud_credentials(self):
    # The invalid fixture marks the stored token's "invalid" field; the
    # connector should load the file verbatim, not repair or replace it.
    connector = gbq.GbqConnector(
        PROJECT_ID, gcloud_credentials=self.creds_file_invalid.name)
    self.assertEqual(connector.credentials.invalid, "true")

def test_should_be_able_to_get_valid_gcloud_credentials(self):
    # Loading the valid fixture should yield credentials whose "invalid"
    # field matches what the file declares.
    connector = gbq.GbqConnector(
        PROJECT_ID, gcloud_credentials=self.creds_file_valid.name)
    self.assertEqual(connector.credentials.invalid, "false")

def test_should_fail_if_gcloud_credentials_incorrectly_formatted(self):
    # The file exists and is valid JSON but lacks the expected
    # ['data'][0]['credential'] structure, so parsing raises KeyError.
    with tm.assertRaises(KeyError):
        gbq.GbqConnector(PROJECT_ID, gcloud_credentials=self.non_creds_file.name)

@classmethod
def tearDownClass(cls):
    # Closing each NamedTemporaryFile also removes it from disk.
    for handle in (cls.creds_file_valid,
                   cls.creds_file_invalid,
                   cls.non_creds_file):
        handle.close()

class TestReadGBQUnitTests(tm.TestCase):
def setUp(self):
test_requirements()
Expand Down
7 changes: 2 additions & 5 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def __hash__(self):
raise TypeError("Not hashable")

hashable = (
1, 'a', tuple(), (1,), HashableClass(),
1, 3.14, np.float64(3.14), 'a', tuple(), (1,), HashableClass(),
)
not_hashable = (
[], UnhashableClass1(),
Expand All @@ -434,13 +434,10 @@ def __hash__(self):
)

for i in hashable:
assert isinstance(i, collections.Hashable)
assert com.is_hashable(i)
for i in not_hashable:
assert not isinstance(i, collections.Hashable)
assert not com.is_hashable(i)
for i in abc_hashable_not_really_hashable:
assert isinstance(i, collections.Hashable)
assert not com.is_hashable(i)

# numpy.array is no longer collections.Hashable as of
Expand All @@ -455,7 +452,7 @@ class OldStyleClass():
pass
c = OldStyleClass()
assert not isinstance(c, collections.Hashable)
assert not com.is_hashable(c)
assert com.is_hashable(c)
hash(c) # this will not raise


Expand Down