
BUG: Issue in the gbq module when authenticating on remote servers #8489 #11141

Closed · wants to merge 1 commit · Changes from all commits
24 changes: 17 additions & 7 deletions doc/source/io.rst
@@ -4059,13 +4059,30 @@ The key functions are:
.. autosummary::
:toctree: generated/

authorize
read_gbq
to_gbq

.. currentmodule:: pandas

.. _io.bigquery_reader:

Authorization
'''''''''''''

Authorization is required to use the BigQuery API. You must call the
:func:`~pandas.io.gbq.authorize` function to start the authorization process. In general,
this is as simple as following the prompts in a browser and entering the provided code
to complete the process. A credentials file is then saved to disk, so you only need to
authorize once as long as the credentials have not been revoked. Additional information
on the authentication mechanism can be found
`here <https://cloud.google.com/bigquery/authentication?hl=en/>`__.

To begin the authorization process, use the :func:`~pandas.io.gbq.authorize` function

.. code-block:: python

gbq.authorize()
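
Once the credentials file exists, later sessions pick it up automatically. A
minimal end-to-end sketch (``'my-project'`` and the table name are placeholders):

.. code-block:: python

   from pandas.io import gbq

   gbq.authorize()  # one-time; writes bigquery_credentials.dat to the working directory
   df = gbq.read_gbq('SELECT * FROM test_dataset.test_table', 'my-project')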

Querying
''''''''

@@ -4080,13 +4097,6 @@ into a DataFrame using the :func:`~pandas.io.gbq.read_gbq` function.

data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', projectid)

You can define which column from BigQuery to use as an index in the
destination DataFrame as well as a preferred column order as follows:
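
A sketch of such a call (the index and column names are placeholders; ``index_col``
and ``col_order`` are the :func:`~pandas.io.gbq.read_gbq` parameters assumed here):

.. code-block:: python

   data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table',
                            projectid,
                            index_col='index_column_name',
                            col_order=['col1', 'col2', 'col3'])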

3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.17.1.txt
@@ -23,6 +23,8 @@ Enhancements
.. _whatsnew_0171.enhancements.other:

- Improve the error message in :func:`pandas.io.gbq.to_gbq` when a streaming insert fails (:issue:`11285`)
- Added :func:`pandas.io.gbq.authorize` to allow users to authenticate with Google BigQuery.
See the :ref:`docs <io.bigquery>` for more details (:issue:`11141`).

Other Enhancements
^^^^^^^^^^^^^^^^^^
@@ -97,3 +99,4 @@ Bug Fixes
- Fixed a bug that prevented the construction of an empty series of dtype
``datetime64[ns, tz]`` (:issue:`11245`).
- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`)
- Fixed an issue in the gbq module where authentication on remote servers failed silently (:issue:`11141`)
47 changes: 39 additions & 8 deletions pandas/io/gbq.py
Expand Up @@ -15,6 +15,8 @@
from pandas.util.decorators import deprecate
from pandas.compat import lzip, bytes_to_str

CREDENTIALS_FILE = 'bigquery_credentials.dat'

def _check_google_client_version():

try:
@@ -109,7 +111,7 @@ class TableCreationError(PandasError, ValueError):

class GbqConnector(object):

    def __init__(self, project_id=None, reauth=False):
self.test_google_api_imports()
self.project_id = project_id
self.reauth = reauth
@@ -128,23 +130,44 @@ def test_google_api_imports(self):
except ImportError as e:
raise ImportError("Missing module required for Google BigQuery support: {0}".format(str(e)))

    def authorize(self):

Contributor: no, you must have the credentials to pass in. pandas shouldn't be responsible for saving/retrieving these. .authorize should be almost a trivial call.

Contributor Author: Ok, I think #11335 may be a better solution since it allows the user to pass in a json key. I'm going to close this PR based on the discussion in #8489
        from oauth2client.client import OAuth2WebServerFlow
        from oauth2client.file import Storage

        _check_google_client_version()

        # build the OAuth2 flow for the out-of-band (copy/paste) redirect
        storage = Storage(CREDENTIALS_FILE)
        flow = OAuth2WebServerFlow(client_id='495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com',
                                   client_secret='kOc9wMptUtxkcIFbtZCcrEAc',
                                   scope='https://www.googleapis.com/auth/bigquery',
                                   redirect_uri='urn:ietf:wg:oauth:2.0:oob')
        print('Please visit the following url to obtain an authorization code: {0}'.format(flow.step1_get_authorize_url()))

        authorization_prompt_message = 'Enter authorization code and press enter: '

        if compat.PY3:
            code = input(authorization_prompt_message)
        else:
            code = raw_input(authorization_prompt_message)

        # exchange the pasted code for credentials and persist them to disk
        code = code.strip()
        storage.put(flow.step2_exchange(code))
        credentials = storage.get()

        return credentials

def get_credentials(self):
from oauth2client.file import Storage

_check_google_client_version()

credentials = Storage(CREDENTIALS_FILE).get()

if self.reauth:
credentials = self.authorize()

if credentials is None or credentials.invalid:
raise AccessDenied("The credentials are missing or invalid. Please run gbq.authorize().")

return credentials

@@ -215,8 +238,8 @@ def run_query(self, query, verbose=True):
try:
query_reply = job_collection.insert(projectId=self.project_id, body=job_data).execute()
except AccessTokenRefreshError:
raise AccessDenied("The credentials have been revoked or expired, please re-run the application "
"to re-authorize")
raise AccessDenied("The credentials have been revoked or expired, please run gbq.authorize() "
"to re-authorize.")
except HttpError as ex:
self.process_http_error(ex)

@@ -518,6 +541,12 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
connector.load_data(dataframe, dataset_id, table_id, chunksize, verbose)


def authorize():
""" Allows users to create the credentials file required for BigQuery authorization """

GbqConnector(reauth=True)


def generate_bq_schema(df, default_type='STRING'):

# deprecation TimeSeries, #11121
@@ -526,6 +555,7 @@ def generate_bq_schema(df, default_type='STRING'):

return _generate_bq_schema(df, default_type=default_type)


def _generate_bq_schema(df, default_type='STRING'):
""" Given a passed df, generate the associated Google BigQuery schema.

@@ -554,6 +584,7 @@ def _generate_bq_schema(df, default_type='STRING'):

return {'fields': fields}


class _Table(GbqConnector):

def __init__(self, project_id, dataset_id, reauth=False):
35 changes: 27 additions & 8 deletions pandas/io/tests/test_gbq.py
@@ -29,7 +29,7 @@
_SETUPTOOLS_INSTALLED = False


def validate_imports():
global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \
_HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED

@@ -83,13 +83,22 @@ def _test_imports():
raise ImportError("pandas requires httplib2 for Google BigQuery support")


def validate_requirements():
try:
        validate_imports()
except (ImportError, NotImplementedError) as import_exception:
raise nose.SkipTest(import_exception)


def validate_authorization():
try:
gbq.GbqConnector(PROJECT_ID)
except gbq.AccessDenied:
gbq.authorize()
except ImportError as import_exception:
raise nose.SkipTest(import_exception)


def clean_gbq_environment():
dataset = gbq._Dataset(PROJECT_ID)

@@ -126,12 +135,20 @@ def test_generate_bq_schema_deprecated():
gbq.generate_bq_schema(df)

class TestGBQConnectorIntegration(tm.TestCase):

@classmethod
def setUpClass(cls):
# - GLOBAL CLASS FIXTURES -
# put here any instruction you want to execute only *ONCE* *BEFORE* executing *ALL* tests
# described below.

if not PROJECT_ID:
raise nose.SkipTest("Cannot run integration tests without a project id")

validate_requirements()
validate_authorization()

def setUp(self):
self.sut = gbq.GbqConnector(PROJECT_ID)

def test_should_be_able_to_make_a_connector(self):
@@ -157,7 +174,7 @@ def test_should_be_able_to_get_results_from_query(self):

class TestReadGBQUnitTests(tm.TestCase):
def setUp(self):
        validate_requirements()

def test_should_return_bigquery_integers_as_python_floats(self):
result = gbq._parse_entry(1, 'INTEGER')
@@ -201,6 +218,7 @@ def test_that_parse_data_works_properly(self):


class TestReadGBQIntegration(tm.TestCase):

@classmethod
def setUpClass(cls):
# - GLOBAL CLASS FIXTURES -
@@ -210,7 +228,7 @@ def setUpClass(cls):
if not PROJECT_ID:
raise nose.SkipTest("Cannot run integration tests without a project id")

        validate_requirements()

def setUp(self):
# - PER-TEST FIXTURES -
@@ -373,7 +391,8 @@ def setUpClass(cls):
if not PROJECT_ID:
raise nose.SkipTest("Cannot run integration tests without a project id")

        validate_requirements()
        validate_authorization()
clean_gbq_environment()

gbq._Dataset(PROJECT_ID).create(DATASET_ID + "1")