diff --git a/doc/source/io.rst b/doc/source/io.rst
index 014daa3f68dbb..e04d2ae569d23 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -4059,6 +4059,7 @@ The key functions are:
.. autosummary::
:toctree: generated/
+ authorize
read_gbq
to_gbq
@@ -4066,6 +4067,22 @@ The key functions are:
.. _io.bigquery_reader:
+Authorization
+'''''''''''''
+
+Authorization is required in order to use the BigQuery API. You must call the
+:func:`~pandas.io.gbq.authorize` function to start the authorization process. In general,
+this is as simple as following the prompts in a browser. A code will be provided to complete
+the process. A credentials file will be saved to disk so that you only need to authorize once
+as long as the credentials have not been revoked. Additional information on the authentication
+can be found `here <https://developers.google.com/identity/protocols/OAuth2>`__.
+
+To begin the authorization process, use the :func:`~pandas.io.gbq.authorize` function
+
+.. code-block:: python
+
+ gbq.authorize()
+
Querying
''''''''
@@ -4080,13 +4097,6 @@ into a DataFrame using the :func:`~pandas.io.gbq.read_gbq` function.
data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', projectid)
-You will then be authenticated to the specified BigQuery account
-via Google's Oauth2 mechanism. In general, this is as simple as following the
-prompts in a browser window which will be opened for you. Should the browser not
-be available, or fail to launch, a code will be provided to complete the process
-manually. Additional information on the authentication mechanism can be found
-`here `__.
-
You can define which column from BigQuery to use as an index in the
destination DataFrame as well as a preferred column order as follows:
diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
index 6d4b61bb97f22..7657b74a3bb78 100755
--- a/doc/source/whatsnew/v0.17.1.txt
+++ b/doc/source/whatsnew/v0.17.1.txt
@@ -23,6 +23,8 @@ Enhancements
.. _whatsnew_0171.enhancements.other:
- Improve the error message in :func:`pandas.io.gbq.to_gbq` when a streaming insert fails (:issue:`11285`)
+- Added :func:`pandas.io.gbq.authorize` to allow users to authenticate with Google BigQuery.
+ See the :ref:`docs <io.bigquery>` for more details (:issue:`11141`).
Other Enhancements
^^^^^^^^^^^^^^^^^^
@@ -97,3 +99,4 @@ Bug Fixes
- Fixed a bug that prevented the construction of an empty series of dtype
``datetime64[ns, tz]`` (:issue:`11245`).
- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`)
+- Resolve the issue where authentication on remote servers fails silently when using the gbq module. (:issue:`11141`)
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index e7241036b94c4..d68466316586b 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -15,6 +15,8 @@
from pandas.util.decorators import deprecate
from pandas.compat import lzip, bytes_to_str
+CREDENTIALS_FILE = 'bigquery_credentials.dat'
+
def _check_google_client_version():
try:
@@ -109,7 +111,7 @@ class TableCreationError(PandasError, ValueError):
class GbqConnector(object):
- def __init__(self, project_id, reauth=False):
+ def __init__(self, project_id=None, reauth=False):
self.test_google_api_imports()
self.project_id = project_id
self.reauth = reauth
@@ -128,23 +130,44 @@ def test_google_api_imports(self):
except ImportError as e:
raise ImportError("Missing module required for Google BigQuery support: {0}".format(str(e)))
- def get_credentials(self):
+ def authorize(self):
from oauth2client.client import OAuth2WebServerFlow
from oauth2client.file import Storage
- from oauth2client.tools import run_flow, argparser
_check_google_client_version()
+ storage = Storage(CREDENTIALS_FILE)
flow = OAuth2WebServerFlow(client_id='495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com',
client_secret='kOc9wMptUtxkcIFbtZCcrEAc',
scope='https://www.googleapis.com/auth/bigquery',
redirect_uri='urn:ietf:wg:oauth:2.0:oob')
+ print('Please visit the following url to obtain an authorization code: {0}'.format(flow.step1_get_authorize_url()))
+
+ authorization_prompt_message = 'Enter authorization code and press enter: '
+
+ if compat.PY3:
+ code = input(authorization_prompt_message)
+ else:
+ code = raw_input(authorization_prompt_message)
- storage = Storage('bigquery_credentials.dat')
+ code = code.strip()
+ storage.put(flow.step2_exchange(code))
credentials = storage.get()
- if credentials is None or credentials.invalid or self.reauth:
- credentials = run_flow(flow, storage, argparser.parse_args([]))
+ return credentials
+
+ def get_credentials(self):
+ from oauth2client.file import Storage
+
+ _check_google_client_version()
+
+ credentials = Storage(CREDENTIALS_FILE).get()
+
+ if self.reauth:
+ credentials = self.authorize()
+
+ if credentials is None or credentials.invalid:
+ raise AccessDenied("The credentials are missing or invalid. Please run gbq.authorize().")
return credentials
@@ -215,8 +238,8 @@ def run_query(self, query, verbose=True):
try:
query_reply = job_collection.insert(projectId=self.project_id, body=job_data).execute()
except AccessTokenRefreshError:
- raise AccessDenied("The credentials have been revoked or expired, please re-run the application "
- "to re-authorize")
+ raise AccessDenied("The credentials have been revoked or expired, please run gbq.authorize() "
+ "to re-authorize.")
except HttpError as ex:
self.process_http_error(ex)
@@ -518,6 +541,12 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
connector.load_data(dataframe, dataset_id, table_id, chunksize, verbose)
+def authorize():
+ """ Allows users to create the credentials file required for BigQuery authorization """
+
+ GbqConnector(reauth=True)
+
+
def generate_bq_schema(df, default_type='STRING'):
# deprecation TimeSeries, #11121
@@ -526,6 +555,7 @@ def generate_bq_schema(df, default_type='STRING'):
return _generate_bq_schema(df, default_type=default_type)
+
def _generate_bq_schema(df, default_type='STRING'):
""" Given a passed df, generate the associated Google BigQuery schema.
@@ -554,6 +584,7 @@ def _generate_bq_schema(df, default_type='STRING'):
return {'fields': fields}
+
class _Table(GbqConnector):
def __init__(self, project_id, dataset_id, reauth=False):
diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py
index cc1e901d8f119..f3df2bfa0959b 100644
--- a/pandas/io/tests/test_gbq.py
+++ b/pandas/io/tests/test_gbq.py
@@ -29,7 +29,7 @@
_SETUPTOOLS_INSTALLED = False
-def _test_imports():
+def validate_imports():
global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \
_HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED
@@ -83,13 +83,22 @@ def _test_imports():
raise ImportError("pandas requires httplib2 for Google BigQuery support")
-def test_requirements():
+def validate_requirements():
try:
- _test_imports()
+ validate_imports()
except (ImportError, NotImplementedError) as import_exception:
raise nose.SkipTest(import_exception)
+def validate_authorization():
+ try:
+ gbq.GbqConnector(PROJECT_ID)
+ except gbq.AccessDenied:
+ gbq.authorize()
+ except ImportError as import_exception:
+ raise nose.SkipTest(import_exception)
+
+
def clean_gbq_environment():
dataset = gbq._Dataset(PROJECT_ID)
@@ -126,12 +135,20 @@ def test_generate_bq_schema_deprecated():
gbq.generate_bq_schema(df)
class TestGBQConnectorIntegration(tm.TestCase):
- def setUp(self):
- test_requirements()
+
+ @classmethod
+ def setUpClass(cls):
+ # - GLOBAL CLASS FIXTURES -
+ # put here any instruction you want to execute only *ONCE* *BEFORE* executing *ALL* tests
+ # described below.
if not PROJECT_ID:
raise nose.SkipTest("Cannot run integration tests without a project id")
+ validate_requirements()
+ validate_authorization()
+
+ def setUp(self):
self.sut = gbq.GbqConnector(PROJECT_ID)
def test_should_be_able_to_make_a_connector(self):
@@ -157,7 +174,7 @@ def test_should_be_able_to_get_results_from_query(self):
class TestReadGBQUnitTests(tm.TestCase):
def setUp(self):
- test_requirements()
+ validate_requirements()
def test_should_return_bigquery_integers_as_python_floats(self):
result = gbq._parse_entry(1, 'INTEGER')
@@ -201,6 +218,7 @@ def test_that_parse_data_works_properly(self):
class TestReadGBQIntegration(tm.TestCase):
+
@classmethod
def setUpClass(cls):
# - GLOBAL CLASS FIXTURES -
@@ -210,7 +228,7 @@ def setUpClass(cls):
if not PROJECT_ID:
raise nose.SkipTest("Cannot run integration tests without a project id")
- test_requirements()
+ validate_requirements()
def setUp(self):
# - PER-TEST FIXTURES -
@@ -373,7 +391,8 @@ def setUpClass(cls):
if not PROJECT_ID:
raise nose.SkipTest("Cannot run integration tests without a project id")
- test_requirements()
+ validate_requirements()
+ validate_authorization()
clean_gbq_environment()
gbq._Dataset(PROJECT_ID).create(DATASET_ID + "1")