Skip to content

Commit 55764ea

Browse files
BUG: Issue in the gbq module when authenticating on remote servers #8489
1 parent 89b4e5b commit 55764ea

File tree

4 files changed

+86
-23
lines changed

4 files changed

+86
-23
lines changed

doc/source/io.rst

+17-7
Original file line numberDiff line numberDiff line change
@@ -4059,13 +4059,30 @@ The key functions are:
40594059
.. autosummary::
40604060
:toctree: generated/
40614061

4062+
authorize
40624063
read_gbq
40634064
to_gbq
40644065

40654066
.. currentmodule:: pandas
40664067

40674068
.. _io.bigquery_reader:
40684069

4070+
Authorization
4071+
'''''''''''''
4072+
4073+
Authorization is required in order to use the BigQuery API. You must call the
4074+
:func:`~pandas.io.gbq.authorize` function to start the authorization process. In general,
4075+
this is as simple as following the prompts in a browser. A code will be provided to complete
4076+
the process. A credentials file will be saved to disk so that you only need to authorize once
4077+
as long as the credentials have not been revoked. Additional information on authentication
4078+
can be found `here <https://cloud.google.com/bigquery/authentication?hl=en/>`__.
4079+
4080+
To begin the authorization process, use the :func:`~pandas.io.gbq.authorize` function:
4081+
4082+
.. code-block:: python
4083+
4084+
gbq.authorize()
4085+
40694086
Querying
40704087
''''''''
40714088

@@ -4080,13 +4097,6 @@ into a DataFrame using the :func:`~pandas.io.gbq.read_gbq` function.
40804097
40814098
data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', projectid)
40824099
4083-
You will then be authenticated to the specified BigQuery account
4084-
via Google's Oauth2 mechanism. In general, this is as simple as following the
4085-
prompts in a browser window which will be opened for you. Should the browser not
4086-
be available, or fail to launch, a code will be provided to complete the process
4087-
manually. Additional information on the authentication mechanism can be found
4088-
`here <https://developers.google.com/accounts/docs/OAuth2#clientside/>`__.
4089-
40904100
You can define which column from BigQuery to use as an index in the
40914101
destination DataFrame as well as a preferred column order as follows:
40924102

doc/source/whatsnew/v0.17.1.txt

+3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ Enhancements
2323
.. _whatsnew_0171.enhancements.other:
2424

2525
- Improve the error message in :func:`pandas.io.gbq.to_gbq` when a streaming insert fails (:issue:`11285`)
26+
- Added :func:`pandas.io.gbq.authorize` to allow users to authenticate with Google BigQuery.
27+
See the :ref:`docs <io.bigquery>` for more details (:issue:`11141`).
2628

2729
Other Enhancements
2830
^^^^^^^^^^^^^^^^^^
@@ -97,3 +99,4 @@ Bug Fixes
9799
- Fixed a bug that prevented the construction of an empty series of dtype
98100
``datetime64[ns, tz]`` (:issue:`11245`).
99101
- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`)
102+
- Bug in the ``gbq`` module where authentication on remote servers fails silently (:issue:`11141`)

pandas/io/gbq.py

+39-8
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from pandas.util.decorators import deprecate
1616
from pandas.compat import lzip, bytes_to_str
1717

18+
CREDENTIALS_FILE = 'bigquery_credentials.dat'
19+
1820
def _check_google_client_version():
1921

2022
try:
@@ -109,7 +111,7 @@ class TableCreationError(PandasError, ValueError):
109111

110112
class GbqConnector(object):
111113

112-
def __init__(self, project_id, reauth=False):
114+
def __init__(self, project_id=None, reauth=False):
113115
self.test_google_api_imports()
114116
self.project_id = project_id
115117
self.reauth = reauth
@@ -128,23 +130,44 @@ def test_google_api_imports(self):
128130
except ImportError as e:
129131
raise ImportError("Missing module required for Google BigQuery support: {0}".format(str(e)))
130132

131-
def get_credentials(self):
133+
def authorize(self):
132134
from oauth2client.client import OAuth2WebServerFlow
133135
from oauth2client.file import Storage
134-
from oauth2client.tools import run_flow, argparser
135136

136137
_check_google_client_version()
137138

139+
storage = Storage(CREDENTIALS_FILE)
138140
flow = OAuth2WebServerFlow(client_id='495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com',
139141
client_secret='kOc9wMptUtxkcIFbtZCcrEAc',
140142
scope='https://www.googleapis.com/auth/bigquery',
141143
redirect_uri='urn:ietf:wg:oauth:2.0:oob')
144+
print('Please visit the following url to obtain an authorization code: {0}'.format(flow.step1_get_authorize_url()))
145+
146+
authorization_prompt_message = 'Enter authorization code and press enter: '
147+
148+
if compat.PY3:
149+
code = input(authorization_prompt_message)  # input() already returns str on PY3; eval() would execute the pasted code
150+
else:
151+
code = raw_input(authorization_prompt_message)
142152

143-
storage = Storage('bigquery_credentials.dat')
153+
code = code.strip()  # str.strip() returns a new string; keep it so stray whitespace is not sent to step2_exchange
154+
storage.put(flow.step2_exchange(code))
144155
credentials = storage.get()
145156

146-
if credentials is None or credentials.invalid or self.reauth:
147-
credentials = run_flow(flow, storage, argparser.parse_args([]))
157+
return credentials
158+
159+
def get_credentials(self):
160+
from oauth2client.file import Storage
161+
162+
_check_google_client_version()
163+
164+
credentials = Storage(CREDENTIALS_FILE).get()
165+
166+
if self.reauth:
167+
credentials = self.authorize()
168+
169+
if credentials is None or credentials.invalid:
170+
raise AccessDenied("The credentials are missing or invalid. Please run gbq.authorize().")
148171

149172
return credentials
150173

@@ -215,8 +238,8 @@ def run_query(self, query, verbose=True):
215238
try:
216239
query_reply = job_collection.insert(projectId=self.project_id, body=job_data).execute()
217240
except AccessTokenRefreshError:
218-
raise AccessDenied("The credentials have been revoked or expired, please re-run the application "
219-
"to re-authorize")
241+
raise AccessDenied("The credentials have been revoked or expired, please run gbq.authorize() "
242+
"to re-authorize.")
220243
except HttpError as ex:
221244
self.process_http_error(ex)
222245

@@ -518,6 +541,12 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
518541
connector.load_data(dataframe, dataset_id, table_id, chunksize, verbose)
519542

520543

544+
def authorize():
545+
""" Allows users to create the credentials file required for BigQuery authorization """
546+
547+
GbqConnector(reauth=True)
548+
549+
521550
def generate_bq_schema(df, default_type='STRING'):
522551

523552
# deprecation TimeSeries, #11121
@@ -526,6 +555,7 @@ def generate_bq_schema(df, default_type='STRING'):
526555

527556
return _generate_bq_schema(df, default_type=default_type)
528557

558+
529559
def _generate_bq_schema(df, default_type='STRING'):
530560
""" Given a passed df, generate the associated Google BigQuery schema.
531561
@@ -554,6 +584,7 @@ def _generate_bq_schema(df, default_type='STRING'):
554584

555585
return {'fields': fields}
556586

587+
557588
class _Table(GbqConnector):
558589

559590
def __init__(self, project_id, dataset_id, reauth=False):

pandas/io/tests/test_gbq.py

+27-8
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
_SETUPTOOLS_INSTALLED = False
3030

3131

32-
def _test_imports():
32+
def validate_imports():
3333
global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \
3434
_HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED
3535

@@ -83,13 +83,22 @@ def _test_imports():
8383
raise ImportError("pandas requires httplib2 for Google BigQuery support")
8484

8585

86-
def test_requirements():
86+
def validate_requirements():
8787
try:
88-
_test_imports()
88+
validate_imports()
8989
except (ImportError, NotImplementedError) as import_exception:
9090
raise nose.SkipTest(import_exception)
9191

9292

93+
def validate_authorization():
94+
try:
95+
gbq.GbqConnector(PROJECT_ID)
96+
except gbq.AccessDenied:
97+
gbq.authorize()
98+
except ImportError as import_exception:
99+
raise nose.SkipTest(import_exception)
100+
101+
93102
def clean_gbq_environment():
94103
dataset = gbq._Dataset(PROJECT_ID)
95104

@@ -126,12 +135,20 @@ def test_generate_bq_schema_deprecated():
126135
gbq.generate_bq_schema(df)
127136

128137
class TestGBQConnectorIntegration(tm.TestCase):
129-
def setUp(self):
130-
test_requirements()
138+
139+
@classmethod
140+
def setUpClass(cls):
141+
# - GLOBAL CLASS FIXTURES -
142+
# put here any instruction you want to execute only *ONCE* *BEFORE* executing *ALL* tests
143+
# described below.
131144

132145
if not PROJECT_ID:
133146
raise nose.SkipTest("Cannot run integration tests without a project id")
134147

148+
validate_requirements()
149+
validate_authorization()
150+
151+
def setUp(self):
135152
self.sut = gbq.GbqConnector(PROJECT_ID)
136153

137154
def test_should_be_able_to_make_a_connector(self):
@@ -157,7 +174,7 @@ def test_should_be_able_to_get_results_from_query(self):
157174

158175
class TestReadGBQUnitTests(tm.TestCase):
159176
def setUp(self):
160-
test_requirements()
177+
validate_requirements()
161178

162179
def test_should_return_bigquery_integers_as_python_floats(self):
163180
result = gbq._parse_entry(1, 'INTEGER')
@@ -201,6 +218,7 @@ def test_that_parse_data_works_properly(self):
201218

202219

203220
class TestReadGBQIntegration(tm.TestCase):
221+
204222
@classmethod
205223
def setUpClass(cls):
206224
# - GLOBAL CLASS FIXTURES -
@@ -210,7 +228,7 @@ def setUpClass(cls):
210228
if not PROJECT_ID:
211229
raise nose.SkipTest("Cannot run integration tests without a project id")
212230

213-
test_requirements()
231+
validate_requirements()
214232

215233
def setUp(self):
216234
# - PER-TEST FIXTURES -
@@ -373,7 +391,8 @@ def setUpClass(cls):
373391
if not PROJECT_ID:
374392
raise nose.SkipTest("Cannot run integration tests without a project id")
375393

376-
test_requirements()
394+
validate_requirements()
395+
validate_authorization()
377396
clean_gbq_environment()
378397

379398
gbq._Dataset(PROJECT_ID).create(DATASET_ID + "1")

0 commit comments

Comments
 (0)