Skip to content

Add Python 3 support and optional parameter "silent" for read_gbq #10572

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 12 additions & 11 deletions pandas/io/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@


def _check_google_client_version():
if compat.PY3:
raise NotImplementedError("Google's libraries do not support Python 3 yet")

try:
import pkg_resources

Expand All @@ -26,8 +23,9 @@ def _check_google_client_version():

_GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version

if LooseVersion(_GOOGLE_API_CLIENT_VERSION) < '1.2.0':
raise ImportError("pandas requires google-api-python-client >= 1.2.0 for Google "
required_version = '1.4.0' if compat.PY3 else '1.2.0'
if LooseVersion(_GOOGLE_API_CLIENT_VERSION) < required_version:
raise ImportError("pandas requires google-api-python-client >= " + required_version + " for Google "
"BigQuery support, current version " + _GOOGLE_API_CLIENT_VERSION)

logger = logging.getLogger('pandas.io.gbq')
Expand Down Expand Up @@ -133,7 +131,7 @@ def get_service(self, credentials):

return bigquery_service

def run_query(self, query):
def run_query(self, query, silent):
try:
from apiclient.errors import HttpError
from oauth2client.client import AccessTokenRefreshError
Expand Down Expand Up @@ -182,7 +180,8 @@ def run_query(self, query):
job_reference = query_reply['jobReference']

while(not query_reply.get('jobComplete', False)):
print('Job not yet complete...')
if not silent:
print('Job is not yet complete...')
query_reply = job_collection.getQueryResults(
projectId=job_reference['projectId'],
jobId=job_reference['jobId']).execute()
Expand Down Expand Up @@ -267,10 +266,10 @@ def _parse_data(schema, rows):

fields = schema['fields']
col_types = [field['type'] for field in fields]
col_names = [field['name'].encode('ascii', 'ignore') for field in fields]
col_names = [field['name'] for field in fields]
col_dtypes = [dtype_map.get(field['type'], object) for field in fields]
page_array = np.zeros((len(rows),),
dtype=zip(col_names, col_dtypes))
dtype=list(zip(col_names, col_dtypes)))

for row_num, raw_row in enumerate(rows):
entries = raw_row.get('f', [])
Expand All @@ -294,7 +293,7 @@ def _parse_entry(field_value, field_type):
return field_value


def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False):
def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, silent = False):
"""Load data from Google BigQuery.

THIS IS AN EXPERIMENTAL LIBRARY
Expand All @@ -319,6 +318,8 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals
reauth : boolean (default False)
Force Google BigQuery to reauthenticate the user. This is useful
if multiple accounts are used.
silent : boolean (default False)
Do not print status messages during query execution if True

Returns
-------
Expand All @@ -332,7 +333,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals
raise TypeError("Missing required parameter: project_id")

connector = GbqConnector(project_id, reauth = reauth)
schema, pages = connector.run_query(query)
schema, pages = connector.run_query(query, silent = silent)
dataframe_list = []
while len(pages) > 0:
page = pages.pop()
Expand Down
69 changes: 37 additions & 32 deletions pandas/io/tests/test_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import sys
import platform
from time import sleep
from io import StringIO

import numpy as np

Expand Down Expand Up @@ -39,55 +40,51 @@ def missing_bq():
return True

def _test_imports():
if not compat.PY3:
required_version = '1.4.0' if compat.PY3 else '1.2.0'

global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \
_HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED
global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \
_HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED

try:
import pkg_resources
_SETUPTOOLS_INSTALLED = True
except ImportError:
_SETUPTOOLS_INSTALLED = False
try:
import pkg_resources
_SETUPTOOLS_INSTALLED = True
except ImportError:
_SETUPTOOLS_INSTALLED = False

if _SETUPTOOLS_INSTALLED:
try:
from apiclient.discovery import build
from apiclient.errors import HttpError
if _SETUPTOOLS_INSTALLED:
try:
from apiclient.discovery import build
from apiclient.errors import HttpError

from oauth2client.client import OAuth2WebServerFlow
from oauth2client.client import AccessTokenRefreshError
from oauth2client.client import OAuth2WebServerFlow
from oauth2client.client import AccessTokenRefreshError

from oauth2client.file import Storage
from oauth2client.tools import run_flow
_GOOGLE_API_CLIENT_INSTALLED=True
_GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version
from oauth2client.file import Storage
from oauth2client.tools import run_flow
_GOOGLE_API_CLIENT_INSTALLED=True
_GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version

if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2.0':
_GOOGLE_API_CLIENT_VALID_VERSION = True
if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= required_version:
_GOOGLE_API_CLIENT_VALID_VERSION = True

except ImportError:
_GOOGLE_API_CLIENT_INSTALLED = False
except ImportError:
_GOOGLE_API_CLIENT_INSTALLED = False


try:
import httplib2
_HTTPLIB2_INSTALLED = True
except ImportError:
_HTTPLIB2_INSTALLED = False
try:
import httplib2
_HTTPLIB2_INSTALLED = True
except ImportError:
_HTTPLIB2_INSTALLED = False


if compat.PY3:
raise NotImplementedError("Google's libraries do not support Python 3 yet")

if not _SETUPTOOLS_INSTALLED:
raise ImportError('Could not import pkg_resources (setuptools).')

if not _GOOGLE_API_CLIENT_INSTALLED:
raise ImportError('Could not import Google API Client.')

if not _GOOGLE_API_CLIENT_VALID_VERSION:
raise ImportError("pandas requires google-api-python-client >= 1.2.0 for Google "
raise ImportError("pandas requires google-api-python-client >= " + required_version + " for Google "
"BigQuery support, current version " + _GOOGLE_API_CLIENT_VERSION)

if not _HTTPLIB2_INSTALLED:
Expand Down Expand Up @@ -295,6 +292,14 @@ def test_download_dataset_larger_than_200k_rows(self):
# http://stackoverflow.com/questions/19145587/bq-py-not-paging-results
df = gbq.read_gbq("SELECT id FROM [publicdata:samples.wikipedia] GROUP EACH BY id ORDER BY id ASC LIMIT 200005", project_id=PROJECT_ID)
self.assertEqual(len(df.drop_duplicates()), 200005)

def test_silent_option_true(self):
    """Check that read_gbq writes nothing to stdout when silent=True.

    stdout is temporarily swapped for an in-memory buffer while the
    query runs, then the captured text is compared to the empty string.
    """
    original_stdout = sys.stdout
    captured = StringIO()
    sys.stdout = captured
    try:
        gbq.read_gbq("SELECT 3", project_id = PROJECT_ID, silent = True)
    finally:
        # Restore the real stdout even if the query raises; otherwise a
        # failure here would leave stdout redirected for every later test.
        sys.stdout = original_stdout
    tm.assert_equal(captured.getvalue(), "")

class TestToGBQIntegration(tm.TestCase):
# This class requires bq.py to be installed for setup/teardown.
Expand Down