From 83b66c2708af326655d676f451515d2f71c567e5 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 7 Apr 2017 22:07:02 -0400 Subject: [PATCH 01/42] Add new from_gbq() method and tweak requirements --- pandas_gbq/gbq.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 2 + 2 files changed, 130 insertions(+) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 9473b082..b2159d02 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -6,12 +6,14 @@ import time import sys import os +import uuid import numpy as np from distutils.version import StrictVersion from pandas import compat, DataFrame, concat from pandas.compat import lzip, bytes_to_str +from google.cloud import bigquery def _check_google_client_version(): @@ -1014,6 +1016,132 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, return final_df +def from_gbq(query, project_id=None, index_col=None, col_order=None, + private_key=None, dialect='legacy', configuration = None, **kwargs): + r"""Load data from Google BigQuery using google-cloud-python + + The main method a user calls to execute a Query in Google BigQuery + and read results into a pandas DataFrame. + + The Google Cloud library is used. + Documentation is available `here + `__ + + Authentication via Google Cloud can be performed a number of ways, see: + + The easiest is to download a service account JSON keyfile and point to it using + an environment variable: + `$ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/keyfile.json"` + + Parameters + ---------- + query : str + SQL-Like Query to return data values + project_id : str (optional) + Google BigQuery Account project ID. + index_col : str (optional) + Name of result column to use for index in results DataFrame + col_order : list(str) (optional) + List of BigQuery column names in the desired order for results + DataFrame + private_key : str (optional) + Path to service account private key in JSON format. If none is provided, + will default to the GOOGLE_APPLICATION_CREDENTIALS environment variable + or another form of authentication (see above) + dialect : {'legacy', 'standard'}, default 'legacy' + 'legacy' : Use BigQuery's legacy SQL dialect. + 'standard' : Use BigQuery's standard SQL (beta), which is + compliant with the SQL 2011 standard. For more information + see `BigQuery SQL Reference + `__ + configuration : dict (optional) + Because of current limitations (https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2765) + only a certain number of configuration settings are currently implemented. You can set them with + like: `from_gbq(q,configuration={'allow_large_results':True,'use_legacy_sql':False})` + Allowable settings: + -allow_large_results + -create_disposition + -default_dataset + -destination + -flatten_results + -priority + -use_query_cache + -use_legacy_sql + -dry_run + -write_disposition + -maximum_billing_tier + -maximum_bytes_billed + + Returns + ------- + df: DataFrame + DataFrame representing results of query + + """ + + if private_key: + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = private_key + + def _wait_for_job(job): + while True: + job.reload() # Refreshes the state via a GET request. 
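+            # A failed job still reaches the DONE state; the error_result
+            # check below is what surfaces query errors to the caller, and
+            # the one-second sleep keeps the polling loop inexpensive.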
+ if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.errors) + return + time.sleep(1) + + client = bigquery.Client(project=project_id) + query_job = client.run_async_query(str(uuid.uuid4()), query) + + if dialect != 'legacy': + query_job.use_legacy_sql = False + + if configuration: + for setting, value in configuration.items(): + setattr(query_job, setting, value) + + query_job.begin() + _wait_for_job(query_job) + + query_results = query_job.results() + + rows, total_rows, page_token = query_results.fetch_data() + columns=[field.name for field in query_results.schema] + data = rows + + final_df = DataFrame(data=data,columns=columns) + + # Reindex the DataFrame on the provided column + if index_col is not None: + if index_col in final_df.columns: + final_df.set_index(index_col, inplace=True) + else: + raise InvalidIndexColumn( + 'Index column "{0}" does not exist in DataFrame.' + .format(index_col) + ) + + # Change the order of columns in the DataFrame based on provided list + if col_order is not None: + if sorted(col_order) == sorted(final_df.columns): + final_df = final_df[col_order] + else: + raise InvalidColumnOrder( + 'Column order does not match this DataFrame.' + ) + + # cast BOOLEAN and INTEGER columns from object to bool/int + # if they dont have any nulls + type_map = {'BOOLEAN': bool, 'INTEGER': int} + for field in query_results.schema: + if field.field_type in type_map and \ + final_df[field.name].notnull().all(): + final_df[field.name] = \ + final_df[field.name].astype(type_map[field.field_type]) + + return final_df + def to_gbq(dataframe, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', private_key=None, diff --git a/requirements.txt b/requirements.txt index c72b5a5a..d9022078 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,5 @@ google-api-python-client google-auth google-auth-httplib2 google-auth-oauthlib +google-cloud +uuid From 438ff590704446d07348c8c6735383bebdcf5813 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Sat, 8 Apr 2017 00:53:49 -0400 Subject: [PATCH 02/42] Remove unecessary type conversion Tweaking docstring --- pandas_gbq/gbq.py | 56 +++++++++++++++++------------------------------ 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index b2159d02..c90303fc 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -1017,7 +1017,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, return final_df def from_gbq(query, project_id=None, index_col=None, col_order=None, - private_key=None, dialect='legacy', configuration = None, **kwargs): + private_key=None, dialect='legacy', configuration=None, **kwargs): r"""Load data from Google BigQuery using google-cloud-python The main method a user calls to execute a Query in Google BigQuery @@ -1056,21 +1056,14 @@ def from_gbq(query, project_id=None, index_col=None, col_order=None, `__ configuration : dict (optional) Because of current limitations (https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2765) - only a certain number of configuration settings are currently implemented. 
You can set them with - like: `from_gbq(q,configuration={'allow_large_results':True,'use_legacy_sql':False})` - Allowable settings: - -allow_large_results - -create_disposition - -default_dataset - -destination - -flatten_results - -priority - -use_query_cache - -use_legacy_sql - -dry_run - -write_disposition - -maximum_billing_tier - -maximum_bytes_billed + only some configuration settings are currently implemented. You can pass them + along like in the following: + `from_gbq(q,configuration={'allow_large_results':True,'maximum_billing_tier':2})` + + Example allowable settings: + allow_large_results, create_disposition, default_dataset, destination + flatten_results, priority, use_query_cache, use_legacy_sql, dry_run, + write_disposition, udf_resources, maximum_billing_tier, maximum_bytes_billed Returns ------- @@ -1107,23 +1100,13 @@ def _wait_for_job(job): query_results = query_job.results() rows, total_rows, page_token = query_results.fetch_data() - columns=[field.name for field in query_results.schema] + columns = [field.name for field in query_results.schema] data = rows final_df = DataFrame(data=data,columns=columns) - # Reindex the DataFrame on the provided column - if index_col is not None: - if index_col in final_df.columns: - final_df.set_index(index_col, inplace=True) - else: - raise InvalidIndexColumn( - 'Index column "{0}" does not exist in DataFrame.' - .format(index_col) - ) - # Change the order of columns in the DataFrame based on provided list - if col_order is not None: + if col_order: if sorted(col_order) == sorted(final_df.columns): final_df = final_df[col_order] else: @@ -1131,14 +1114,15 @@ def _wait_for_job(job): 'Column order does not match this DataFrame.' ) - # cast BOOLEAN and INTEGER columns from object to bool/int - # if they dont have any nulls - type_map = {'BOOLEAN': bool, 'INTEGER': int} - for field in query_results.schema: - if field.field_type in type_map and \ - final_df[field.name].notnull().all(): - final_df[field.name] = \ - final_df[field.name].astype(type_map[field.field_type]) + # Reindex the DataFrame on the provided column + if index_col: + if index_col in final_df.columns: + final_df.set_index(index_col, inplace=True) + else: + raise InvalidIndexColumn( + 'Index column "{0}" does not exist in DataFrame.' 
+ .format(index_col) + ) return final_df From 03a9548e53765c0cb00c687cc04b9ab2ce4b8ec0 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Sun, 9 Apr 2017 09:59:08 -0400 Subject: [PATCH 03/42] Remove old read_gbq, rename from_gbq to read_gbq, and add verbose query job info Remove locale import Remove leftover >>>>>HEAD --- pandas_gbq/gbq.py | 58 +++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index c90303fc..9b3b55e9 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -455,28 +455,6 @@ def _print(self, msg, end='\n'): sys.stdout.write(msg + end) sys.stdout.flush() - def _start_timer(self): - self.start = time.time() - - def get_elapsed_seconds(self): - return round(time.time() - self.start, 2) - - def print_elapsed_seconds(self, prefix='Elapsed', postfix='s.', - overlong=7): - sec = self.get_elapsed_seconds() - if sec > overlong: - self._print('{} {} {}'.format(prefix, sec, postfix)) - - # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size - @staticmethod - def sizeof_fmt(num, suffix='B'): - fmt = "%3.1f %s%s" - for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: - if abs(num) < 1024.0: - return fmt % (num, unit, suffix) - num /= 1024.0 - return fmt % (num, 'Y', suffix) - def get_service(self): import httplib2 from google_auth_httplib2 import AuthorizedHttp @@ -833,7 +811,6 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema): table.create(table_id, table_schema) sleep(delay) - def _get_credentials_file(): return os.environ.get( 'PANDAS_GBQ_CREDENTIALS_FILE') @@ -1016,7 +993,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, return final_df -def from_gbq(query, project_id=None, index_col=None, col_order=None, +def read_gbq(query, project_id=None, index_col=None, col_order=None, verbose=True, private_key=None, dialect='legacy', configuration=None, **kwargs): r"""Load data from Google BigQuery using google-cloud-python @@ -1044,6 +1021,8 @@ def from_gbq(query, project_id=None, index_col=None, col_order=None, col_order : list(str) (optional) List of BigQuery column names in the desired order for results DataFrame + verbose : boolean (default True) + Verbose output private_key : str (optional) Path to service account private key in JSON format. 
If none is provided, will default to the GOOGLE_APPLICATION_CREDENTIALS environment variable @@ -1072,6 +1051,15 @@ def from_gbq(query, project_id=None, index_col=None, col_order=None, """ + # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size + def sizeof_fmt(num, suffix='B'): + fmt = "%3.1f %s%s" + for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: + if abs(num) < 1024.0: + return fmt % (num, unit, suffix) + num /= 1024.0 + return fmt % (num, 'Y', suffix) + if private_key: os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = private_key @@ -1095,11 +1083,31 @@ def _wait_for_job(job): setattr(query_job, setting, value) query_job.begin() - _wait_for_job(query_job) + if verbose: + print("Query running...") + _wait_for_job(query_job) + if verbose: + print("Query done.") + if query_job._properties["statistics"]["query"].get("cacheHit", False): + print("Cache hit.") + elif "statistics" in query_job._properties and "query" in query_job._properties["statistics"]: + bytes_billed = int(query_job._properties["statistics"]["query"].get("totalBytesProcessed", 0)) + bytes_processed = int(query_job._properties["statistics"]["query"].get("totalBytesBilled", 0)) + print("Total bytes billed (processed): %s (%s)" % (sizeof_fmt(bytes_billed),sizeof_fmt(bytes_processed))) query_results = query_job.results() + if verbose: + print("\nRetrieving results...") + rows, total_rows, page_token = query_results.fetch_data() + + if verbose: + print("Got %s rows.") % total_rows + print("\nTotal time taken %s s" % (datetime.utcnow()-query_job.created.replace(tzinfo=None)).seconds) + print("Finished at %s." % datetime.now().strftime('%Y-%m-%d %H:%M:%S')) + + columns = [field.name for field in query_results.schema] data = rows From ae7c0e98ad16ef28e03919879fc870c1a2ca2ad6 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Tue, 3 Oct 2017 18:50:35 -0400 Subject: [PATCH 04/42] Handle new iterator return type from fetch_data --- pandas_gbq/gbq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 9b3b55e9..35fe5ce1 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -1099,8 +1099,8 @@ def _wait_for_job(job): if verbose: print("\nRetrieving results...") - - rows, total_rows, page_token = query_results.fetch_data() + rows = list(query_results.fetch_data()) + total_rows = len(rows) if verbose: print("Got %s rows.") % total_rows From d2d01bce737ebb7cec42078a9339af13b81f13da Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Wed, 4 Oct 2017 13:34:39 -0400 Subject: [PATCH 05/42] Remove old read_gbq --- pandas_gbq/gbq.py | 142 +--------------------------------------------- 1 file changed, 1 insertion(+), 141 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 35fe5ce1..96d7125b 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -851,147 +851,7 @@ def _parse_entry(field_value, field_type): elif field_type == 'BOOLEAN': return field_value == 'true' return field_value - - -def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=True, private_key=None, - auth_local_webserver=False, dialect='legacy', **kwargs): - r"""Load data from Google BigQuery. - - The main method a user calls to execute a Query in Google BigQuery - and read results into a pandas DataFrame. - - Google BigQuery API Client Library v2 for Python is used. - Documentation is available `here - `__ - - Authentication to the Google BigQuery service is via OAuth 2.0. 
- - - If "private_key" is not provided: - - By default "application default credentials" are used. - - If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. - - - If "private_key" is provided: - - Service account credentials will be used to authenticate. - - Parameters - ---------- - query : str - SQL-Like Query to return data values - project_id : str - Google BigQuery Account project ID. - index_col : str (optional) - Name of result column to use for index in results DataFrame - col_order : list(str) (optional) - List of BigQuery column names in the desired order for results - DataFrame - reauth : boolean (default False) - Force Google BigQuery to reauthenticate the user. This is useful - if multiple accounts are used. - verbose : boolean (default True) - Verbose output - private_key : str (optional) - Service account private key in JSON format. Can be file path - or string contents. This is useful for remote server - authentication (eg. jupyter iPython notebook on remote host) - auth_local_webserver : boolean, default False - Use the [local webserver flow] instead of the [console flow] when - getting user credentials. A file named bigquery_credentials.dat will - be created in current dir. You can also set PANDAS_GBQ_CREDENTIALS_FILE - environment variable so as to define a specific path to store this - credential (eg. /etc/keys/bigquery.dat). - - .. [local webserver flow] - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. [console flow] - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console - .. versionadded:: 0.2.0 - - dialect : {'legacy', 'standard'}, default 'legacy' - 'legacy' : Use BigQuery's legacy SQL dialect. - 'standard' : Use BigQuery's standard SQL (beta), which is - compliant with the SQL 2011 standard. For more information - see `BigQuery SQL Reference - `__ - - **kwargs : Arbitrary keyword arguments - configuration (dict): query config parameters for job processing. - For example: - - configuration = {'query': {'useQueryCache': False}} - - For more information see `BigQuery SQL Reference - `__ - - Returns - ------- - df: DataFrame - DataFrame representing results of query - - """ - - _test_google_api_imports() - - if not project_id: - raise TypeError("Missing required parameter: project_id") - - if dialect not in ('legacy', 'standard'): - raise ValueError("'{0}' is not valid for dialect".format(dialect)) - - connector = GbqConnector( - project_id, reauth=reauth, verbose=verbose, private_key=private_key, - dialect=dialect, auth_local_webserver=auth_local_webserver) - schema, pages = connector.run_query(query, **kwargs) - dataframe_list = [] - while len(pages) > 0: - page = pages.pop() - dataframe_list.append(_parse_data(schema, page)) - - if len(dataframe_list) > 0: - final_df = concat(dataframe_list, ignore_index=True) - else: - final_df = _parse_data(schema, []) - - # Reindex the DataFrame on the provided column - if index_col is not None: - if index_col in final_df.columns: - final_df.set_index(index_col, inplace=True) - else: - raise InvalidIndexColumn( - 'Index column "{0}" does not exist in DataFrame.' 
- .format(index_col) - ) - - # Change the order of columns in the DataFrame based on provided list - if col_order is not None: - if sorted(col_order) == sorted(final_df.columns): - final_df = final_df[col_order] - else: - raise InvalidColumnOrder( - 'Column order does not match this DataFrame.' - ) - - # cast BOOLEAN and INTEGER columns from object to bool/int - # if they dont have any nulls - type_map = {'BOOLEAN': bool, 'INTEGER': int} - for field in schema['fields']: - if field['type'] in type_map and \ - final_df[field['name']].notnull().all(): - final_df[field['name']] = \ - final_df[field['name']].astype(type_map[field['type']]) - - connector.print_elapsed_seconds( - 'Total time taken', - datetime.now().strftime('s.\nFinished at %Y-%m-%d %H:%M:%S.'), - 0 - ) - - return final_df + def read_gbq(query, project_id=None, index_col=None, col_order=None, verbose=True, private_key=None, dialect='legacy', configuration=None, **kwargs): From 97410dd96da30b9432a690ac949798349b83ff48 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Wed, 4 Oct 2017 13:53:03 -0400 Subject: [PATCH 06/42] Change requirement to google-cloud-bigquery and pin to 0.25.0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d9022078..9d2969bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,5 @@ google-api-python-client google-auth google-auth-httplib2 google-auth-oauthlib -google-cloud +google-cloud-bigquery==0.25.0 uuid From a0847f8e17d7af74396520196558d2dc6247da3a Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Wed, 4 Oct 2017 15:52:10 -0400 Subject: [PATCH 07/42] Pass any private key to client for auth --- pandas_gbq/gbq.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 96d7125b..e7fd788d 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -920,9 +920,6 @@ def sizeof_fmt(num, suffix='B'): num /= 1024.0 return fmt % (num, 'Y', suffix) - if private_key: - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = private_key - def _wait_for_job(job): while True: job.reload() # Refreshes the state via a GET request. 
@@ -931,8 +928,10 @@ def _wait_for_job(job): raise RuntimeError(job.errors) return time.sleep(1) - - client = bigquery.Client(project=project_id) + if private_key: + client = bigquery.Client(project=project_id).from_service_account_json(private_key) + else: + client = bigquery.Client(project=project_id) query_job = client.run_async_query(str(uuid.uuid4()), query) if dialect != 'legacy': From 12bacd8aba0fdc914d4f01dd16a903a5d22cbd4e Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Thu, 5 Oct 2017 13:53:50 -0400 Subject: [PATCH 08/42] Pin google-cloud-bigquery to 0.25.0 in setup.py --- pandas_gbq/gbq.py | 6 ++++-- setup.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index e7fd788d..32c4b244 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -866,8 +866,10 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, verbose=Tru Authentication via Google Cloud can be performed a number of ways, see: - The easiest is to download a service account JSON keyfile and point to it using - an environment variable: + The easiest is to download a service account JSON keyfile or generate user credentials via + `gcloud auth application-default login` + + and point to it using an environment variable: `$ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/keyfile.json"` Parameters diff --git a/setup.py b/setup.py index df3cd85d..48c924bc 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ def readme(): 'google-auth>=1.0.0', 'google-auth-httplib2>=0.0.1', 'google-auth-oauthlib>=0.0.1', + 'google-cloud-bigquery==0.25.0', ] From f50bcca0fa7c042925de2e9b169bfc7da8b004d4 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Thu, 5 Oct 2017 15:02:37 -0400 Subject: [PATCH 09/42] Catch renaming of results to result in 0.26.0 --- pandas_gbq/gbq.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 32c4b244..cac565d9 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -956,7 +956,10 @@ def _wait_for_job(job): bytes_billed = int(query_job._properties["statistics"]["query"].get("totalBytesProcessed", 0)) bytes_processed = int(query_job._properties["statistics"]["query"].get("totalBytesBilled", 0)) print("Total bytes billed (processed): %s (%s)" % (sizeof_fmt(bytes_billed),sizeof_fmt(bytes_processed))) - query_results = query_job.results() + try: + query_results = query_job.results() + except: + query_results = query_job.result() if verbose: print("\nRetrieving results...") From cd17b8d6c84a43df93074dc0183d88bbd7fb650e Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Thu, 5 Oct 2017 15:12:25 -0400 Subject: [PATCH 10/42] Pin requirements to include 0.26.0 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9d2969bc..731224d2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,5 @@ google-api-python-client google-auth google-auth-httplib2 google-auth-oauthlib -google-cloud-bigquery==0.25.0 +google-cloud-bigquery>=0.25.0,<=0.26.0 uuid diff --git a/setup.py b/setup.py index 48c924bc..b6cdf68d 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def readme(): 'google-auth>=1.0.0', 'google-auth-httplib2>=0.0.1', 'google-auth-oauthlib>=0.0.1', - 'google-cloud-bigquery==0.25.0', + 'google-cloud-bigquery>=0.25.0,<=0.26.0', ] From 48ac79563a319c0d598c3f1fc485245be3791924 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Thu, 5 Oct 2017 16:51:24 -0400 Subject: [PATCH 11/42] Remove legacy 
read_gbq code and add initial user auth flow --- pandas_gbq/gbq.py | 196 +++++----------------------------------------- 1 file changed, 21 insertions(+), 175 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index cac565d9..7dbea89d 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -507,138 +507,6 @@ def process_insert_errors(self, insert_errors): raise StreamingInsertError - def run_query(self, query, **kwargs): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - from google.auth.exceptions import RefreshError - - job_collection = self.service.jobs() - - job_config = { - 'query': { - 'query': query, - 'useLegacySql': self.dialect == 'legacy' - # 'allowLargeResults', 'createDisposition', - # 'preserveNulls', destinationTable, useQueryCache - } - } - config = kwargs.get('configuration') - if config is not None: - if len(config) != 1: - raise ValueError("Only one job type must be specified, but " - "given {}".format(','.join(config.keys()))) - if 'query' in config: - if 'query' in config['query'] and query is not None: - raise ValueError("Query statement can't be specified " - "inside config while it is specified " - "as parameter") - - job_config['query'].update(config['query']) - else: - raise ValueError("Only 'query' job type is supported") - - job_data = { - 'configuration': job_config - } - - self._start_timer() - try: - self._print('Requesting query... ', end="") - query_reply = job_collection.insert( - projectId=self.project_id, body=job_data).execute() - self._print('ok.') - except (RefreshError, ValueError): - if self.private_key: - raise AccessDenied( - "The service account credentials are not valid") - else: - raise AccessDenied( - "The credentials have been revoked or expired, " - "please re-run the application to re-authorize") - except HttpError as ex: - self.process_http_error(ex) - - job_reference = query_reply['jobReference'] - job_id = job_reference['jobId'] - self._print('Job ID: %s\nQuery running...' % job_id) - - while not query_reply.get('jobComplete', False): - self.print_elapsed_seconds(' Elapsed', 's. Waiting...') - - timeout_ms = job_config['query'].get('timeoutMs') - if timeout_ms and timeout_ms < self.get_elapsed_seconds() * 1000: - raise QueryTimeout('Query timeout: {} ms'.format(timeout_ms)) - - try: - query_reply = job_collection.getQueryResults( - projectId=job_reference['projectId'], - jobId=job_id).execute() - except HttpError as ex: - self.process_http_error(ex) - - if self.verbose: - if query_reply['cacheHit']: - self._print('Query done.\nCache hit.\n') - else: - bytes_processed = int(query_reply.get( - 'totalBytesProcessed', '0')) - self._print('Query done.\nProcessed: {}'.format( - self.sizeof_fmt(bytes_processed))) - self._print('Standard price: ${:,.2f} USD\n'.format( - bytes_processed * self.query_price_for_TB)) - - self._print('Retrieving results...') - - total_rows = int(query_reply['totalRows']) - result_pages = list() - seen_page_tokens = list() - current_row = 0 - # Only read schema on first page - schema = query_reply['schema'] - - # Loop through each page of data - while 'rows' in query_reply and current_row < total_rows: - page = query_reply['rows'] - result_pages.append(page) - current_row += len(page) - - self.print_elapsed_seconds( - ' Got page: {}; {}% done. 
Elapsed'.format( - len(result_pages), - round(100.0 * current_row / total_rows))) - - if current_row == total_rows: - break - - page_token = query_reply.get('pageToken', None) - - if not page_token and current_row < total_rows: - raise InvalidPageToken("Required pageToken was missing. " - "Received {0} of {1} rows" - .format(current_row, total_rows)) - - elif page_token in seen_page_tokens: - raise InvalidPageToken("A duplicate pageToken was returned") - - seen_page_tokens.append(page_token) - - try: - query_reply = job_collection.getQueryResults( - projectId=job_reference['projectId'], - jobId=job_id, - pageToken=page_token).execute() - except HttpError as ex: - self.process_http_error(ex) - - if current_row < total_rows: - raise InvalidPageToken() - - # print basic query stats - self._print('Got {} rows.\n'.format(total_rows)) - - return schema, result_pages def load_data(self, dataframe, dataset_id, table_id, chunksize): try: @@ -815,46 +683,9 @@ def _get_credentials_file(): return os.environ.get( 'PANDAS_GBQ_CREDENTIALS_FILE') - -def _parse_data(schema, rows): - # see: - # http://pandas.pydata.org/pandas-docs/dev/missing_data.html - # #missing-data-casting-rules-and-indexing - dtype_map = {'FLOAT': np.dtype(float), - 'TIMESTAMP': 'M8[ns]'} - - fields = schema['fields'] - col_types = [field['type'] for field in fields] - col_names = [str(field['name']) for field in fields] - col_dtypes = [dtype_map.get(field['type'], object) for field in fields] - page_array = np.zeros((len(rows),), dtype=lzip(col_names, col_dtypes)) - for row_num, raw_row in enumerate(rows): - entries = raw_row.get('f', []) - for col_num, field_type in enumerate(col_types): - field_value = _parse_entry(entries[col_num].get('v', ''), - field_type) - page_array[row_num][col_num] = field_value - - return DataFrame(page_array, columns=col_names) - - -def _parse_entry(field_value, field_type): - if field_value is None or field_value == 'null': - return None - if field_type == 'INTEGER': - return int(field_value) - elif field_type == 'FLOAT': - return float(field_value) - elif field_type == 'TIMESTAMP': - timestamp = datetime.utcfromtimestamp(float(field_value)) - return np.datetime64(timestamp) - elif field_type == 'BOOLEAN': - return field_value == 'true' - return field_value - - def read_gbq(query, project_id=None, index_col=None, col_order=None, verbose=True, - private_key=None, dialect='legacy', configuration=None, **kwargs): + private_key=None, auth_local_webserver=False, dialect='legacy', + configuration=None, **kwargs): r"""Load data from Google BigQuery using google-cloud-python The main method a user calls to execute a Query in Google BigQuery @@ -862,16 +693,23 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, verbose=Tru The Google Cloud library is used. Documentation is available `here - `__ + ` Authentication via Google Cloud can be performed a number of ways, see: - The easiest is to download a service account JSON keyfile or generate user credentials via - `gcloud auth application-default login` + + The easiest is to generate user credentials via `gcloud auth application-default login` and point to it using an environment variable: `$ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/keyfile.json"` + You can also download a service account private key JSON file and pass the path to the file + to the private_key paramater. + + As a final alternative, you can also set auth_local_webserver to True, which will trigger + a pop-up through which a user can auth with their Google account. 
This will generate a user + credentials file, which is saved locally and can be re-used in the future. + Parameters ---------- query : str @@ -889,6 +727,12 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, verbose=Tru Path to service account private key in JSON format. If none is provided, will default to the GOOGLE_APPLICATION_CREDENTIALS environment variable or another form of authentication (see above) + auth_local_webserver : boolean, default False + Use the [local webserver flow] instead of the [console flow] when + getting user credentials. A file named bigquery_credentials.dat will + be created in ~/.config/pandas_gbq/. You can also set PANDAS_GBQ_CREDENTIALS_FILE + environment variable so as to define a specific path to store this + credential (eg. /etc/keys/bigquery.dat). dialect : {'legacy', 'standard'}, default 'legacy' 'legacy' : Use BigQuery's legacy SQL dialect. 'standard' : Use BigQuery's standard SQL (beta), which is @@ -900,7 +744,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, verbose=Tru only some configuration settings are currently implemented. You can pass them along like in the following: `from_gbq(q,configuration={'allow_large_results':True,'maximum_billing_tier':2})` - Example allowable settings: allow_large_results, create_disposition, default_dataset, destination flatten_results, priority, use_query_cache, use_legacy_sql, dry_run, @@ -914,6 +757,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, verbose=Tru """ # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size + def sizeof_fmt(num, suffix='B'): fmt = "%3.1f %s%s" for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: @@ -932,6 +776,8 @@ def _wait_for_job(job): time.sleep(1) if private_key: client = bigquery.Client(project=project_id).from_service_account_json(private_key) + elif auth_local_webserver: + GbqConnector(project_id=project_id,auth_local_webserver=True).get_user_account_credentials() else: client = bigquery.Client(project=project_id) query_job = client.run_async_query(str(uuid.uuid4()), query) From c4a2c36f41bf91ed00558a26e979d05e4faf7685 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Thu, 5 Oct 2017 17:07:11 -0400 Subject: [PATCH 12/42] Use GbqConnector for credentials checking --- pandas_gbq/gbq.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 7dbea89d..6acb77f3 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -683,7 +683,7 @@ def _get_credentials_file(): return os.environ.get( 'PANDAS_GBQ_CREDENTIALS_FILE') -def read_gbq(query, project_id=None, index_col=None, col_order=None, verbose=True, +def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, auth_local_webserver=False, dialect='legacy', configuration=None, **kwargs): r"""Load data from Google BigQuery using google-cloud-python @@ -721,6 +721,9 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, verbose=Tru col_order : list(str) (optional) List of BigQuery column names in the desired order for results DataFrame + reauth : boolean (default False) + Force Google BigQuery to reauthenticate the user. This is useful + if multiple accounts are used. 
verbose : boolean (default True) Verbose output private_key : str (optional) @@ -774,12 +777,12 @@ def _wait_for_job(job): raise RuntimeError(job.errors) return time.sleep(1) - if private_key: - client = bigquery.Client(project=project_id).from_service_account_json(private_key) - elif auth_local_webserver: - GbqConnector(project_id=project_id,auth_local_webserver=True).get_user_account_credentials() - else: - client = bigquery.Client(project=project_id) + + credentials = GbqConnector(project_id=project_id, + reauth=reauth, + auth_local_webserver=auth_local_webserver, + private_key=private_key).credentials + client = bigquery.Client(project=project_id, credentials=credentials) query_job = client.run_async_query(str(uuid.uuid4()), query) if dialect != 'legacy': From 04373ebd0aa8dc802b9148b0048e200d0f4867dd Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Thu, 5 Oct 2017 19:39:39 -0400 Subject: [PATCH 13/42] Dealing with tests part 1 --- pandas_gbq/gbq.py | 39 +++++++++++++++++----------- pandas_gbq/tests/test_gbq.py | 50 ++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 37 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 6acb77f3..2ecc5e93 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -683,9 +683,19 @@ def _get_credentials_file(): return os.environ.get( 'PANDAS_GBQ_CREDENTIALS_FILE') + +def sizeof_fmt(num, suffix='B'): + fmt = "%3.1f %s%s" + for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: + if abs(num) < 1024.0: + return fmt % (num, unit, suffix) + num /= 1024.0 + return fmt % (num, 'Y', suffix) + + def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, - private_key=None, auth_local_webserver=False, dialect='legacy', - configuration=None, **kwargs): + private_key=None, auth_local_webserver=False, dialect='legacy', credentials=None, + get_schema=False, configuration=None, **kwargs): r"""Load data from Google BigQuery using google-cloud-python The main method a user calls to execute a Query in Google BigQuery @@ -741,7 +751,11 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals 'standard' : Use BigQuery's standard SQL (beta), which is compliant with the SQL 2011 standard. For more information see `BigQuery SQL Reference - `__ + ` + credentials: credentials object (default None) + If generating credentials on your own, pass in. Otherwise, will attempt to generate automatically + get_schema: boolean, default False + Set to True if you only want to return the schema, otherwise by default will return dataframe configuration : dict (optional) Because of current limitations (https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2765) only some configuration settings are currently implemented. You can pass them @@ -761,14 +775,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size - def sizeof_fmt(num, suffix='B'): - fmt = "%3.1f %s%s" - for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: - if abs(num) < 1024.0: - return fmt % (num, unit, suffix) - num /= 1024.0 - return fmt % (num, 'Y', suffix) - def _wait_for_job(job): while True: job.reload() # Refreshes the state via a GET request. 
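A sketch of the credential-reuse pattern enabled by the new credentials and get_schema parameters, mirroring the updated tests later in this patch (project id and keyfile path are placeholders):

    from pandas_gbq import gbq

    # Build credentials once via GbqConnector, then reuse them across calls.
    connector = gbq.GbqConnector(project_id="my-project",
                                 private_key="/path/to/keyfile.json")
    creds = connector.credentials

    schema = gbq.read_gbq("SELECT 1", project_id="my-project",
                          credentials=creds, get_schema=True)
    df = gbq.read_gbq("SELECT 1", project_id="my-project",
                      credentials=creds)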
@@ -778,10 +784,11 @@ def _wait_for_job(job): return time.sleep(1) - credentials = GbqConnector(project_id=project_id, - reauth=reauth, - auth_local_webserver=auth_local_webserver, - private_key=private_key).credentials + if credentials is None: + credentials = GbqConnector(project_id=project_id, + reauth=reauth, + auth_local_webserver=auth_local_webserver, + private_key=private_key).credentials client = bigquery.Client(project=project_id, credentials=credentials) query_job = client.run_async_query(str(uuid.uuid4()), query) @@ -820,6 +827,8 @@ def _wait_for_job(job): print("\nTotal time taken %s s" % (datetime.utcnow()-query_job.created.replace(tzinfo=None)).seconds) print("Finished at %s." % datetime.now().strftime('%Y-%m-%d %H:%M:%S')) + if get_schema: + return query_results.schema columns = [field.name for field in query_results.schema] data = rows diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 62b72dbc..e718bfda 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -198,12 +198,14 @@ def test_should_be_able_to_get_a_bigquery_service(self): assert bigquery_service is not None def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') + credentials = self.sut.credentials + schema = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials,get_schema=True) assert schema is not None def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - assert pages is not None + credentials = self.sut.credentials + results = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials) + assert results is not None def test_get_application_default_credentials_does_not_throw_error(self): if _check_if_can_get_correct_default_credentials(): @@ -261,12 +263,14 @@ def test_should_be_able_to_get_a_bigquery_service(self): assert bigquery_service is not None def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') + credentials = self.sut.credentials + schema = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials,get_schema=True) assert schema is not None def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - assert pages is not None + credentials = self.sut.credentials + results = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials) + assert results is not None class TestGBQConnectorIntegrationWithServiceAccountKeyContents(object): @@ -292,12 +296,14 @@ def test_should_be_able_to_get_a_bigquery_service(self): assert bigquery_service is not None def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') + credentials = self.sut.credentials + schema = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials,get_schema=True) assert schema is not None def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - assert pages is not None + credentials = self.sut.credentials + results = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials) + assert results is not None class GBQUnitTests(object): @@ -906,19 +912,19 @@ def test_timeout_configuration(self): configuration=config) def test_query_response_bytes(self): - assert self.gbq_connector.sizeof_fmt(999) == "999.0 B" - assert self.gbq_connector.sizeof_fmt(1024) == "1.0 KB" - assert 
self.gbq_connector.sizeof_fmt(1099) == "1.1 KB" - assert self.gbq_connector.sizeof_fmt(1044480) == "1020.0 KB" - assert self.gbq_connector.sizeof_fmt(1048576) == "1.0 MB" - assert self.gbq_connector.sizeof_fmt(1048576000) == "1000.0 MB" - assert self.gbq_connector.sizeof_fmt(1073741824) == "1.0 GB" - assert self.gbq_connector.sizeof_fmt(1.099512E12) == "1.0 TB" - assert self.gbq_connector.sizeof_fmt(1.125900E15) == "1.0 PB" - assert self.gbq_connector.sizeof_fmt(1.152922E18) == "1.0 EB" - assert self.gbq_connector.sizeof_fmt(1.180592E21) == "1.0 ZB" - assert self.gbq_connector.sizeof_fmt(1.208926E24) == "1.0 YB" - assert self.gbq_connector.sizeof_fmt(1.208926E28) == "10000.0 YB" + assert gbq.sizeof_fmt(999) == "999.0 B" + assert gbq.sizeof_fmt(1024) == "1.0 KB" + assert gbq.sizeof_fmt(1099) == "1.1 KB" + assert gbq.sizeof_fmt(1044480) == "1020.0 KB" + assert gbq.sizeof_fmt(1048576) == "1.0 MB" + assert gbq.sizeof_fmt(1048576000) == "1000.0 MB" + assert gbq.sizeof_fmt(1073741824) == "1.0 GB" + assert gbq.sizeof_fmt(1.099512E12) == "1.0 TB" + assert gbq.sizeof_fmt(1.125900E15) == "1.0 PB" + assert gbq.sizeof_fmt(1.152922E18) == "1.0 EB" + assert gbq.sizeof_fmt(1.180592E21) == "1.0 ZB" + assert gbq.sizeof_fmt(1.208926E24) == "1.0 YB" + assert gbq.sizeof_fmt(1.208926E28) == "10000.0 YB" class TestToGBQIntegrationWithServiceAccountKeyPath(object): From dcf014ab1c85804fcdb9c7b94d3f3c2837f428aa Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Thu, 5 Oct 2017 21:41:03 -0400 Subject: [PATCH 14/42] Fix formatting to make linter happy plus remove obj conversion in test Remove obj conversion in test Tweak formatting Update ci requirements Fix more formatting to make linter happy --- ci/requirements-2.7-0.19.2.pip | 1 + ci/requirements-3.5-0.18.1.pip | 1 + ci/requirements-3.6-0.20.1.conda | 1 + ci/requirements-3.6-MASTER.pip | 1 + pandas_gbq/gbq.py | 107 ++++++++++++++++++------------- pandas_gbq/tests/test_gbq.py | 58 +++++++---------- 6 files changed, 88 insertions(+), 81 deletions(-) diff --git a/ci/requirements-2.7-0.19.2.pip b/ci/requirements-2.7-0.19.2.pip index 852dc153..22b763cf 100644 --- a/ci/requirements-2.7-0.19.2.pip +++ b/ci/requirements-2.7-0.19.2.pip @@ -5,3 +5,4 @@ google-auth-oauthlib PyCrypto python-gflags mock +google-cloud-bigquery>=0.25.0,<=0.26.0 diff --git a/ci/requirements-3.5-0.18.1.pip b/ci/requirements-3.5-0.18.1.pip index 6fb8a03d..19ca6227 100644 --- a/ci/requirements-3.5-0.18.1.pip +++ b/ci/requirements-3.5-0.18.1.pip @@ -3,3 +3,4 @@ google-auth==1.0.0 google-auth-httplib2==0.0.1 google-auth-oauthlib==0.0.1 mock +google-cloud-bigquery>=0.25.0,<=0.26.0 diff --git a/ci/requirements-3.6-0.20.1.conda b/ci/requirements-3.6-0.20.1.conda index a1608720..7ca942e4 100644 --- a/ci/requirements-3.6-0.20.1.conda +++ b/ci/requirements-3.6-0.20.1.conda @@ -3,3 +3,4 @@ google-auth google-auth-httplib2 google-auth-oauthlib mock +google-cloud-bigquery>=0.25.0,<=0.26.0 diff --git a/ci/requirements-3.6-MASTER.pip b/ci/requirements-3.6-MASTER.pip index a1608720..7ca942e4 100644 --- a/ci/requirements-3.6-MASTER.pip +++ b/ci/requirements-3.6-MASTER.pip @@ -3,3 +3,4 @@ google-auth google-auth-httplib2 google-auth-oauthlib mock +google-cloud-bigquery>=0.25.0,<=0.26.0 diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 2ecc5e93..71a9c2f0 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -6,13 +6,10 @@ import time import sys import os -import uuid - -import numpy as np from distutils.version import StrictVersion -from pandas import compat, DataFrame, concat -from pandas.compat import 
lzip, bytes_to_str +from pandas import compat, DataFrame +from pandas.compat import bytes_to_str from google.cloud import bigquery @@ -507,7 +504,6 @@ def process_insert_errors(self, insert_errors): raise StreamingInsertError - def load_data(self, dataframe, dataset_id, table_id, chunksize): try: from googleapiclient.errors import HttpError @@ -679,6 +675,7 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema): table.create(table_id, table_schema) sleep(delay) + def _get_credentials_file(): return os.environ.get( 'PANDAS_GBQ_CREDENTIALS_FILE') @@ -691,11 +688,12 @@ def sizeof_fmt(num, suffix='B'): return fmt % (num, unit, suffix) num /= 1024.0 return fmt % (num, 'Y', suffix) - -def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, - private_key=None, auth_local_webserver=False, dialect='legacy', credentials=None, - get_schema=False, configuration=None, **kwargs): + +def read_gbq(query, project_id=None, index_col=None, col_order=None, + reauth=False, verbose=True, private_key=None, auth_local_webserver=False, + dialect='legacy', credentials=None, get_schema=False, query_parameters=(), + configuration=None, **kwargs): r"""Load data from Google BigQuery using google-cloud-python The main method a user calls to execute a Query in Google BigQuery @@ -706,19 +704,22 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals ` Authentication via Google Cloud can be performed a number of ways, see: - + - The easiest is to generate user credentials via `gcloud auth application-default login` - - and point to it using an environment variable: + The easiest is to generate user credentials via + `gcloud auth application-default login` and point to it using an + environment variable: `$ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/keyfile.json"` - You can also download a service account private key JSON file and pass the path to the file - to the private_key paramater. + You can also download a service account private key JSON file and pass the + path to the file to the private_key paramater. - As a final alternative, you can also set auth_local_webserver to True, which will trigger - a pop-up through which a user can auth with their Google account. This will generate a user - credentials file, which is saved locally and can be re-used in the future. + As a final alternative, you can also set auth_local_webserver to True, + which will trigger a pop-up through which a user can auth with their Google + account. This will generate a user credentials file, which is saved locally + and can be re-used in the future. Parameters ---------- @@ -737,15 +738,15 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals verbose : boolean (default True) Verbose output private_key : str (optional) - Path to service account private key in JSON format. If none is provided, - will default to the GOOGLE_APPLICATION_CREDENTIALS environment variable - or another form of authentication (see above) + Path to service account private key in JSON format. If none is + provided, will default to the GOOGLE_APPLICATION_CREDENTIALS + environment variable or another form of authentication (see above) auth_local_webserver : boolean, default False Use the [local webserver flow] instead of the [console flow] when getting user credentials. A file named bigquery_credentials.dat will - be created in ~/.config/pandas_gbq/. 
You can also set PANDAS_GBQ_CREDENTIALS_FILE - environment variable so as to define a specific path to store this - credential (eg. /etc/keys/bigquery.dat). + be created in ~/.config/pandas_gbq/. You can also set + PANDAS_GBQ_CREDENTIALS_FILE environment variable so as to define a + specific path to store this credential (eg. /etc/keys/bigquery.dat). dialect : {'legacy', 'standard'}, default 'legacy' 'legacy' : Use BigQuery's legacy SQL dialect. 'standard' : Use BigQuery's standard SQL (beta), which is @@ -753,18 +754,26 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals see `BigQuery SQL Reference ` credentials: credentials object (default None) - If generating credentials on your own, pass in. Otherwise, will attempt to generate automatically + If generating credentials on your own, pass in. Otherwise, will attempt + to generate automatically get_schema: boolean, default False - Set to True if you only want to return the schema, otherwise by default will return dataframe + Set to True if you only want to return the schema, otherwise by default + will return dataframe + query_parameters: dict (optional) configuration : dict (optional) - Because of current limitations (https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2765) - only some configuration settings are currently implemented. You can pass them - along like in the following: - `from_gbq(q,configuration={'allow_large_results':True,'maximum_billing_tier':2})` - Example allowable settings: - allow_large_results, create_disposition, default_dataset, destination - flatten_results, priority, use_query_cache, use_legacy_sql, dry_run, - write_disposition, udf_resources, maximum_billing_tier, maximum_bytes_billed + Because of current limitations only some configuration settings are + currently implemented. You can pass them along like in the following: + `read_gbq(q,configuration={'allow_large_results':True, + 'maximum_billing_tier':2})` + Example allowable settings: + allow_large_results, create_disposition, default_dataset, + destination, flatten_results, priority, use_query_cache, + use_legacy_sql, dry_run, write_disposition, udf_resources, + maximum_billing_tier, maximum_bytes_billed + Returns ------- @@ -773,8 +782,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals """ - # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size - def _wait_for_job(job): while True: job.reload() # Refreshes the state via a GET request. 
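Because each configuration key is applied onto the QueryJob with setattr(), the keys must match job attribute names exactly. A minimal sketch using two of the settings listed above (the project id is a placeholder):

    from pandas_gbq import gbq

    df = gbq.read_gbq("SELECT 1 AS x", project_id="my-project",
                      dialect="standard",
                      configuration={"use_query_cache": False,
                                     "maximum_billing_tier": 2})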
@@ -788,9 +795,12 @@ def _wait_for_job(job): credentials = GbqConnector(project_id=project_id, reauth=reauth, auth_local_webserver=auth_local_webserver, - private_key=private_key).credentials + private_key=private_key).credentials + client = bigquery.Client(project=project_id, credentials=credentials) - query_job = client.run_async_query(str(uuid.uuid4()), query) + query_job = client.run_async_query(str(uuid.uuid4()), + query, + query_parameters=query_parameters) if dialect != 'legacy': query_job.use_legacy_sql = False @@ -808,10 +818,14 @@ def _wait_for_job(job): print("Query done.") if query_job._properties["statistics"]["query"].get("cacheHit", False): print("Cache hit.") - elif "statistics" in query_job._properties and "query" in query_job._properties["statistics"]: - bytes_billed = int(query_job._properties["statistics"]["query"].get("totalBytesProcessed", 0)) - bytes_processed = int(query_job._properties["statistics"]["query"].get("totalBytesBilled", 0)) - print("Total bytes billed (processed): %s (%s)" % (sizeof_fmt(bytes_billed),sizeof_fmt(bytes_processed))) + elif ("statistics" in query_job._properties and + "query" in query_job._properties["statistics"]): + bytes_billed = int(query_job._properties["statistics"]["query"] + .get("totalBytesProcessed", 0)) + bytes_processed = int(query_job._properties["statistics"]["query"] + .get("totalBytesBilled", 0)) + print("Total bytes billed (processed): %s (%s)" % + (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) try: query_results = query_job.results() except: @@ -824,16 +838,17 @@ def _wait_for_job(job): if verbose: print("Got %s rows.") % total_rows - print("\nTotal time taken %s s" % (datetime.utcnow()-query_job.created.replace(tzinfo=None)).seconds) + print("\nTotal time taken %ss" % (datetime.utcnow() - + query_job.created.replace(tzinfo=None)).seconds) print("Finished at %s." 
% datetime.now().strftime('%Y-%m-%d %H:%M:%S')) - + if get_schema: return query_results.schema columns = [field.name for field in query_results.schema] data = rows - final_df = DataFrame(data=data,columns=columns) + final_df = DataFrame(data=data, columns=columns) # Change the order of columns in the DataFrame based on provided list if col_order: diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index e718bfda..859b72b6 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -199,12 +199,17 @@ def test_should_be_able_to_get_a_bigquery_service(self): def test_should_be_able_to_get_schema_from_query(self): credentials = self.sut.credentials - schema = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials,get_schema=True) + schema = gbq.read_gbq('SELECT 1', + project_id=_get_project_id(), + credentials=credentials, + get_schema=True) assert schema is not None def test_should_be_able_to_get_results_from_query(self): credentials = self.sut.credentials - results = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials) + results = gbq.read_gbq('SELECT 1', + project_id=_get_project_id(), + credentials=credentials) assert results is not None def test_get_application_default_credentials_does_not_throw_error(self): @@ -264,12 +269,15 @@ def test_should_be_able_to_get_a_bigquery_service(self): def test_should_be_able_to_get_schema_from_query(self): credentials = self.sut.credentials - schema = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials,get_schema=True) + schema = gbq.read_gbq('SELECT 1', project_id=_get_project_id(), + credentials=credentials, + get_schema=True) assert schema is not None def test_should_be_able_to_get_results_from_query(self): credentials = self.sut.credentials - results = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials) + results = gbq.read_gbq('SELECT 1', project_id=_get_project_id(), + credentials=credentials) assert results is not None @@ -297,12 +305,17 @@ def test_should_be_able_to_get_a_bigquery_service(self): def test_should_be_able_to_get_schema_from_query(self): credentials = self.sut.credentials - schema = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials,get_schema=True) + schema = gbq.read_gbq('SELECT 1', + project_id=_get_project_id(), + credentials=credentials, + get_schema=True) assert schema is not None def test_should_be_able_to_get_results_from_query(self): credentials = self.sut.credentials - results = gbq.read_gbq('SELECT 1',project_id=_get_project_id(),credentials=credentials) + results = gbq.read_gbq('SELECT 1', + project_id=_get_project_id(), + credentials=credentials) assert results is not None @@ -522,7 +535,7 @@ def test_should_properly_handle_nullable_integers(self): df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal( - df, DataFrame({'nullable_integer': [1, None]}).astype(object)) + df, DataFrame({'nullable_integer': [1, None]})) def test_should_properly_handle_valid_longs(self): query = 'SELECT 1 << 62 AS valid_long' @@ -538,7 +551,7 @@ def test_should_properly_handle_nullable_longs(self): df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal( - df, DataFrame({'nullable_long': [1 << 62, None]}).astype(object)) + df, DataFrame({'nullable_long': [1 << 62, None]})) def test_should_properly_handle_null_integers(self): query = 'SELECT INTEGER(NULL) AS 
null_integer' @@ -634,7 +647,7 @@ def test_should_properly_handle_nullable_booleans(self): df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal( - df, DataFrame({'nullable_boolean': [True, None]}).astype(object)) + df, DataFrame({'nullable_boolean': [True, None]})) def test_unicode_string_conversion_and_normalization(self): correct_test_datatype = DataFrame( @@ -797,32 +810,7 @@ def test_invalid_option_for_sql_dialect(self): def test_query_with_parameters(self): sql_statement = "SELECT @param1 + @param2 AS valid_result" - config = { - 'query': { - "useLegacySql": False, - "parameterMode": "named", - "queryParameters": [ - { - "name": "param1", - "parameterType": { - "type": "INTEGER" - }, - "parameterValue": { - "value": 1 - } - }, - { - "name": "param2", - "parameterType": { - "type": "INTEGER" - }, - "parameterValue": { - "value": 2 - } - } - ] - } - } + config = {"use_legacy_sql": False} # Test that a query that relies on parameters fails # when parameters are not supplied via configuration with pytest.raises(ValueError): From 72253aabf55dcc64f307f1a32b979278e90fb421 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 6 Oct 2017 00:55:37 -0400 Subject: [PATCH 15/42] Rewrite query_parameters test with documentation --- pandas_gbq/gbq.py | 26 ++++++++++++++++++-------- pandas_gbq/tests/test_gbq.py | 14 ++++++++++---- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 71a9c2f0..a9f7d034 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -691,9 +691,10 @@ def sizeof_fmt(num, suffix='B'): def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=True, private_key=None, auth_local_webserver=False, - dialect='legacy', credentials=None, get_schema=False, query_parameters=(), - configuration=None, **kwargs): + reauth=False, verbose=True, private_key=None, + auth_local_webserver=False, dialect='legacy', credentials=None, + get_schema=False, query_parameters=(), configuration=None, + **kwargs): r"""Load data from Google BigQuery using google-cloud-python The main method a user calls to execute a Query in Google BigQuery @@ -741,7 +742,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, Path to service account private key in JSON format. If none is provided, will default to the GOOGLE_APPLICATION_CREDENTIALS environment variable or another form of authentication (see above) - auth_local_webserver : boolean, default False + auth_local_webserver : boolean, default False (optional) Use the [local webserver flow] instead of the [console flow] when getting user credentials. A file named bigquery_credentials.dat will be created in ~/.config/pandas_gbq/. You can also set @@ -756,11 +757,16 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, credentials: credentials object (default None) If generating credentials on your own, pass in. 
Otherwise, will attempt to generate automatically - get_schema: boolean, default False + get_schema: boolean, default False (optional) Set to True if you only want to return the schema, otherwise by default will return dataframe - query_parameters: dict (optional) + query_parameters: tuple (optional) Can only be used in Standard SQL + example: gbq.read_gbq("SELECT @param1 + @param2", + query_parameters = (bigquery.ScalarQueryParameter( + 'param1', 'INT64', 1), + bigquery.ScalarQueryParameter( + 'param2', 'INT64', 2))) + configuration : dict (optional) Because of current limitations only some configuration settings are @@ -802,8 +808,12 @@ def _wait_for_job(job): query, query_parameters=query_parameters) - if dialect != 'legacy': + if dialect == 'legacy': + query_job.use_legacy_sql = True + elif dialect == 'standard': query_job.use_legacy_sql = False + else: + raise ValueError("'{0}' is not valid for dialect".format(dialect)) if configuration: for setting, value in configuration.items(): diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 859b72b6..00ff4769 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -17,6 +17,7 @@ from pandas_gbq import gbq import pandas.util.testing as tm from pandas.compat.numpy import np_datetime64_compat +from google.cloud import bigquery TABLE_ID = 'new_test' @@ -813,15 +814,20 @@ def test_query_with_parameters(self): config = {"use_legacy_sql": False} # Test that a query that relies on parameters fails # when parameters are not supplied via configuration - with pytest.raises(ValueError): + with pytest.raises(RuntimeError): gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path()) # Test that the query is successful because we have supplied - # the correct query parameters via the 'config' option + # the correct query parameters via the 'config' and query_parameters + # option df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) + configuration = config, + query_parameters = (bigquery.ScalarQueryParameter( + 'param1', 'INT64', 1), + bigquery.ScalarQueryParameter( + 'param2', 'INT64', 2)), + private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame({'valid_result': [3]})) def test_query_inside_configuration(self): From 751f39b142e3a0b366dab00f2a65baf09e8a751d Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 6 Oct 2017 07:03:25 -0400 Subject: [PATCH 16/42] Rewrite sync and async job flow, handle timeouts, add BadRequest error to tests --- pandas_gbq/gbq.py | 131 +++++++++++++++++++++++------------ pandas_gbq/tests/test_gbq.py | 12 ++-- 2 files changed, 92 insertions(+), 51 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index a9f7d034..6f8c2597 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -694,7 +694,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, auth_local_webserver=False, dialect='legacy', credentials=None, get_schema=False, query_parameters=(), configuration=None, - **kwargs): + timeout_ms=None, **kwargs): r"""Load data from Google BigQuery using google-cloud-python The main method a user calls to execute a Query in Google BigQuery @@ -780,6 +780,11 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, maximum_billing_tier, maximum_bytes_billed + timeout_ms: int (optional) If set or found in config, triggers a sync query + that times out with no results if it 
can't be completed in the time + desired + Returns ------- @@ -788,6 +793,9 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, """ + if dialect not in ('legacy', 'standard'): + raise ValueError("'{0}' is not valid for dialect".format(dialect)) + def _wait_for_job(job): while True: job.reload() # Refreshes the state via a GET request. @@ -802,55 +810,88 @@ def _wait_for_job(job): reauth=reauth, auth_local_webserver=auth_local_webserver, private_key=private_key).credentials - client = bigquery.Client(project=project_id, credentials=credentials) - query_job = client.run_async_query(str(uuid.uuid4()), - query, - query_parameters=query_parameters) - - if dialect == 'legacy': - query_job.use_legacy_sql = True - elif dialect == 'standard': - query_job.use_legacy_sql = False - else: - raise ValueError("'{0}' is not valid for dialect".format(dialect)) - - if configuration: - for setting, value in configuration.items(): - setattr(query_job, setting, value) - - query_job.begin() - - if verbose: - print("Query running...") - _wait_for_job(query_job) - if verbose: - print("Query done.") - if query_job._properties["statistics"]["query"].get("cacheHit", False): - print("Cache hit.") - elif ("statistics" in query_job._properties and - "query" in query_job._properties["statistics"]): - bytes_billed = int(query_job._properties["statistics"]["query"] - .get("totalBytesProcessed", 0)) - bytes_processed = int(query_job._properties["statistics"]["query"] - .get("totalBytesBilled", 0)) - print("Total bytes billed (processed): %s (%s)" % - (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) - try: - query_results = query_job.results() - except: - query_results = query_job.result() - if verbose: - print("\nRetrieving results...") - rows = list(query_results.fetch_data()) - total_rows = len(rows) + def _set_common_query_settings(query_job): + if dialect == 'legacy': + query_job.use_legacy_sql = True + elif dialect == 'standard': + query_job.use_legacy_sql = False + + if configuration: + for setting, value in configuration.items(): + setattr(query_job, setting, value) + return query_job + + def sync_query(): + query_job = client.run_sync_query(query, + query_parameters=query_parameters) + query_job = _set_common_query_settings(query_job) + if verbose: + print("Query running...") + if timeout_ms: + query_job.timeout_ms = timeout_ms + query_job.run() + if not query_job._properties.get("jobComplete", False): + raise QueryTimeout("Sync query timed out") + if verbose: + print("Query done.") + if query_job._properties.get("cacheHit", False): + print("Cache hit.") + else: + bytes_billed = int(query_job._properties + .get("totalBytesProcessed", 0)) + bytes_processed = int(query_job._properties + .get("totalBytesBilled", 0)) + print("Total bytes billed (processed): %s (%s)" % + (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) + print("\nRetrieving results...") + return query_job, None + + def async_query(): + query_job = client.run_async_query(str(uuid.uuid4()), + query, + query_parameters=query_parameters) + query_job = _set_common_query_settings(query_job) + query_job.begin() + try: + query_results = query_job.results().fetch_data() + except: + query_results = query_job.result().fetch_data() + if verbose: + print("Query running...") + _wait_for_job(query_job) + if verbose: + print("Query done.") + if query_job._properties["statistics"]["query"].get("cacheHit", False): + print("Cache hit.") + elif ("statistics" in query_job._properties and + "query" in query_job._properties["statistics"]): 
+ bytes_billed = int(query_job._properties["statistics"]["query"] + .get("totalBytesProcessed", 0)) + bytes_processed = int(query_job._properties["statistics"]["query"] + .get("totalBytesBilled", 0)) + print("Total bytes billed (processed): %s (%s)" % + (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) + print("\nRetrieving results...") + return query_results, query_job + + if (configuration and "timeout_ms" in configuration) or timeout_ms: + query_results, query_job = sync_query() + rows = list(query_results.rows) + total_rows = len(rows) + else: + query_results, query_job = async_query() + rows = list(query_results) + total_rows = len(rows) if verbose: print("Got %s rows.") % total_rows - print("\nTotal time taken %ss" % (datetime.utcnow() - - query_job.created.replace(tzinfo=None)).seconds) - print("Finished at %s." % datetime.now().strftime('%Y-%m-%d %H:%M:%S')) + if query_job: + print("\nTotal time taken %ss" % (datetime.utcnow() - + query_job.created.replace(tzinfo=None)).seconds) + print("Finished at %s." % datetime.now() + .strftime('%Y-%m-%d %H:%M:%S')) if get_schema: return query_results.schema diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 00ff4769..778974ea 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -19,6 +19,8 @@ from pandas.compat.numpy import np_datetime64_compat from google.cloud import bigquery +from google.cloud.exceptions import BadRequest + TABLE_ID = 'new_test' @@ -765,7 +767,7 @@ def test_legacy_sql(self): # Test that a legacy sql statement fails when # setting dialect='standard' - with pytest.raises(gbq.GenericGBQException): + with pytest.raises((RuntimeError,BadRequest)): gbq.read_gbq(legacy_sql, project_id=_get_project_id(), dialect='standard', private_key=_get_private_key_path()) @@ -783,7 +785,7 @@ def test_standard_sql(self): # Test that a standard sql statement fails when using # the legacy SQL dialect (default value) - with pytest.raises(gbq.GenericGBQException): + with pytest.raises((RuntimeError,BadRequest)): gbq.read_gbq(standard_sql, project_id=_get_project_id(), private_key=_get_private_key_path()) @@ -814,7 +816,7 @@ def test_query_with_parameters(self): config = {"use_legacy_sql": False} # Test that a query that relies on parameters fails # when parameters are not supplied via configuration - with pytest.raises(RuntimeError): + with pytest.raises((RuntimeError,BadRequest)): gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path()) @@ -895,9 +897,7 @@ def test_configuration_raises_value_error_with_multiple_config(self): def test_timeout_configuration(self): sql_statement = 'SELECT 1' config = { - 'query': { - "timeoutMs": 1 - } + "timeout_ms": 1 } # Test that QueryTimeout error raises with pytest.raises(gbq.QueryTimeout): From d83fd2293dca1a9c011474ef2c7859da3a9a2eb0 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 6 Oct 2017 07:24:46 -0400 Subject: [PATCH 17/42] Remove two configuration tests which are no longer relevant --- pandas_gbq/gbq.py | 5 ++++- pandas_gbq/tests/test_gbq.py | 39 ------------------------------------ 2 files changed, 4 insertions(+), 40 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 6f8c2597..dd5b7cf9 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -794,7 +794,10 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, """ if dialect not in ('legacy', 'standard'): - raise ValueError("'{0}' is not valid for dialect".format(dialect)) + raise ValueError("'{0}' is not valid 
for dialect".format(dialect)) + if configuration and any(key in configuration for key in + ["query", "copy", "load", "extract"]): + raise ValueError("New API handles configuration settings differently") def _wait_for_job(job): while True: diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 778974ea..7f6d3291 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -832,27 +832,6 @@ def test_query_with_parameters(self): private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame({'valid_result': [3]})) - def test_query_inside_configuration(self): - query_no_use = 'SELECT "PI_WRONG" AS valid_string' - query = 'SELECT "PI" AS valid_string' - config = { - 'query': { - "query": query, - "useQueryCache": False, - } - } - # Test that it can't pass query both - # inside config and as parameter - with pytest.raises(ValueError): - gbq.read_gbq(query_no_use, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - - df = gbq.read_gbq(None, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - def test_configuration_without_query(self): sql_statement = 'SELECT 1' config = { @@ -876,24 +855,6 @@ def test_configuration_without_query(self): private_key=_get_private_key_path(), configuration=config) - def test_configuration_raises_value_error_with_multiple_config(self): - sql_statement = 'SELECT 1' - config = { - 'query': { - "query": sql_statement, - "useQueryCache": False, - }, - 'load': { - "query": sql_statement, - "useQueryCache": False, - } - } - # Test that only ValueError is raised with multiple configurations - with pytest.raises(ValueError): - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - def test_timeout_configuration(self): sql_statement = 'SELECT 1' config = { From e597a762be88f7cca89aa0d9c334125b2e94871f Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 6 Oct 2017 11:24:37 -0400 Subject: [PATCH 18/42] Rewrite tests part 2 and can specify return type --- pandas_gbq/gbq.py | 55 ++++++++++++++++++++++++------------ pandas_gbq/tests/test_gbq.py | 44 ++++++++++++++++------------- 2 files changed, 61 insertions(+), 38 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index dd5b7cf9..2ad48b45 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -8,7 +8,7 @@ import os from distutils.version import StrictVersion -from pandas import compat, DataFrame +from pandas import compat, DataFrame, to_datetime, to_numeric from pandas.compat import bytes_to_str from google.cloud import bigquery @@ -693,7 +693,7 @@ def sizeof_fmt(num, suffix='B'): def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, auth_local_webserver=False, dialect='legacy', credentials=None, - get_schema=False, query_parameters=(), configuration=None, + return_type='df', query_parameters=(), configuration=None, timeout_ms=None, **kwargs): r"""Load data from Google BigQuery using google-cloud-python @@ -757,9 +757,13 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, credentials: credentials object (default None) If generating credentials on your own, pass in. 
Otherwise, will attempt to generate automatically - get_schema: boolean, default False (optional) - Set to True if you only want to return the schema, otherwise by default - will return dataframe + return_type: {'schema','list','df'}, default 'df' + schema returns an array of SchemaField objects, which you can access + `from pprint import pprint + [pprint(vars(field)) for field in schema]` + list returns a list of lists of the rows of the results; column names + are not included + df returns a dataframe by default query_parameters: tuple (optional) Can only be used in Standard SQL example: gbq.read_gbq("SELECT @param1 + @param2", query_parameters = (bigquery.ScalarQueryParameter( @@ -866,13 +870,16 @@ def async_query(): _wait_for_job(query_job) if verbose: print("Query done.") - if query_job._properties["statistics"]["query"].get("cacheHit", False): + if query_job._properties["statistics"]["query"].get("cacheHit", + False): print("Cache hit.") elif ("statistics" in query_job._properties and "query" in query_job._properties["statistics"]): - bytes_billed = int(query_job._properties["statistics"]["query"] + bytes_billed = int(query_job + ._properties["statistics"]["query"] .get("totalBytesProcessed", 0)) - bytes_processed = int(query_job._properties["statistics"]["query"] + bytes_processed = int(query_job + ._properties["statistics"]["query"] .get("totalBytesBilled", 0)) print("Total bytes billed (processed): %s (%s)" % (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) @@ -896,22 +903,25 @@ def async_query(): print("Finished at %s." % datetime.now() .strftime('%Y-%m-%d %H:%M:%S')) - if get_schema: + if return_type=='schema': return query_results.schema + elif return_type=='list': + return rows columns = [field.name for field in query_results.schema] data = rows final_df = DataFrame(data=data, columns=columns) - # Change the order of columns in the DataFrame based on provided list - if col_order: - if sorted(col_order) == sorted(final_df.columns): - final_df = final_df[col_order] - else: - raise InvalidColumnOrder( - 'Column order does not match this DataFrame.' - ) + # Manual field type conversion. Inserted to handle tests + # with only null rows, otherwise type conversion works automatically + for field in query_results.schema: + if field.field_type=='TIMESTAMP': + if final_df[field.name].isnull().values.all(): + final_df[field.name] = to_datetime(final_df[field.name]) + if field.field_type=='FLOAT': + if final_df[field.name].isnull().values.all(): + final_df[field.name] = to_numeric(final_df[field.name]) # Reindex the DataFrame on the provided column if index_col: @@ -921,7 +931,16 @@ def async_query(): raise InvalidIndexColumn( 'Index column "{0}" does not exist in DataFrame.' .format(index_col) - ) + ) + + # Change the order of columns in the DataFrame based on provided list + if col_order: + if sorted(col_order) == sorted(final_df.columns): + final_df = final_df[col_order] + else: + raise InvalidColumnOrder( + 'Column order does not match this DataFrame.' 
+ ) return final_df diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 7f6d3291..1b11e3be 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -13,13 +13,13 @@ from pandas import compat from pandas.compat import u, range -from pandas import NaT, DataFrame +from pandas import NaT, DataFrame, to_datetime from pandas_gbq import gbq import pandas.util.testing as tm from pandas.compat.numpy import np_datetime64_compat from google.cloud import bigquery -from google.cloud.exceptions import BadRequest +from google.cloud.exceptions import BadRequest, NotFound TABLE_ID = 'new_test' @@ -205,7 +205,7 @@ def test_should_be_able_to_get_schema_from_query(self): schema = gbq.read_gbq('SELECT 1', project_id=_get_project_id(), credentials=credentials, - get_schema=True) + return_type='schema') assert schema is not None def test_should_be_able_to_get_results_from_query(self): @@ -274,7 +274,7 @@ def test_should_be_able_to_get_schema_from_query(self): credentials = self.sut.credentials schema = gbq.read_gbq('SELECT 1', project_id=_get_project_id(), credentials=credentials, - get_schema=True) + return_type='schema') assert schema is not None def test_should_be_able_to_get_results_from_query(self): @@ -608,16 +608,19 @@ def test_should_properly_handle_timestamp_unix_epoch(self): query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") AS unix_epoch' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame( - {'unix_epoch': [np.datetime64('1970-01-01T00:00:00.000000Z')]})) + expected = DataFrame({'unix_epoch': + [np.datetime64('1970-01-01T00:00:00.000000Z')]}) + tm.assert_frame_equal(df, to_datetime(expected.unix_epoch).dt + .tz_localize('UTC').to_frame()) def test_should_properly_handle_arbitrary_timestamp(self): query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") AS valid_timestamp' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({ - 'valid_timestamp': [np.datetime64('2004-09-15T05:00:00.000000Z')] - })) + expected = DataFrame({'valid_timestamp': + [np.datetime64('2004-09-15T05:00:00.000000Z')]}) + tm.assert_frame_equal(df, to_datetime(expected.valid_timestamp).dt + .tz_localize('UTC').to_frame()) def test_should_properly_handle_null_timestamp(self): query = 'SELECT TIMESTAMP(NULL) AS null_timestamp' @@ -711,7 +714,7 @@ def test_column_order_plus_index(self): def test_read_gbq_raises_invalid_index_column(self): query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" - col_order = ['string_3', 'string_2'] + col_order = ['string_3', 'string_2', 'string_1'] # Column string_bbb does not exist. 
Should raise InvalidIndexColumn with pytest.raises(gbq.InvalidIndexColumn): @@ -720,18 +723,18 @@ def test_read_gbq_raises_invalid_index_column(self): private_key=_get_private_key_path()) def test_malformed_query(self): - with pytest.raises(gbq.GenericGBQException): + with pytest.raises(BadRequest): gbq.read_gbq("SELCET * FORM [publicdata:samples.shakespeare]", project_id=_get_project_id(), private_key=_get_private_key_path()) def test_bad_project_id(self): - with pytest.raises(gbq.GenericGBQException): + with pytest.raises(NotFound): gbq.read_gbq("SELECT 1", project_id='001', private_key=_get_private_key_path()) def test_bad_table_name(self): - with pytest.raises(gbq.GenericGBQException): + with pytest.raises(NotFound): gbq.read_gbq("SELECT * FROM [publicdata:samples.nope]", project_id=_get_project_id(), private_key=_get_private_key_path()) @@ -760,14 +763,15 @@ def test_zero_rows(self): ('is_bot', np.dtype(bool)), ('ts', 'M8[ns]')]) expected_result = DataFrame( page_array, columns=['title', 'id', 'is_bot', 'ts']) - tm.assert_frame_equal(df, expected_result) + tm.assert_frame_equal(expected_result.astype(object), + df.reset_index(drop=True).astype(object)) def test_legacy_sql(self): legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10" # Test that a legacy sql statement fails when # setting dialect='standard' - with pytest.raises((RuntimeError,BadRequest)): + with pytest.raises((RuntimeError, BadRequest)): gbq.read_gbq(legacy_sql, project_id=_get_project_id(), dialect='standard', private_key=_get_private_key_path()) @@ -785,7 +789,7 @@ def test_standard_sql(self): # Test that a standard sql statement fails when using # the legacy SQL dialect (default value) - with pytest.raises((RuntimeError,BadRequest)): + with pytest.raises((RuntimeError, BadRequest)): gbq.read_gbq(standard_sql, project_id=_get_project_id(), private_key=_get_private_key_path()) @@ -816,7 +820,7 @@ def test_query_with_parameters(self): config = {"use_legacy_sql": False} # Test that a query that relies on parameters fails # when parameters are not supplied via configuration - with pytest.raises((RuntimeError,BadRequest)): + with pytest.raises((RuntimeError, BadRequest)): gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path()) @@ -824,10 +828,10 @@ def test_query_with_parameters(self): # the correct query parameters via the 'config' and query_parameters # option df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), - configuration = config, - query_parameters = (bigquery.ScalarQueryParameter( + configuration=config, + query_parameters=(bigquery.ScalarQueryParameter( 'param1', 'INT64', 1), - bigquery.ScalarQueryParameter( + bigquery.ScalarQueryParameter( 'param2', 'INT64', 2)), private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame({'valid_result': [3]})) From 347bca21bf1544a92adff648b102180922870a4c Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 6 Oct 2017 11:40:58 -0400 Subject: [PATCH 19/42] Pin bigquery to 0.26.0 and more linting Fix pinning in conda ci requirements Give up trying to pin in conda Clean up the read_gbq doc a little --- ci/requirements-2.7-0.19.2.pip | 2 +- ci/requirements-3.5-0.18.1.pip | 2 +- ci/requirements-3.6-0.20.1.conda | 2 +- ci/requirements-3.6-MASTER.pip | 2 +- pandas_gbq/gbq.py | 29 ++++++++++++++--------------- pandas_gbq/tests/test_gbq.py | 20 +++++++++----------- requirements.txt | 2 +- setup.py | 2 +- 8 files changed, 29 insertions(+), 32 deletions(-) diff --git a/ci/requirements-2.7-0.19.2.pip 
b/ci/requirements-2.7-0.19.2.pip index 22b763cf..4a90785a 100644 --- a/ci/requirements-2.7-0.19.2.pip +++ b/ci/requirements-2.7-0.19.2.pip @@ -5,4 +5,4 @@ google-auth-oauthlib PyCrypto python-gflags mock -google-cloud-bigquery>=0.25.0,<=0.26.0 +google-cloud-bigquery==0.26.0 diff --git a/ci/requirements-3.5-0.18.1.pip b/ci/requirements-3.5-0.18.1.pip index 19ca6227..9c94dbbb 100644 --- a/ci/requirements-3.5-0.18.1.pip +++ b/ci/requirements-3.5-0.18.1.pip @@ -3,4 +3,4 @@ google-auth==1.0.0 google-auth-httplib2==0.0.1 google-auth-oauthlib==0.0.1 mock -google-cloud-bigquery>=0.25.0,<=0.26.0 +google-cloud-bigquery==0.26.0 diff --git a/ci/requirements-3.6-0.20.1.conda b/ci/requirements-3.6-0.20.1.conda index 7ca942e4..3ee89b92 100644 --- a/ci/requirements-3.6-0.20.1.conda +++ b/ci/requirements-3.6-0.20.1.conda @@ -3,4 +3,4 @@ google-auth google-auth-httplib2 google-auth-oauthlib mock -google-cloud-bigquery>=0.25.0,<=0.26.0 +google-cloud-bigquery diff --git a/ci/requirements-3.6-MASTER.pip b/ci/requirements-3.6-MASTER.pip index 7ca942e4..96ab0aff 100644 --- a/ci/requirements-3.6-MASTER.pip +++ b/ci/requirements-3.6-MASTER.pip @@ -3,4 +3,4 @@ google-auth google-auth-httplib2 google-auth-oauthlib mock -google-cloud-bigquery>=0.25.0,<=0.26.0 +google-cloud-bigquery==0.26.0 diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 2ad48b45..bcf2b610 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -708,7 +708,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, - The easiest is to generate user credentials via + One method is to generate user credentials via `gcloud auth application-default login` and point to it using an environment variable: @@ -717,10 +717,11 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, You can also download a service account private key JSON file and pass the path to the file to the private_key paramater. - As a final alternative, you can also set auth_local_webserver to True, - which will trigger a pop-up through which a user can auth with their Google - account. This will generate a user credentials file, which is saved locally - and can be re-used in the future. + If default credentials are not located and a private key is not passed, + an auth flow will begin where a user can auth via a link or via a pop-up + through which a user can auth with their Google account. This will + generate a user credentials file, which is saved locally and can be re-used + in the future. Parameters ---------- @@ -799,7 +800,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, if dialect not in ('legacy', 'standard'): raise ValueError("'{0}' is not valid for dialect".format(dialect)) - if configuration and any(key in configuration for key in + if configuration and any(key in configuration for key in ["query", "copy", "load", "extract"]): raise ValueError("New API handles configuration settings differently") @@ -870,7 +871,7 @@ def async_query(): _wait_for_job(query_job) if verbose: print("Query done.") - if query_job._properties["statistics"]["query"].get("cacheHit", + if query_job._properties["statistics"]["query"].get("cacheHit", False): print("Cache hit.") elif ("statistics" in query_job._properties and @@ -903,9 +904,9 @@ def async_query(): print("Finished at %s." 
% datetime.now() .strftime('%Y-%m-%d %H:%M:%S')) - if return_type=='schema': + if return_type == 'schema': return query_results.schema - elif return_type=='list': + elif return_type == 'list': return rows columns = [field.name for field in query_results.schema] @@ -916,10 +917,10 @@ def async_query(): # Manual field type conversion. Inserted to handle tests # with only null rows, otherwise type conversion works automatically for field in query_results.schema: - if field.field_type=='TIMESTAMP': + if field.field_type == 'TIMESTAMP': if final_df[field.name].isnull().values.all(): final_df[field.name] = to_datetime(final_df[field.name]) - if field.field_type=='FLOAT': + if field.field_type == 'FLOAT': if final_df[field.name].isnull().values.all(): final_df[field.name] = to_numeric(final_df[field.name]) @@ -930,8 +931,7 @@ def async_query(): else: raise InvalidIndexColumn( 'Index column "{0}" does not exist in DataFrame.' - .format(index_col) - ) + .format(index_col)) # Change the order of columns in the DataFrame based on provided list if col_order: @@ -939,8 +939,7 @@ def async_query(): final_df = final_df[col_order] else: raise InvalidColumnOrder( - 'Column order does not match this DataFrame.' - ) + 'Column order does not match this DataFrame.') return final_df diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 1b11e3be..f75ef49c 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -608,19 +608,19 @@ def test_should_properly_handle_timestamp_unix_epoch(self): query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") AS unix_epoch' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - expected = DataFrame({'unix_epoch': - [np.datetime64('1970-01-01T00:00:00.000000Z')]}) + expected = DataFrame({'unix_epoch': + [np.datetime64('1970-01-01T00:00:00.000000Z')]}) tm.assert_frame_equal(df, to_datetime(expected.unix_epoch).dt - .tz_localize('UTC').to_frame()) + .tz_localize('UTC').to_frame()) def test_should_properly_handle_arbitrary_timestamp(self): query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") AS valid_timestamp' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - expected = DataFrame({'valid_timestamp': - [np.datetime64('2004-09-15T05:00:00.000000Z')]}) + expected = DataFrame({'valid_timestamp': + [np.datetime64('2004-09-15T05:00:00.000000Z')]}) tm.assert_frame_equal(df, to_datetime(expected.valid_timestamp).dt - .tz_localize('UTC').to_frame()) + .tz_localize('UTC').to_frame()) def test_should_properly_handle_null_timestamp(self): query = 'SELECT TIMESTAMP(NULL) AS null_timestamp' @@ -830,9 +830,9 @@ def test_query_with_parameters(self): df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), configuration=config, query_parameters=(bigquery.ScalarQueryParameter( - 'param1', 'INT64', 1), + 'param1', 'INT64', 1), bigquery.ScalarQueryParameter( - 'param2', 'INT64', 2)), + 'param2', 'INT64', 2)), private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame({'valid_result': [3]})) @@ -861,9 +861,7 @@ def test_configuration_without_query(self): def test_timeout_configuration(self): sql_statement = 'SELECT 1' - config = { - "timeout_ms": 1 - } + config = {"timeout_ms": 1} # Test that QueryTimeout error raises with pytest.raises(gbq.QueryTimeout): gbq.read_gbq(sql_statement, project_id=_get_project_id(), diff --git a/requirements.txt b/requirements.txt index 731224d2..f7f2cd1a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,5 @@ 
google-api-python-client google-auth google-auth-httplib2 google-auth-oauthlib -google-cloud-bigquery>=0.25.0,<=0.26.0 +google-cloud-bigquery==0.26.0 uuid diff --git a/setup.py b/setup.py index b6cdf68d..ba3e5e3e 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def readme(): 'google-auth>=1.0.0', 'google-auth-httplib2>=0.0.1', 'google-auth-oauthlib>=0.0.1', - 'google-cloud-bigquery>=0.25.0,<=0.26.0', + 'google-cloud-bigquery==0.26.0', ] From fc6134ab7b4a5fcc3b5df2bf4f1fbe3ecffad28d Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Sat, 7 Oct 2017 16:29:25 -0400 Subject: [PATCH 20/42] Remove uuid from requirements --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f7f2cd1a..f49120c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,3 @@ google-auth google-auth-httplib2 google-auth-oauthlib google-cloud-bigquery==0.26.0 -uuid From 61db19190cdfe690298288cf90e0d62a5b73e95d Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Mon, 9 Oct 2017 09:58:14 -0400 Subject: [PATCH 21/42] Clean up/revert some of the documentation --- pandas_gbq/gbq.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index bcf2b610..66fb21a0 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -727,7 +727,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, ---------- query : str SQL-Like Query to return data values - project_id : str (optional) + project_id : str Google BigQuery Account project ID. index_col : str (optional) Name of result column to use for index in results DataFrame @@ -740,22 +740,27 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, verbose : boolean (default True) Verbose output private_key : str (optional) - Path to service account private key in JSON format. If none is - provided, will default to the GOOGLE_APPLICATION_CREDENTIALS - environment variable or another form of authentication (see above) - auth_local_webserver : boolean, default False (optional) + Service account private key in JSON format. Can be file path + or string contents. This is useful for remote server + authentication (eg. jupyter iPython notebook on remote host) + auth_local_webserver : boolean, default False Use the [local webserver flow] instead of the [console flow] when getting user credentials. A file named bigquery_credentials.dat will - be created in ~/.config/pandas_gbq/. You can also set - PANDAS_GBQ_CREDENTIALS_FILE environment variable so as to define a - specific path to store this credential (eg. /etc/keys/bigquery.dat). + be created in current dir. You can also set PANDAS_GBQ_CREDENTIALS_FILE + environment variable so as to define a specific path to store this + credential (eg. /etc/keys/bigquery.dat). + .. [local webserver flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. [console flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + .. versionadded:: 0.2.0 dialect : {'legacy', 'standard'}, default 'legacy' 'legacy' : Use BigQuery's legacy SQL dialect. 'standard' : Use BigQuery's standard SQL (beta), which is compliant with the SQL 2011 standard. 
For more information see `BigQuery SQL Reference ` - credentials: credentials object (default None) + credentials: credentials object, default None (optional) If generating credentials on your own, pass in. Otherwise, will attempt to generate automatically return_type: {'schema','list','df'}, default 'df' From 28c0ae72a03aa526a78acfb7635a4d7680f2af98 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 27 Oct 2017 13:24:21 -0400 Subject: [PATCH 22/42] Unpin in testing requirements and expand config error message --- ci/requirements-2.7-0.19.2.pip | 2 +- ci/requirements-3.6-MASTER.pip | 2 +- pandas_gbq/gbq.py | 15 ++++++++++++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/ci/requirements-2.7-0.19.2.pip b/ci/requirements-2.7-0.19.2.pip index 4a90785a..a31accd7 100644 --- a/ci/requirements-2.7-0.19.2.pip +++ b/ci/requirements-2.7-0.19.2.pip @@ -5,4 +5,4 @@ google-auth-oauthlib PyCrypto python-gflags mock -google-cloud-bigquery==0.26.0 +google-cloud-bigquery diff --git a/ci/requirements-3.6-MASTER.pip b/ci/requirements-3.6-MASTER.pip index 96ab0aff..3ee89b92 100644 --- a/ci/requirements-3.6-MASTER.pip +++ b/ci/requirements-3.6-MASTER.pip @@ -3,4 +3,4 @@ google-auth google-auth-httplib2 google-auth-oauthlib mock -google-cloud-bigquery==0.26.0 +google-cloud-bigquery diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 66fb21a0..65070bb2 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -12,7 +12,6 @@ from pandas.compat import bytes_to_str from google.cloud import bigquery - def _check_google_client_version(): try: @@ -789,7 +788,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, use_legacy_sql, dry_run, write_disposition, udf_resources, maximum_billing_tier, maximum_bytes_billed + google/cloud/bigquery/job.html?highlight=QueryJobConfig> timeout_ms: int (optional) If set or found in config, triggers a sync query that times out with no results if it can't be completed in the time desired @@ -802,12 +801,22 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, DataFrame representing results of query """ + _test_google_api_imports() + + if not project_id: + raise TypeError("Missing required parameter: project_id") if dialect not in ('legacy', 'standard'): raise ValueError("'{0}' is not valid for dialect".format(dialect)) if configuration and any(key in configuration for key in ["query", "copy", "load", "extract"]): - raise ValueError("New API handles configuration settings differently") + raise ValueError("The Google Cloud BigQuery API handles configuration " + "settings differently. There are now a discrete set of query " + "settings one can set by passing in a dictionary, e.g.: " + "`configuration={'maximum_billing_tier':2}`. 
See " + "http://google-cloud-python.readthedocs.io/en/latest/_modules/" + "google/cloud/bigquery/job.html?highlight=QueryJobConfig " + "for allowable paramaters.") def _wait_for_job(job): while True: From cd75cda5d427f19652012e29bf6e30b66c87febb Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 27 Oct 2017 13:26:56 -0400 Subject: [PATCH 23/42] Include 0.27 in setup.py install_requires --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ba3e5e3e..327c983d 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def readme(): 'google-auth>=1.0.0', 'google-auth-httplib2>=0.0.1', 'google-auth-oauthlib>=0.0.1', - 'google-cloud-bigquery==0.26.0', + 'google-cloud-bigquery>=0.26.0,<0.28.0', ] From 62641775cc43c7bc786b303fd8a057da342ea625 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 27 Oct 2017 15:16:28 -0400 Subject: [PATCH 24/42] Initial run_query function for modularizing read_gbq --- pandas_gbq/gbq.py | 197 +++++++++++++++++++++++++--------------------- 1 file changed, 109 insertions(+), 88 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 65070bb2..a5b0c892 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -12,6 +12,7 @@ from pandas.compat import bytes_to_str from google.cloud import bigquery + def _check_google_client_version(): try: @@ -689,6 +690,95 @@ def sizeof_fmt(num, suffix='B'): return fmt % (num, 'Y', suffix) +def run_query(query, client, dialect, query_parameters, configuration, verbose, + async=True): + def _wait_for_job(job): + while True: + job.reload() # Refreshes the state via a GET request. + if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.errors) + return + time.sleep(1) + + def _set_common_query_settings(query_job): + if dialect == 'legacy': + query_job.use_legacy_sql = True + elif dialect == 'standard': + query_job.use_legacy_sql = False + + if configuration: + for setting, value in configuration.items(): + setattr(query_job, setting, value) + return query_job + + def sync_query(): + query_job = client.run_sync_query(query, + query_parameters=query_parameters) + query_job = _set_common_query_settings(query_job) + if verbose: + print("Query running...") + query_job.run() + if not query_job._properties.get("jobComplete", False): + raise QueryTimeout("Sync query timed out") + if verbose: + print("Query done.") + if query_job._properties.get("cacheHit", False): + print("Cache hit.") + else: + bytes_billed = int(query_job._properties + .get("totalBytesProcessed", 0)) + bytes_processed = int(query_job._properties + .get("totalBytesBilled", 0)) + print("Total bytes billed (processed): %s (%s)" % + (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) + print("\nRetrieving results...") + return query_job, None + + def async_query(): + query_job = client.run_async_query(str(uuid.uuid4()), + query, + query_parameters=query_parameters) + query_job = _set_common_query_settings(query_job) + query_job.begin() + try: + query_results = query_job.results().fetch_data() + except: + query_results = query_job.result().fetch_data() + if verbose: + print("Query running...") + _wait_for_job(query_job) + if verbose: + print("Query done.") + if query_job._properties["statistics"]["query"].get("cacheHit", + False): + print("Cache hit.") + elif ("statistics" in query_job._properties and + "query" in query_job._properties["statistics"]): + bytes_billed = int(query_job + ._properties["statistics"]["query"] + .get("totalBytesProcessed", 0)) + bytes_processed = int(query_job + 
._properties["statistics"]["query"] + .get("totalBytesBilled", 0)) + print("Total bytes billed (processed): %s (%s)" % + (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) + print("\nRetrieving results...") + return query_results, query_job + + if async: + query_results, query_job = async_query() + rows = list(query_results) + else: + query_results, query_job = sync_query() + rows = list(query_results.rows) + + columns = [field.name for field in query_results.schema] + schema = query_results.schema + + return query_results, query_job, rows, schema, columns + + def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, auth_local_webserver=False, dialect='legacy', credentials=None, @@ -818,15 +908,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, "google/cloud/bigquery/job.html?highlight=QueryJobConfig " "for allowable paramaters.") - def _wait_for_job(job): - while True: - job.reload() # Refreshes the state via a GET request. - if job.state == 'DONE': - if job.error_result: - raise RuntimeError(job.errors) - return - time.sleep(1) - if credentials is None: credentials = GbqConnector(project_id=project_id, reauth=reauth, @@ -834,84 +915,27 @@ def _wait_for_job(job): private_key=private_key).credentials client = bigquery.Client(project=project_id, credentials=credentials) - def _set_common_query_settings(query_job): - if dialect == 'legacy': - query_job.use_legacy_sql = True - elif dialect == 'standard': - query_job.use_legacy_sql = False - - if configuration: - for setting, value in configuration.items(): - setattr(query_job, setting, value) - return query_job - - def sync_query(): - query_job = client.run_sync_query(query, - query_parameters=query_parameters) - query_job = _set_common_query_settings(query_job) - if verbose: - print("Query running...") - if timeout_ms: - query_job.timeout_ms = timeout_ms - query_job.run() - if not query_job._properties.get("jobComplete", False): - raise QueryTimeout("Sync query timed out") - if verbose: - print("Query done.") - if query_job._properties.get("cacheHit", False): - print("Cache hit.") - else: - bytes_billed = int(query_job._properties - .get("totalBytesProcessed", 0)) - bytes_processed = int(query_job._properties - .get("totalBytesBilled", 0)) - print("Total bytes billed (processed): %s (%s)" % - (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) - print("\nRetrieving results...") - return query_job, None - - def async_query(): - query_job = client.run_async_query(str(uuid.uuid4()), - query, - query_parameters=query_parameters) - query_job = _set_common_query_settings(query_job) - query_job.begin() - try: - query_results = query_job.results().fetch_data() - except: - query_results = query_job.result().fetch_data() - if verbose: - print("Query running...") - _wait_for_job(query_job) - if verbose: - print("Query done.") - if query_job._properties["statistics"]["query"].get("cacheHit", - False): - print("Cache hit.") - elif ("statistics" in query_job._properties and - "query" in query_job._properties["statistics"]): - bytes_billed = int(query_job - ._properties["statistics"]["query"] - .get("totalBytesProcessed", 0)) - bytes_processed = int(query_job - ._properties["statistics"]["query"] - .get("totalBytesBilled", 0)) - print("Total bytes billed (processed): %s (%s)" % - (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) - print("\nRetrieving results...") - return query_results, query_job - - if (configuration and "timeout_ms" in configuration) or 
timeout_ms: - query_results, query_job = sync_query() - rows = list(query_results.rows) - total_rows = len(rows) + + if timeout_ms: + configuration['timeout_ms'] = timeout_ms + if (configuration and "timeout_ms" in configuration): + query_results, query_job, rows, schame, columns = run_query(query, + client, + dialect, + query_parameters, + configuration, + verbose, + async=False) else: - query_results, query_job = async_query() - rows = list(query_results) - total_rows = len(rows) + query_results, query_job, rows, schema, columns = run_query(query, + client, + dialect, + query_parameters, + configuration, + verbose) if verbose: - print("Got %s rows.") % total_rows + print("Got %s rows.") % len(rows) if query_job: print("\nTotal time taken %ss" % (datetime.utcnow() - query_job.created.replace(tzinfo=None)).seconds) @@ -923,10 +947,7 @@ def async_query(): elif return_type == 'list': return rows - columns = [field.name for field in query_results.schema] - data = rows - - final_df = DataFrame(data=data, columns=columns) + final_df = DataFrame(data=rows, columns=columns) # Manual field type conversion. Inserted to handle tests # with only null rows, otherwise type conversion works automatically From 208e39f4e4e8d28d3946da0b96057d778594ae2d Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 27 Oct 2017 16:28:05 -0400 Subject: [PATCH 25/42] Move schema handling and creating the df to separate functions, update check schema test --- pandas_gbq/gbq.py | 133 +++++++++++++++++------------------ pandas_gbq/tests/test_gbq.py | 14 ++-- 2 files changed, 71 insertions(+), 76 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index a5b0c892..e6d854fc 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -690,8 +690,8 @@ def sizeof_fmt(num, suffix='B'): return fmt % (num, 'Y', suffix) -def run_query(query, client, dialect, query_parameters, configuration, verbose, - async=True): +def run_query(query, client, dialect='legacy', query_parameters=(), + configuration=None, verbose=True, async=True): def _wait_for_job(job): while True: job.reload() # Refreshes the state via a GET request. @@ -766,6 +766,15 @@ def async_query(): print("\nRetrieving results...") return query_results, query_job + def get_columns_schema(query_results): + schema = [{"name":f.name, + "field_type":f.field_type, + "mode":f.mode, + "fields":f.fields, + "description":f.description} for f in query_results.schema] + columns = [field["name"] for field in schema] + return columns, schema + if async: query_results, query_job = async_query() rows = list(query_results) @@ -773,16 +782,23 @@ def async_query(): query_results, query_job = sync_query() rows = list(query_results.rows) - columns = [field.name for field in query_results.schema] - schema = query_results.schema + columns, schema = get_columns_schema(query_results) - return query_results, query_job, rows, schema, columns + if verbose: + print("Got %s rows.") % len(rows) + if query_job: + print("\nTotal time taken %ss" % (datetime.utcnow() - + query_job.created.replace(tzinfo=None)).seconds) + print("Finished at %s." 
% datetime.now() + .strftime('%Y-%m-%d %H:%M:%S')) + + return rows, columns, schema def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, auth_local_webserver=False, dialect='legacy', credentials=None, - return_type='df', query_parameters=(), configuration=None, + query_parameters=(), configuration=None, timeout_ms=None, **kwargs): r"""Load data from Google BigQuery using google-cloud-python @@ -852,13 +868,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, credentials: credentials object, default None (optional) If generating credentials on your own, pass in. Otherwise, will attempt to generate automatically - return_type: {'schema','list','df'}, default 'df' - schema returns an array of SchemaField objects, which you can access - `from pprint import pprint - [pprint(vars(field)) for field in schema]` - list returns a list of lists of the rows of the results; column names - are not included - df returns a dataframe by default query_parameters: tuple (optional) Can only be used in Standard SQL example: gbq.read_gbq("SELECT @param1 + @param2", query_parameters = (bigquery.ScalarQueryParameter( @@ -891,6 +900,39 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, DataFrame representing results of query """ + + def _create_df(rows, columns, schema, index_col, col_order): + df = DataFrame(data=rows, columns=columns) + + # Manual field type conversion. Inserted to handle tests + # with only null rows, otherwise type conversion works automatically + for field in schema: + if field["field_type"] == 'TIMESTAMP': + if df[field["name"]].isnull().values.all(): + df[field["name"]] = to_datetime(df[field["name"]]) + if field["field_type"] == 'FLOAT': + if df[field["name"]].isnull().values.all(): + df[field["name"]] = to_numeric(df[field["name"]]) + + # Reindex the DataFrame on the provided column + if index_col: + if index_col in df.columns: + df.set_index(index_col, inplace=True) + else: + raise InvalidIndexColumn( + 'Index column "{0}" does not exist in DataFrame.' + .format(index_col)) + + # Change the order of columns in the DataFrame based on provided list + if col_order: + if sorted(col_order) == sorted(df.columns): + df = df[col_order] + else: + raise InvalidColumnOrder( + 'Column order does not match this DataFrame.') + + return df + _test_google_api_imports() if not project_id: @@ -914,69 +956,22 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, auth_local_webserver=auth_local_webserver, private_key=private_key).credentials client = bigquery.Client(project=project_id, credentials=credentials) - if timeout_ms: configuration['timeout_ms'] = timeout_ms if (configuration and "timeout_ms" in configuration): - query_results, query_job, rows, schame, columns = run_query(query, - client, - dialect, - query_parameters, - configuration, - verbose, - async=False) + rows, columns, schema = run_query(query, client, dialect, + query_parameters, configuration, + verbose, + async=False) else: - query_results, query_job, rows, schema, columns = run_query(query, - client, - dialect, - query_parameters, - configuration, - verbose) - - if verbose: - print("Got %s rows.") % len(rows) - if query_job: - print("\nTotal time taken %ss" % (datetime.utcnow() - - query_job.created.replace(tzinfo=None)).seconds) - print("Finished at %s." 
% datetime.now() - .strftime('%Y-%m-%d %H:%M:%S')) + rows, columns, schema = run_query(query, client, dialect, + query_parameters, configuration, + verbose) - if return_type == 'schema': - return query_results.schema - elif return_type == 'list': - return rows - - final_df = DataFrame(data=rows, columns=columns) - - # Manual field type conversion. Inserted to handle tests - # with only null rows, otherwise type conversion works automatically - for field in query_results.schema: - if field.field_type == 'TIMESTAMP': - if final_df[field.name].isnull().values.all(): - final_df[field.name] = to_datetime(final_df[field.name]) - if field.field_type == 'FLOAT': - if final_df[field.name].isnull().values.all(): - final_df[field.name] = to_numeric(final_df[field.name]) - - # Reindex the DataFrame on the provided column - if index_col: - if index_col in final_df.columns: - final_df.set_index(index_col, inplace=True) - else: - raise InvalidIndexColumn( - 'Index column "{0}" does not exist in DataFrame.' - .format(index_col)) - - # Change the order of columns in the DataFrame based on provided list - if col_order: - if sorted(col_order) == sorted(final_df.columns): - final_df = final_df[col_order] - else: - raise InvalidColumnOrder( - 'Column order does not match this DataFrame.') + df = _create_df(rows, columns, schema, index_col, col_order) - return final_df + return df def to_gbq(dataframe, destination_table, project_id, chunksize=10000, diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index f75ef49c..9402f614 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -188,6 +188,8 @@ def setup_method(self, method): self.sut = gbq.GbqConnector( _get_project_id(), auth_local_webserver=True) + self.client = bigquery.Client(project=_get_project_id(), + credentials=self.sut.credentials) def test_should_be_able_to_make_a_connector(self): assert self.sut is not None, 'Could not create a GbqConnector' @@ -202,10 +204,7 @@ def test_should_be_able_to_get_a_bigquery_service(self): def test_should_be_able_to_get_schema_from_query(self): credentials = self.sut.credentials - schema = gbq.read_gbq('SELECT 1', - project_id=_get_project_id(), - credentials=credentials, - return_type='schema') + schema = gbq.run_query('SELECT 1', client=self.client) assert schema is not None def test_should_be_able_to_get_results_from_query(self): @@ -258,6 +257,9 @@ def setup_method(self, method): self.sut = gbq.GbqConnector(_get_project_id(), private_key=_get_private_key_path()) + credentials = self.sut.get_credentials() + self.client = bigquery.Client(project=_get_project_id(), + credentials=self.sut.credentials) def test_should_be_able_to_make_a_connector(self): assert self.sut is not None @@ -272,9 +274,7 @@ def test_should_be_able_to_get_a_bigquery_service(self): def test_should_be_able_to_get_schema_from_query(self): credentials = self.sut.credentials - schema = gbq.read_gbq('SELECT 1', project_id=_get_project_id(), - credentials=credentials, - return_type='schema') + schema = gbq.run_query('SELECT 1', client=self.client) assert schema is not None def test_should_be_able_to_get_results_from_query(self): From 2cd32c7f4c8965ea329f190da978f4cc23201eb7 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 27 Oct 2017 16:48:31 -0400 Subject: [PATCH 26/42] Linting More linting --- pandas_gbq/gbq.py | 39 ++++++++++++++++++------------------ pandas_gbq/tests/test_gbq.py | 6 +----- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 
e6d854fc..6ffe1789 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -507,7 +507,7 @@ def process_insert_errors(self, insert_errors): def load_data(self, dataframe, dataset_id, table_id, chunksize): try: from googleapiclient.errors import HttpError - except: + except ImportError: from apiclient.errors import HttpError job_id = uuid.uuid4().hex @@ -581,7 +581,7 @@ def schema(self, dataset_id, table_id): try: from googleapiclient.errors import HttpError - except: + except ImportError: from apiclient.errors import HttpError try: @@ -690,7 +690,7 @@ def sizeof_fmt(num, suffix='B'): return fmt % (num, 'Y', suffix) -def run_query(query, client, dialect='legacy', query_parameters=(), +def run_query(query, client, dialect='legacy', query_parameters=(), configuration=None, verbose=True, async=True): def _wait_for_job(job): while True: @@ -743,7 +743,7 @@ def async_query(): query_job.begin() try: query_results = query_job.results().fetch_data() - except: + except AttributeError: query_results = query_job.result().fetch_data() if verbose: print("Query running...") @@ -767,11 +767,11 @@ def async_query(): return query_results, query_job def get_columns_schema(query_results): - schema = [{"name":f.name, - "field_type":f.field_type, - "mode":f.mode, - "fields":f.fields, - "description":f.description} for f in query_results.schema] + schema = [{"name": f.name, + "field_type": f.field_type, + "mode": f.mode, + "fields": f.fields, + "description": f.description} for f in query_results.schema] columns = [field["name"] for field in schema] return columns, schema @@ -943,12 +943,13 @@ def _create_df(rows, columns, schema, index_col, col_order): if configuration and any(key in configuration for key in ["query", "copy", "load", "extract"]): raise ValueError("The Google Cloud BigQuery API handles configuration " - "settings differently. There are now a discrete set of query " - "settings one can set by passing in a dictionary, e.g.: " - "`configuration={'maximum_billing_tier':2}`. See " - "http://google-cloud-python.readthedocs.io/en/latest/_modules/" - "google/cloud/bigquery/job.html?highlight=QueryJobConfig " - "for allowable paramaters.") + "settings differently. There are now a discrete set " + "of query settings one can set by passing in a " + "dictionary, e.g.: `configuration=" + "{'maximum_billing_tier':2}`. 
See http://google-cloud" + "-python.readthedocs.io/en/latest/_modules/google/" + "cloud/bigquery/job.html?highlight=QueryJobConfig " + "for allowable paramaters.") if credentials is None: credentials = GbqConnector(project_id=project_id, @@ -956,11 +957,11 @@ def _create_df(rows, columns, schema, index_col, col_order): auth_local_webserver=auth_local_webserver, private_key=private_key).credentials client = bigquery.Client(project=project_id, credentials=credentials) - + if timeout_ms: configuration['timeout_ms'] = timeout_ms if (configuration and "timeout_ms" in configuration): - rows, columns, schema = run_query(query, client, dialect, + rows, columns, schema = run_query(query, client, dialect, query_parameters, configuration, verbose, async=False) @@ -1121,7 +1122,7 @@ def __init__(self, project_id, dataset_id, reauth=False, verbose=False, private_key=None): try: from googleapiclient.errors import HttpError - except: + except ImportError: from apiclient.errors import HttpError self.http_error = HttpError self.dataset_id = dataset_id @@ -1220,7 +1221,7 @@ def __init__(self, project_id, reauth=False, verbose=False, private_key=None): try: from googleapiclient.errors import HttpError - except: + except ImportError: from apiclient.errors import HttpError self.http_error = HttpError super(_Dataset, self).__init__(project_id, reauth, verbose, diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 9402f614..4af30608 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -203,7 +203,6 @@ def test_should_be_able_to_get_a_bigquery_service(self): assert bigquery_service is not None def test_should_be_able_to_get_schema_from_query(self): - credentials = self.sut.credentials schema = gbq.run_query('SELECT 1', client=self.client) assert schema is not None @@ -257,7 +256,6 @@ def setup_method(self, method): self.sut = gbq.GbqConnector(_get_project_id(), private_key=_get_private_key_path()) - credentials = self.sut.get_credentials() self.client = bigquery.Client(project=_get_project_id(), credentials=self.sut.credentials) @@ -273,14 +271,12 @@ def test_should_be_able_to_get_a_bigquery_service(self): assert bigquery_service is not None def test_should_be_able_to_get_schema_from_query(self): - credentials = self.sut.credentials schema = gbq.run_query('SELECT 1', client=self.client) assert schema is not None def test_should_be_able_to_get_results_from_query(self): - credentials = self.sut.credentials results = gbq.read_gbq('SELECT 1', project_id=_get_project_id(), - credentials=credentials) + credentials=self.sut.credentials) assert results is not None From 446819729e551c5b030e80bbe5e131d00134ebfd Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 27 Oct 2017 17:47:35 -0400 Subject: [PATCH 27/42] Update schema test --- pandas_gbq/tests/test_gbq.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 4af30608..f94d7b3d 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -203,7 +203,8 @@ def test_should_be_able_to_get_a_bigquery_service(self): assert bigquery_service is not None def test_should_be_able_to_get_schema_from_query(self): - schema = gbq.run_query('SELECT 1', client=self.client) + result = gbq.run_query('SELECT 1', client=self.client) + rows, columns, schema = result assert schema is not None def test_should_be_able_to_get_results_from_query(self): @@ -271,7 +272,8 @@ def test_should_be_able_to_get_a_bigquery_service(self): 
assert bigquery_service is not None def test_should_be_able_to_get_schema_from_query(self): - schema = gbq.run_query('SELECT 1', client=self.client) + result = gbq.run_query('SELECT 1', client=self.client) + rows, columns, schema = result assert schema is not None def test_should_be_able_to_get_results_from_query(self): @@ -290,6 +292,8 @@ def setup_method(self, method): self.sut = gbq.GbqConnector(_get_project_id(), private_key=_get_private_key_contents()) + self.client = bigquery.Client(project=_get_project_id(), + credentials=self.sut.credentials) def test_should_be_able_to_make_a_connector(self): assert self.sut is not None @@ -303,18 +307,14 @@ def test_should_be_able_to_get_a_bigquery_service(self): assert bigquery_service is not None def test_should_be_able_to_get_schema_from_query(self): - credentials = self.sut.credentials - schema = gbq.read_gbq('SELECT 1', - project_id=_get_project_id(), - credentials=credentials, - get_schema=True) + result = gbq.run_query('SELECT 1', client=self.client) + rows, columns, schema = result assert schema is not None def test_should_be_able_to_get_results_from_query(self): - credentials = self.sut.credentials results = gbq.read_gbq('SELECT 1', project_id=_get_project_id(), - credentials=credentials) + credentials=self.sut.credentials) assert results is not None From 6a6fc2dd1b136468b53647bdbd7bc829c14b564d Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Sat, 28 Oct 2017 01:12:03 -0400 Subject: [PATCH 28/42] Fix read_gbq docstring for Sphinx Whitespace --- pandas_gbq/gbq.py | 56 ++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 6ffe1789..073a58f1 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -807,17 +807,14 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, The Google Cloud library is used. Documentation is available `here - ` + `__ - Authentication via Google Cloud can be performed a number of ways, see: - + Authentication via Google Cloud can be performed a number of ways. One method is to generate user credentials via - `gcloud auth application-default login` and point to it using an + ``gcloud auth application-default login`` and point to it using an environment variable: - `$ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/keyfile.json"` + ``$ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/keyfile.json"`` You can also download a service account private key JSON file and pass the path to the file to the private_key paramater. @@ -854,45 +851,60 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, be created in current dir. You can also set PANDAS_GBQ_CREDENTIALS_FILE environment variable so as to define a specific path to store this credential (eg. /etc/keys/bigquery.dat). + .. [local webserver flow] http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server .. [console flow] http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console .. versionadded:: 0.2.0 + dialect : {'legacy', 'standard'}, default 'legacy' 'legacy' : Use BigQuery's legacy SQL dialect. 'standard' : Use BigQuery's standard SQL (beta), which is compliant with the SQL 2011 standard. For more information see `BigQuery SQL Reference - ` + `__ credentials: credentials object, default None (optional) If generating credentials on your own, pass in. 
Otherwise, will attempt to generate automatically + + .. versionadded:: 0.3.0 + query_parameters: tuple (optional) Can only be used in Standard SQL - example: gbq.read_gbq("SELECT @param1 + @param2", - query_parameters = (bigquery.ScalarQueryParameter( - 'param1', 'INT64', 1), - bigquery.ScalarQueryParameter( - 'param2', 'INT64', 2))) - + example. `More info + `__:: + + gbq.read_gbq("SELECT @param1 + @param2", + query_parameters = (bigquery.ScalarQueryParameter( + 'param1', 'INT64', 1), + bigquery.ScalarQueryParameter( + 'param2', 'INT64', 2))) + + .. versionadded:: 0.3.0 + configuration : dict (optional) - Because of current limitations only some configuration settings are - currently implemented. You can pass them along like in the following: + Due to the [current implementation in Google Cloud Python] only some + configuration settings are able to be set. You can pass them along like + in the following: `read_gbq(q,configuration={'allow_large_results':True, 'maximum_billing_tier':2})` - Example allowable settings: + [Example allowable settings]: allow_large_results, create_disposition, default_dataset, destination, flatten_results, priority, use_query_cache, use_legacy_sql, dry_run, write_disposition, udf_resources, maximum_billing_tier, maximum_bytes_billed - + + .. [current implementation in Google Cloud Python] + https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2765 + .. [Example allowable settings] + http://google-cloud-python.readthedocs.io/en/latest/_modules/google/cloud/bigquery/job.html?highlight=QueryJobConfig + .. versionadded:: 0.3.0 + timeout_ms: int (optional) If set or found in config, triggers a sync query that times out with no results if it can't be completed in the time desired - + + .. versionadded:: 0.3.0 Returns ------- From 9ab0de39d5e3c11ff3f864469f6aedc70818e4cd Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Mon, 30 Oct 2017 18:24:54 -0400 Subject: [PATCH 29/42] Remove timeout_ms paramater from read_gbq, it should be set in configuration --- pandas_gbq/gbq.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 073a58f1..68f4095e 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -798,8 +798,7 @@ def get_columns_schema(query_results): def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, auth_local_webserver=False, dialect='legacy', credentials=None, - query_parameters=(), configuration=None, - timeout_ms=None, **kwargs): + query_parameters=(), configuration=None, **kwargs): r"""Load data from Google BigQuery using google-cloud-python The main method a user calls to execute a Query in Google BigQuery @@ -900,12 +899,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, http://google-cloud-python.readthedocs.io/en/latest/_modules/google/cloud/bigquery/job.html?highlight=QueryJobConfig .. versionadded:: 0.3.0 - timeout_ms: int (optional) If set or found in config, triggers a sync query - that times out with no results if it can't be completed in the time - desired - - .. 
versionadded:: 0.3.0 - Returns ------- df: DataFrame @@ -970,8 +963,6 @@ def _create_df(rows, columns, schema, index_col, col_order): private_key=private_key).credentials client = bigquery.Client(project=project_id, credentials=credentials) - if timeout_ms: - configuration['timeout_ms'] = timeout_ms if (configuration and "timeout_ms" in configuration): rows, columns, schema = run_query(query, client, dialect, query_parameters, configuration, From e203ba4665c323c66982757d0d582cbc1c8b199d Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Mon, 30 Oct 2017 18:57:34 -0400 Subject: [PATCH 30/42] Add more documentation --- pandas_gbq/gbq.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 68f4095e..8c3c6b1c 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -692,6 +692,29 @@ def sizeof_fmt(num, suffix='B'): def run_query(query, client, dialect='legacy', query_parameters=(), configuration=None, verbose=True, async=True): + """Execute a query job + + Parameters + ---------- + query, dialect, query_paramaters, configuration, verbose : see read_gbq() + client : bigQuery Client object + Client with the specified project_id and credentials used to run the + query + async: bool + Whether a synchronous or asynchronous query should be run. To be + deprecated in future versions; synchronous queries are used as a + workaround to implement timeouts, and will be removed in a + future update once Google Cloud Python resolves the issue. + + Returns + ------- + Tuple + rows : list of lists + columns: list of strings + schema: dictionary + Has the following keys: name, field_type, mode, fields, description + """ + def _wait_for_job(job): while True: job.reload() # Refreshes the state via a GET request. @@ -775,6 +798,7 @@ def get_columns_schema(query_results): columns = [field["name"] for field in schema] return columns, schema + # sync_query code to be removed in future if async: query_results, query_job = async_query() rows = list(query_results) @@ -870,8 +894,9 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, .. versionadded:: 0.3.0 query_parameters: tuple (optional) Can only be used in Standard SQL - example. `More info - `__:: + `More info + `__ + Example:: gbq.read_gbq("SELECT @param1 + @param2", query_parameters = (bigquery.ScalarQueryParameter( @@ -963,6 +988,9 @@ def _create_df(rows, columns, schema, index_col, col_order): private_key=private_key).credentials client = bigquery.Client(project=project_id, credentials=credentials) + # Temporary workaround in order to perform timeouts on queries. + # Once Google Cloud Python resolves, differentiation between sync and async + # code will be removed. 
if (configuration and "timeout_ms" in configuration): rows, columns, schema = run_query(query, client, dialect, query_parameters, configuration, From 92f39434daab0da8276a64d0293a29d67e4a2ce6 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Wed, 15 Nov 2017 14:07:57 -0500 Subject: [PATCH 31/42] Moved read_query to GbqConnector, update credentials and client generation to GbqConnector, and remove wait_for_job --- pandas_gbq/gbq.py | 262 ++++++++++++++++------------------- pandas_gbq/tests/test_gbq.py | 22 +-- 2 files changed, 129 insertions(+), 155 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 8c3c6b1c..ed339d75 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -10,7 +10,6 @@ from distutils.version import StrictVersion from pandas import compat, DataFrame, to_datetime, to_numeric from pandas.compat import bytes_to_str -from google.cloud import bigquery def _check_google_client_version(): @@ -201,6 +200,7 @@ class GbqConnector(object): def __init__(self, project_id, reauth=False, verbose=False, private_key=None, auth_local_webserver=False, dialect='legacy'): + from google.cloud import bigquery self.project_id = project_id self.reauth = reauth self.verbose = verbose @@ -210,6 +210,8 @@ def __init__(self, project_id, reauth=False, verbose=False, self.credentials_path = _get_credentials_file() self.credentials = self.get_credentials() self.service = self.get_service() + self.client = bigquery.Client(project=project_id, + credentials=self.credentials) # BQ Queries costs $5 per TB. First 1 TB per month is free # see here for more: https://cloud.google.com/bigquery/pricing @@ -504,6 +506,119 @@ def process_insert_errors(self, insert_errors): raise StreamingInsertError + def run_query(self, query, dialect='legacy', query_parameters=(), + configuration=None, verbose=True, async=True): + """Execute a query job + + Parameters + ---------- + query, dialect, query_paramaters, configuration, verbose : see read_gbq() + async: bool + Whether a synchronous or asynchronous query should be run. To be + deprecated in future versions; synchronous queries are used as a + workaround to implement timeouts, and will be removed in a + future update once Google Cloud Python resolves the issue. 
+ + Returns + ------- + Tuple + rows : list of lists + columns: list of strings + schema: dictionary + Has the following keys: name, field_type, mode, fields, description + """ + + def _set_common_query_settings(query_job): + if dialect == 'legacy': + query_job.use_legacy_sql = True + elif dialect == 'standard': + query_job.use_legacy_sql = False + + if configuration: + for setting, value in configuration.items(): + setattr(query_job, setting, value) + return query_job + + def sync_query(): + query_job = self.client.run_sync_query(query, + query_parameters=query_parameters) + query_job = _set_common_query_settings(query_job) + if verbose: + print("Query running...") + query_job.run() + if not query_job._properties.get("jobComplete", False): + raise QueryTimeout("Sync query timed out") + if verbose: + print("Query done.") + if query_job._properties.get("cacheHit", False): + print("Cache hit.") + else: + bytes_billed = int(query_job._properties + .get("totalBytesProcessed", 0)) + bytes_processed = int(query_job._properties + .get("totalBytesBilled", 0)) + print("Total bytes billed (processed): %s (%s)" % + (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) + print("\nRetrieving results...") + return query_job, None + + def async_query(): + query_job = self.client.run_async_query(str(uuid.uuid4()), + query, + query_parameters=query_parameters) + query_job = _set_common_query_settings(query_job) + query_job.begin() + try: + query_results = query_job.results().fetch_data() + except AttributeError: + query_results = query_job.result().fetch_data() + if verbose: + print("Query done.") + if query_job._properties["statistics"]["query"].get("cacheHit", + False): + print("Cache hit.") + elif ("statistics" in query_job._properties and + "query" in query_job._properties["statistics"]): + bytes_billed = int(query_job + ._properties["statistics"]["query"] + .get("totalBytesProcessed", 0)) + bytes_processed = int(query_job + ._properties["statistics"]["query"] + .get("totalBytesBilled", 0)) + print("Total bytes billed (processed): %s (%s)" % + (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) + print("\nRetrieving results...") + return query_results, query_job + + def get_columns_schema(query_results): + schema = [{"name": f.name, + "field_type": f.field_type, + "mode": f.mode, + "fields": f.fields, + "description": f.description} for f in query_results.schema] + columns = [field["name"] for field in schema] + return columns, schema + + # sync_query code to be removed in future + if async: + query_results, query_job = async_query() + rows = list(query_results) + else: + query_results, query_job = sync_query() + rows = list(query_results.rows) + + columns, schema = get_columns_schema(query_results) + + if verbose: + print("Got %s rows.") % len(rows) + if query_job: + print("\nTotal time taken %ss" % (datetime.utcnow() - + query_job.created.replace(tzinfo=None)).seconds) + print("Finished at %s." 
% datetime.now() + .strftime('%Y-%m-%d %H:%M:%S')) + + return rows, columns, schema + def load_data(self, dataframe, dataset_id, table_id, chunksize): try: from googleapiclient.errors import HttpError @@ -690,138 +805,9 @@ def sizeof_fmt(num, suffix='B'): return fmt % (num, 'Y', suffix) -def run_query(query, client, dialect='legacy', query_parameters=(), - configuration=None, verbose=True, async=True): - """Execute a query job - - Parameters - ---------- - query, dialect, query_paramaters, configuration, verbose : see read_gbq() - client : bigQuery Client object - Client with the specified project_id and credentials used to run the - query - async: bool - Whether a synchronous or asynchronous query should be run. To be - deprecated in future versions; synchronous queries are used as a - workaround to implement timeouts, and will be removed in a - future update once Google Cloud Python resolves the issue. - - Returns - ------- - Tuple - rows : list of lists - columns: list of strings - schema: dictionary - Has the following keys: name, field_type, mode, fields, description - """ - - def _wait_for_job(job): - while True: - job.reload() # Refreshes the state via a GET request. - if job.state == 'DONE': - if job.error_result: - raise RuntimeError(job.errors) - return - time.sleep(1) - - def _set_common_query_settings(query_job): - if dialect == 'legacy': - query_job.use_legacy_sql = True - elif dialect == 'standard': - query_job.use_legacy_sql = False - - if configuration: - for setting, value in configuration.items(): - setattr(query_job, setting, value) - return query_job - - def sync_query(): - query_job = client.run_sync_query(query, - query_parameters=query_parameters) - query_job = _set_common_query_settings(query_job) - if verbose: - print("Query running...") - query_job.run() - if not query_job._properties.get("jobComplete", False): - raise QueryTimeout("Sync query timed out") - if verbose: - print("Query done.") - if query_job._properties.get("cacheHit", False): - print("Cache hit.") - else: - bytes_billed = int(query_job._properties - .get("totalBytesProcessed", 0)) - bytes_processed = int(query_job._properties - .get("totalBytesBilled", 0)) - print("Total bytes billed (processed): %s (%s)" % - (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) - print("\nRetrieving results...") - return query_job, None - - def async_query(): - query_job = client.run_async_query(str(uuid.uuid4()), - query, - query_parameters=query_parameters) - query_job = _set_common_query_settings(query_job) - query_job.begin() - try: - query_results = query_job.results().fetch_data() - except AttributeError: - query_results = query_job.result().fetch_data() - if verbose: - print("Query running...") - _wait_for_job(query_job) - if verbose: - print("Query done.") - if query_job._properties["statistics"]["query"].get("cacheHit", - False): - print("Cache hit.") - elif ("statistics" in query_job._properties and - "query" in query_job._properties["statistics"]): - bytes_billed = int(query_job - ._properties["statistics"]["query"] - .get("totalBytesProcessed", 0)) - bytes_processed = int(query_job - ._properties["statistics"]["query"] - .get("totalBytesBilled", 0)) - print("Total bytes billed (processed): %s (%s)" % - (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) - print("\nRetrieving results...") - return query_results, query_job - - def get_columns_schema(query_results): - schema = [{"name": f.name, - "field_type": f.field_type, - "mode": f.mode, - "fields": f.fields, - "description": f.description} 
for f in query_results.schema] - columns = [field["name"] for field in schema] - return columns, schema - - # sync_query code to be removed in future - if async: - query_results, query_job = async_query() - rows = list(query_results) - else: - query_results, query_job = sync_query() - rows = list(query_results.rows) - - columns, schema = get_columns_schema(query_results) - - if verbose: - print("Got %s rows.") % len(rows) - if query_job: - print("\nTotal time taken %ss" % (datetime.utcnow() - - query_job.created.replace(tzinfo=None)).seconds) - print("Finished at %s." % datetime.now() - .strftime('%Y-%m-%d %H:%M:%S')) - - return rows, columns, schema - - def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, - auth_local_webserver=False, dialect='legacy', credentials=None, + auth_local_webserver=False, dialect='legacy', query_parameters=(), configuration=None, **kwargs): r"""Load data from Google BigQuery using google-cloud-python @@ -981,23 +967,21 @@ def _create_df(rows, columns, schema, index_col, col_order): "cloud/bigquery/job.html?highlight=QueryJobConfig " "for allowable paramaters.") - if credentials is None: - credentials = GbqConnector(project_id=project_id, - reauth=reauth, - auth_local_webserver=auth_local_webserver, - private_key=private_key).credentials - client = bigquery.Client(project=project_id, credentials=credentials) + connector = GbqConnector(project_id=project_id, + reauth=reauth, + auth_local_webserver=auth_local_webserver, + private_key=private_key) # Temporary workaround in order to perform timeouts on queries. # Once Google Cloud Python resolves, differentiation between sync and async # code will be removed. if (configuration and "timeout_ms" in configuration): - rows, columns, schema = run_query(query, client, dialect, + rows, columns, schema = connector.run_query(query, dialect, query_parameters, configuration, verbose, async=False) else: - rows, columns, schema = run_query(query, client, dialect, + rows, columns, schema = connector.run_query(query, dialect, query_parameters, configuration, verbose) diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index f94d7b3d..e265313f 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -188,8 +188,6 @@ def setup_method(self, method): self.sut = gbq.GbqConnector( _get_project_id(), auth_local_webserver=True) - self.client = bigquery.Client(project=_get_project_id(), - credentials=self.sut.credentials) def test_should_be_able_to_make_a_connector(self): assert self.sut is not None, 'Could not create a GbqConnector' @@ -203,15 +201,13 @@ def test_should_be_able_to_get_a_bigquery_service(self): assert bigquery_service is not None def test_should_be_able_to_get_schema_from_query(self): - result = gbq.run_query('SELECT 1', client=self.client) + result = self.sut.run_query('SELECT 1') rows, columns, schema = result assert schema is not None def test_should_be_able_to_get_results_from_query(self): - credentials = self.sut.credentials results = gbq.read_gbq('SELECT 1', - project_id=_get_project_id(), - credentials=credentials) + project_id=_get_project_id()) assert results is not None def test_get_application_default_credentials_does_not_throw_error(self): @@ -257,8 +253,6 @@ def setup_method(self, method): self.sut = gbq.GbqConnector(_get_project_id(), private_key=_get_private_key_path()) - self.client = bigquery.Client(project=_get_project_id(), - credentials=self.sut.credentials) def 
test_should_be_able_to_make_a_connector(self): assert self.sut is not None @@ -272,13 +266,12 @@ def test_should_be_able_to_get_a_bigquery_service(self): assert bigquery_service is not None def test_should_be_able_to_get_schema_from_query(self): - result = gbq.run_query('SELECT 1', client=self.client) + result = self.sut.run_query('SELECT 1') rows, columns, schema = result assert schema is not None def test_should_be_able_to_get_results_from_query(self): - results = gbq.read_gbq('SELECT 1', project_id=_get_project_id(), - credentials=self.sut.credentials) + results = gbq.read_gbq('SELECT 1', project_id=_get_project_id()) assert results is not None @@ -292,8 +285,6 @@ def setup_method(self, method): self.sut = gbq.GbqConnector(_get_project_id(), private_key=_get_private_key_contents()) - self.client = bigquery.Client(project=_get_project_id(), - credentials=self.sut.credentials) def test_should_be_able_to_make_a_connector(self): assert self.sut is not None @@ -307,14 +298,13 @@ def test_should_be_able_to_get_a_bigquery_service(self): assert bigquery_service is not None def test_should_be_able_to_get_schema_from_query(self): - result = gbq.run_query('SELECT 1', client=self.client) + result = self.sut.run_query('SELECT 1') rows, columns, schema = result assert schema is not None def test_should_be_able_to_get_results_from_query(self): results = gbq.read_gbq('SELECT 1', - project_id=_get_project_id(), - credentials=self.sut.credentials) + project_id=_get_project_id()) assert results is not None From 3cc153a9786a0161cd758efef89ca8ce07289cb4 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Wed, 15 Nov 2017 14:16:41 -0500 Subject: [PATCH 32/42] Move sizeof_fmt back into GbqConnector Linting More linting --- pandas_gbq/gbq.py | 61 +++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index ed339d75..6d6b670f 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -3,7 +3,6 @@ import json from time import sleep import uuid -import time import sys import os @@ -210,7 +209,7 @@ def __init__(self, project_id, reauth=False, verbose=False, self.credentials_path = _get_credentials_file() self.credentials = self.get_credentials() self.service = self.get_service() - self.client = bigquery.Client(project=project_id, + self.client = bigquery.Client(project=project_id, credentials=self.credentials) # BQ Queries costs $5 per TB. First 1 TB per month is free @@ -454,6 +453,16 @@ def _print(self, msg, end='\n'): sys.stdout.write(msg + end) sys.stdout.flush() + # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size + @staticmethod + def sizeof_fmt(num, suffix='B'): + fmt = "%3.1f %s%s" + for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: + if abs(num) < 1024.0: + return fmt % (num, unit, suffix) + num /= 1024.0 + return fmt % (num, 'Y', suffix) + def get_service(self): import httplib2 from google_auth_httplib2 import AuthorizedHttp @@ -512,7 +521,7 @@ def run_query(self, query, dialect='legacy', query_parameters=(), Parameters ---------- - query, dialect, query_paramaters, configuration, verbose : see read_gbq() + query, dialect, query_paramaters, configuration, verbose: see read_gbq async: bool Whether a synchronous or asynchronous query should be run. 
To be deprecated in future versions; synchronous queries are used as a @@ -525,7 +534,7 @@ def run_query(self, query, dialect='legacy', query_parameters=(), rows : list of lists columns: list of strings schema: dictionary - Has the following keys: name, field_type, mode, fields, description + Has keys: name, field_type, mode, fields, description """ def _set_common_query_settings(query_job): @@ -541,7 +550,7 @@ def _set_common_query_settings(query_job): def sync_query(): query_job = self.client.run_sync_query(query, - query_parameters=query_parameters) + query_parameters=query_parameters) query_job = _set_common_query_settings(query_job) if verbose: print("Query running...") @@ -558,15 +567,18 @@ def sync_query(): bytes_processed = int(query_job._properties .get("totalBytesBilled", 0)) print("Total bytes billed (processed): %s (%s)" % - (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) + (self.sizeof_fmt(bytes_billed), + self.sizeof_fmt(bytes_processed))) print("\nRetrieving results...") return query_job, None def async_query(): query_job = self.client.run_async_query(str(uuid.uuid4()), - query, - query_parameters=query_parameters) + query, + query_parameters=query_parameters) query_job = _set_common_query_settings(query_job) + if verbose: + print("Query running...") query_job.begin() try: query_results = query_job.results().fetch_data() @@ -586,7 +598,8 @@ def async_query(): ._properties["statistics"]["query"] .get("totalBytesBilled", 0)) print("Total bytes billed (processed): %s (%s)" % - (sizeof_fmt(bytes_billed), sizeof_fmt(bytes_processed))) + (self.sizeof_fmt(bytes_billed), + self.sizeof_fmt(bytes_processed))) print("\nRetrieving results...") return query_results, query_job @@ -595,7 +608,8 @@ def get_columns_schema(query_results): "field_type": f.field_type, "mode": f.mode, "fields": f.fields, - "description": f.description} for f in query_results.schema] + "description": f.description} + for f in query_results.schema] columns = [field["name"] for field in schema] return columns, schema @@ -796,15 +810,6 @@ def _get_credentials_file(): 'PANDAS_GBQ_CREDENTIALS_FILE') -def sizeof_fmt(num, suffix='B'): - fmt = "%3.1f %s%s" - for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: - if abs(num) < 1024.0: - return fmt % (num, unit, suffix) - num /= 1024.0 - return fmt % (num, 'Y', suffix) - - def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, auth_local_webserver=False, dialect='legacy', @@ -976,14 +981,18 @@ def _create_df(rows, columns, schema, index_col, col_order): # Once Google Cloud Python resolves, differentiation between sync and async # code will be removed. 
if (configuration and "timeout_ms" in configuration): - rows, columns, schema = connector.run_query(query, dialect, - query_parameters, configuration, - verbose, - async=False) + rows, columns, schema = connector.run_query(query, + dialect, + query_parameters, + configuration, + verbose, + async=False) else: - rows, columns, schema = connector.run_query(query, dialect, - query_parameters, configuration, - verbose) + rows, columns, schema = connector.run_query(query, + dialect, + query_parameters, + configuration, + verbose) df = _create_df(rows, columns, schema, index_col, col_order) From 91f900e44e297cf16f2ecde6aaafb688cd2270ea Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Wed, 15 Nov 2017 17:47:36 -0500 Subject: [PATCH 33/42] Fix sizeof_fmt test and move create_df to top-level function Final linting --- pandas_gbq/gbq.py | 75 ++++++++++++++++++------------------ pandas_gbq/tests/test_gbq.py | 27 ++++++------- 2 files changed, 52 insertions(+), 50 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 6d6b670f..321f91ed 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -549,8 +549,8 @@ def _set_common_query_settings(query_job): return query_job def sync_query(): - query_job = self.client.run_sync_query(query, - query_parameters=query_parameters) + query_job = self.client.run_sync_query( + query, query_parameters=query_parameters) query_job = _set_common_query_settings(query_job) if verbose: print("Query running...") @@ -573,9 +573,10 @@ def sync_query(): return query_job, None def async_query(): - query_job = self.client.run_async_query(str(uuid.uuid4()), - query, - query_parameters=query_parameters) + query_job = self.client.run_async_query( + str(uuid.uuid4()), + query, + query_parameters=query_parameters) query_job = _set_common_query_settings(query_job) if verbose: print("Query running...") @@ -810,6 +811,38 @@ def _get_credentials_file(): 'PANDAS_GBQ_CREDENTIALS_FILE') +def _create_df(rows, columns, schema, index_col, col_order): + df = DataFrame(data=rows, columns=columns) + + # Manual field type conversion. Inserted to handle tests + # with only null rows, otherwise type conversion works automatically + for field in schema: + if field["field_type"] == 'TIMESTAMP': + if df[field["name"]].isnull().values.all(): + df[field["name"]] = to_datetime(df[field["name"]]) + if field["field_type"] == 'FLOAT': + if df[field["name"]].isnull().values.all(): + df[field["name"]] = to_numeric(df[field["name"]]) + + # Reindex the DataFrame on the provided column + if index_col: + if index_col in df.columns: + df.set_index(index_col, inplace=True) + else: + raise InvalidIndexColumn( + 'Index column "{0}" does not exist in DataFrame.' + .format(index_col)) + + # Change the order of columns in the DataFrame based on provided list + if col_order: + if sorted(col_order) == sorted(df.columns): + df = df[col_order] + else: + raise InvalidColumnOrder( + 'Column order does not match this DataFrame.') + return df + + def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, auth_local_webserver=False, dialect='legacy', @@ -922,38 +955,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, """ - def _create_df(rows, columns, schema, index_col, col_order): - df = DataFrame(data=rows, columns=columns) - - # Manual field type conversion. 
Inserted to handle tests - # with only null rows, otherwise type conversion works automatically - for field in schema: - if field["field_type"] == 'TIMESTAMP': - if df[field["name"]].isnull().values.all(): - df[field["name"]] = to_datetime(df[field["name"]]) - if field["field_type"] == 'FLOAT': - if df[field["name"]].isnull().values.all(): - df[field["name"]] = to_numeric(df[field["name"]]) - - # Reindex the DataFrame on the provided column - if index_col: - if index_col in df.columns: - df.set_index(index_col, inplace=True) - else: - raise InvalidIndexColumn( - 'Index column "{0}" does not exist in DataFrame.' - .format(index_col)) - - # Change the order of columns in the DataFrame based on provided list - if col_order: - if sorted(col_order) == sorted(df.columns): - df = df[col_order] - else: - raise InvalidColumnOrder( - 'Column order does not match this DataFrame.') - - return df - _test_google_api_imports() if not project_id: diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index e265313f..9f249ddd 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -855,19 +855,20 @@ def test_timeout_configuration(self): configuration=config) def test_query_response_bytes(self): - assert gbq.sizeof_fmt(999) == "999.0 B" - assert gbq.sizeof_fmt(1024) == "1.0 KB" - assert gbq.sizeof_fmt(1099) == "1.1 KB" - assert gbq.sizeof_fmt(1044480) == "1020.0 KB" - assert gbq.sizeof_fmt(1048576) == "1.0 MB" - assert gbq.sizeof_fmt(1048576000) == "1000.0 MB" - assert gbq.sizeof_fmt(1073741824) == "1.0 GB" - assert gbq.sizeof_fmt(1.099512E12) == "1.0 TB" - assert gbq.sizeof_fmt(1.125900E15) == "1.0 PB" - assert gbq.sizeof_fmt(1.152922E18) == "1.0 EB" - assert gbq.sizeof_fmt(1.180592E21) == "1.0 ZB" - assert gbq.sizeof_fmt(1.208926E24) == "1.0 YB" - assert gbq.sizeof_fmt(1.208926E28) == "10000.0 YB" + connector = gbq.GbqConnector(project_id=_get_project_id()) + assert connector.sizeof_fmt(999) == "999.0 B" + assert connector.sizeof_fmt(1024) == "1.0 KB" + assert connector.sizeof_fmt(1099) == "1.1 KB" + assert connector.sizeof_fmt(1044480) == "1020.0 KB" + assert connector.sizeof_fmt(1048576) == "1.0 MB" + assert connector.sizeof_fmt(1048576000) == "1000.0 MB" + assert connector.sizeof_fmt(1073741824) == "1.0 GB" + assert connector.sizeof_fmt(1.099512E12) == "1.0 TB" + assert connector.sizeof_fmt(1.125900E15) == "1.0 PB" + assert connector.sizeof_fmt(1.152922E18) == "1.0 EB" + assert connector.sizeof_fmt(1.180592E21) == "1.0 ZB" + assert connector.sizeof_fmt(1.208926E24) == "1.0 YB" + assert connector.sizeof_fmt(1.208926E28) == "10000.0 YB" class TestToGBQIntegrationWithServiceAccountKeyPath(object): From 4d967e69bb5bac12bcc8c3e15e0bdbf629365421 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Wed, 15 Nov 2017 18:09:19 -0500 Subject: [PATCH 34/42] Add import error message --- pandas_gbq/gbq.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 321f91ed..0aa00c89 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -74,6 +74,13 @@ def _test_google_api_imports(): "pandas requires google-auth for Google BigQuery support: " "{0}".format(ex)) + try: + from google.cloud import bigquery # noqa + except ImportError as ex: + raise ImportError( + "pandas requires google-cloud-python for Google BigQuery support: " + "{0}".format(ex)) + _check_google_client_version() From 476d72b3cdb9042f33db24d397b4116f08291c12 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Thu, 16 Nov 2017 12:41:07 -0500 Subject: [PATCH 35/42] Revert 
test_query_response_bytes test to original --- pandas_gbq/tests/test_gbq.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 9f249ddd..697eb2e1 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -855,20 +855,19 @@ def test_timeout_configuration(self): configuration=config) def test_query_response_bytes(self): - connector = gbq.GbqConnector(project_id=_get_project_id()) - assert connector.sizeof_fmt(999) == "999.0 B" - assert connector.sizeof_fmt(1024) == "1.0 KB" - assert connector.sizeof_fmt(1099) == "1.1 KB" - assert connector.sizeof_fmt(1044480) == "1020.0 KB" - assert connector.sizeof_fmt(1048576) == "1.0 MB" - assert connector.sizeof_fmt(1048576000) == "1000.0 MB" - assert connector.sizeof_fmt(1073741824) == "1.0 GB" - assert connector.sizeof_fmt(1.099512E12) == "1.0 TB" - assert connector.sizeof_fmt(1.125900E15) == "1.0 PB" - assert connector.sizeof_fmt(1.152922E18) == "1.0 EB" - assert connector.sizeof_fmt(1.180592E21) == "1.0 ZB" - assert connector.sizeof_fmt(1.208926E24) == "1.0 YB" - assert connector.sizeof_fmt(1.208926E28) == "10000.0 YB" + assert self.gbq_connector.sizeof_fmt(999) == "999.0 B" + assert self.gbq_connector.sizeof_fmt(1024) == "1.0 KB" + assert self.gbq_connector.sizeof_fmt(1099) == "1.1 KB" + assert self.gbq_connector.sizeof_fmt(1044480) == "1020.0 KB" + assert self.gbq_connector.sizeof_fmt(1048576) == "1.0 MB" + assert self.gbq_connector.sizeof_fmt(1048576000) == "1000.0 MB" + assert self.gbq_connector.sizeof_fmt(1073741824) == "1.0 GB" + assert self.gbq_connector.sizeof_fmt(1.099512E12) == "1.0 TB" + assert self.gbq_connector.sizeof_fmt(1.125900E15) == "1.0 PB" + assert self.gbq_connector.sizeof_fmt(1.152922E18) == "1.0 EB" + assert self.gbq_connector.sizeof_fmt(1.180592E21) == "1.0 ZB" + assert self.gbq_connector.sizeof_fmt(1.208926E24) == "1.0 YB" + assert self.gbq_connector.sizeof_fmt(1.208926E28) == "10000.0 YB" class TestToGBQIntegrationWithServiceAccountKeyPath(object): From 6da0ef27444352e86f0dbed646338d148e57787b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Nov 2017 17:13:40 -0800 Subject: [PATCH 36/42] Convert rest of methods to use google-cloud-bigquery - Removes references to google-api-client-library and httplib2. - Updates PR to not make any surface-level changes to the API, only swaps out the dependencies. - Updates PR to use latest version of google-cloud-bigquery. 
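The public call surface is unchanged by this swap; only the backend client moves to google-cloud-bigquery. A minimal usage sketch of the resulting API, as documented in the updated read_gbq docstring below (the project id and key path are placeholders, not part of this patch):

    from pandas_gbq import gbq

    # configuration is passed through in BigQuery's REST job-config shape,
    # e.g. disabling the query cache for this run:
    df = gbq.read_gbq(
        'SELECT 1 AS x',
        project_id='my-project',                      # placeholder
        private_key='/path/to/service_account.json',  # placeholder
        dialect='standard',
        configuration={'query': {'useQueryCache': False}})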
--- ci/requirements-2.7-0.19.2.pip | 3 - ci/requirements-3.5-0.18.1.pip | 4 +- ci/requirements-3.6-0.20.1.conda | 2 - ci/requirements-3.6-MASTER.pip | 2 - pandas_gbq/gbq.py | 777 ++++++++++++------------------- pandas_gbq/tests/test_gbq.py | 189 +++++--- requirements.txt | 5 +- setup.py | 5 +- 8 files changed, 414 insertions(+), 573 deletions(-) diff --git a/ci/requirements-2.7-0.19.2.pip b/ci/requirements-2.7-0.19.2.pip index a31accd7..cd94478a 100644 --- a/ci/requirements-2.7-0.19.2.pip +++ b/ci/requirements-2.7-0.19.2.pip @@ -1,8 +1,5 @@ -google-api-python-client google-auth -google-auth-httplib2 google-auth-oauthlib PyCrypto -python-gflags mock google-cloud-bigquery diff --git a/ci/requirements-3.5-0.18.1.pip b/ci/requirements-3.5-0.18.1.pip index 9c94dbbb..18369345 100644 --- a/ci/requirements-3.5-0.18.1.pip +++ b/ci/requirements-3.5-0.18.1.pip @@ -1,6 +1,4 @@ -google-api-python-client==1.6.0 google-auth==1.0.0 -google-auth-httplib2==0.0.1 google-auth-oauthlib==0.0.1 mock -google-cloud-bigquery==0.26.0 +google-cloud-bigquery==0.28.0 diff --git a/ci/requirements-3.6-0.20.1.conda b/ci/requirements-3.6-0.20.1.conda index 3ee89b92..b52f2aeb 100644 --- a/ci/requirements-3.6-0.20.1.conda +++ b/ci/requirements-3.6-0.20.1.conda @@ -1,6 +1,4 @@ -google-api-python-client google-auth -google-auth-httplib2 google-auth-oauthlib mock google-cloud-bigquery diff --git a/ci/requirements-3.6-MASTER.pip b/ci/requirements-3.6-MASTER.pip index 3ee89b92..b52f2aeb 100644 --- a/ci/requirements-3.6-MASTER.pip +++ b/ci/requirements-3.6-MASTER.pip @@ -1,6 +1,4 @@ -google-api-python-client google-auth -google-auth-httplib2 google-auth-oauthlib mock google-cloud-bigquery diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 0aa00c89..15130321 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -1,14 +1,16 @@ import warnings from datetime import datetime import json +import time from time import sleep -import uuid import sys import os +import numpy as np + from distutils.version import StrictVersion -from pandas import compat, DataFrame, to_datetime, to_numeric -from pandas.compat import bytes_to_str +from pandas import compat, DataFrame +from pandas.compat import lzip def _check_google_client_version(): @@ -19,31 +21,24 @@ def _check_google_client_version(): except ImportError: raise ImportError('Could not import pkg_resources (setuptools).') - # Version 1.6.0 is the first version to support google-auth. 
- # https://github.com/google/google-api-python-client/blob/master/CHANGELOG - google_api_minimum_version = '1.6.0' + # Version 0.28.0 includes many changes compared to previous versions + # https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/bigquery/CHANGELOG.md + bigquery_client_minimum_version = '0.28.0' - _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution( - 'google-api-python-client').version + _BIGQUERY_CLIENT_VERSION = pkg_resources.get_distribution( + 'google-cloud-bigquery').version - if (StrictVersion(_GOOGLE_API_CLIENT_VERSION) < - StrictVersion(google_api_minimum_version)): - raise ImportError('pandas requires google-api-python-client >= {0} ' + if (StrictVersion(_BIGQUERY_CLIENT_VERSION) < + StrictVersion(bigquery_client_minimum_version)): + raise ImportError('pandas requires google-cloud-bigquery >= {0} ' 'for Google BigQuery support, ' 'current version {1}' - .format(google_api_minimum_version, - _GOOGLE_API_CLIENT_VERSION)) + .format(bigquery_client_minimum_version, + _BIGQUERY_CLIENT_VERSION)) def _test_google_api_imports(): - try: - import httplib2 # noqa - except ImportError as ex: - raise ImportError( - 'pandas requires httplib2 for Google BigQuery support: ' - '{0}'.format(ex)) - try: from google_auth_oauthlib.flow import InstalledAppFlow # noqa except ImportError as ex: @@ -51,22 +46,6 @@ def _test_google_api_imports(): 'pandas requires google-auth-oauthlib for Google BigQuery ' 'support: {0}'.format(ex)) - try: - from google_auth_httplib2 import AuthorizedHttp # noqa - from google_auth_httplib2 import Request # noqa - except ImportError as ex: - raise ImportError( - 'pandas requires google-auth-httplib2 for Google BigQuery ' - 'support: {0}'.format(ex)) - - try: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - except ImportError as ex: - raise ImportError( - "pandas requires google-api-python-client for Google BigQuery " - "support: {0}".format(ex)) - try: import google.auth # noqa except ImportError as ex: @@ -85,24 +64,18 @@ def _test_google_api_imports(): def _try_credentials(project_id, credentials): - import httplib2 - from googleapiclient.discovery import build - import googleapiclient.errors - from google_auth_httplib2 import AuthorizedHttp + from google.cloud import bigquery + import google.api_core.exceptions if credentials is None: return None - http = httplib2.Http() try: - authed_http = AuthorizedHttp(credentials, http=http) - bigquery_service = build('bigquery', 'v2', http=authed_http) + client = bigquery.Client(project=project_id, credentials=credentials) # Check if the application has rights to the BigQuery project - jobs = bigquery_service.jobs() - job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} - jobs.insert(projectId=project_id, body=job_data).execute() + client.query('SELECT 1').result() return credentials - except googleapiclient.errors.Error: + except google.api_core.exceptions.GoogleAPIError: return None @@ -185,14 +158,6 @@ class QueryTimeout(ValueError): pass -class StreamingInsertError(ValueError): - """ - Raised when BigQuery reports a streaming insert error. 
- For more information see `Streaming Data Into BigQuery - `__ - """ - - class TableCreationError(ValueError): """ Raised when the create table method fails @@ -206,7 +171,9 @@ class GbqConnector(object): def __init__(self, project_id, reauth=False, verbose=False, private_key=None, auth_local_webserver=False, dialect='legacy'): - from google.cloud import bigquery + from google.api_core.exceptions import GoogleAPIError + from google.api_core.exceptions import ClientError + self.http_error = (ClientError, GoogleAPIError) self.project_id = project_id self.reauth = reauth self.verbose = verbose @@ -215,9 +182,7 @@ def __init__(self, project_id, reauth=False, verbose=False, self.dialect = dialect self.credentials_path = _get_credentials_file() self.credentials = self.get_credentials() - self.service = self.get_service() - self.client = bigquery.Client(project=project_id, - credentials=self.credentials) + self.client = self.get_client() # BQ Queries costs $5 per TB. First 1 TB per month is free # see here for more: https://cloud.google.com/bigquery/pricing @@ -283,8 +248,7 @@ def load_user_account_credentials(self): credentials do not have access to the project (self.project_id) on BigQuery. """ - import httplib2 - from google_auth_httplib2 import Request + import google.auth.transport.requests from google.oauth2.credentials import Credentials # Use the default credentials location under ~/.config and the @@ -316,8 +280,7 @@ def load_user_account_credentials(self): scopes=credentials_json.get('scopes')) # Refresh the token before trying to use it. - http = httplib2.Http() - request = Request(http) + request = google.auth.transport.requests.Request() credentials.refresh(request) return _try_credentials(self.project_id, credentials) @@ -418,8 +381,7 @@ def get_user_account_credentials(self): return credentials def get_service_account_credentials(self): - import httplib2 - from google_auth_httplib2 import Request + import google.auth.transport.requests from google.oauth2.service_account import Credentials from os.path import isfile @@ -442,8 +404,7 @@ def get_service_account_credentials(self): credentials = credentials.with_scopes([self.scope]) # Refresh the token before trying to use it. 
- http = httplib2.Http() - request = Request(http) + request = google.auth.transport.requests.Request() credentials.refresh(request) return credentials @@ -460,6 +421,18 @@ def _print(self, msg, end='\n'): sys.stdout.write(msg + end) sys.stdout.flush() + def _start_timer(self): + self.start = time.time() + + def get_elapsed_seconds(self): + return round(time.time() - self.start, 2) + + def print_elapsed_seconds(self, prefix='Elapsed', postfix='s.', + overlong=7): + sec = self.get_elapsed_seconds() + if sec > overlong: + self._print('{} {} {}'.format(prefix, sec, postfix)) + # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size @staticmethod def sizeof_fmt(num, suffix='B'): @@ -470,184 +443,128 @@ def sizeof_fmt(num, suffix='B'): num /= 1024.0 return fmt % (num, 'Y', suffix) - def get_service(self): - import httplib2 - from google_auth_httplib2 import AuthorizedHttp - from googleapiclient.discovery import build - - http = httplib2.Http() - authed_http = AuthorizedHttp( - self.credentials, http=http) - bigquery_service = build('bigquery', 'v2', http=authed_http) - - return bigquery_service + def get_client(self): + from google.cloud import bigquery + return bigquery.Client( + project=self.project_id, credentials=self.credentials) @staticmethod def process_http_error(ex): # See `BigQuery Troubleshooting Errors # `__ - status = json.loads(bytes_to_str(ex.content))['error'] - errors = status.get('errors', None) - - if errors: - for error in errors: - reason = error['reason'] - message = error['message'] - - raise GenericGBQException( - "Reason: {0}, Message: {1}".format(reason, message)) - - raise GenericGBQException(errors) - - def process_insert_errors(self, insert_errors): - for insert_error in insert_errors: - row = insert_error['index'] - errors = insert_error.get('errors', None) - for error in errors: - reason = error['reason'] - message = error['message'] - location = error['location'] - error_message = ('Error at Row: {0}, Reason: {1}, ' - 'Location: {2}, Message: {3}' - .format(row, reason, location, message)) - - # Report all error messages if verbose is set - if self.verbose: - self._print(error_message) - else: - raise StreamingInsertError(error_message + - '\nEnable verbose logging to ' - 'see all errors') + raise GenericGBQException("Reason: {0}".format(ex)) - raise StreamingInsertError + def run_query(self, query, **kwargs): + from google.auth.exceptions import RefreshError + from google.cloud.bigquery import QueryJobConfig + from concurrent.futures import TimeoutError - def run_query(self, query, dialect='legacy', query_parameters=(), - configuration=None, verbose=True, async=True): - """Execute a query job + job_config = { + 'query': { + 'useLegacySql': self.dialect == 'legacy' + # 'allowLargeResults', 'createDisposition', + # 'preserveNulls', destinationTable, useQueryCache + } + } + config = kwargs.get('configuration') + if config is not None: + if len(config) != 1: + raise ValueError("Only one job type must be specified, but " + "given {}".format(','.join(config.keys()))) + if 'query' in config: + if 'query' in config['query']: + if query is not None: + raise ValueError("Query statement can't be specified " + "inside config while it is specified " + "as parameter") + query = config['query']['query'] + del config['query']['query'] + + job_config['query'].update(config['query']) + else: + raise ValueError("Only 'query' job type is supported") - Parameters - ---------- - query, dialect, query_paramaters, configuration, verbose: 
see read_gbq - async: bool - Whether a synchronous or asynchronous query should be run. To be - deprecated in future versions; synchronous queries are used as a - workaround to implement timeouts, and will be removed in a - future update once Google Cloud Python resolves the issue. + self._start_timer() + try: + self._print('Requesting query... ', end="") + query_reply = self.client.query( + query, + job_config=QueryJobConfig.from_api_repr(job_config['query'])) + self._print('ok.') + except (RefreshError, ValueError): + if self.private_key: + raise AccessDenied( + "The service account credentials are not valid") + else: + raise AccessDenied( + "The credentials have been revoked or expired, " + "please re-run the application to re-authorize") + except self.http_error as ex: + self.process_http_error(ex) - Returns - ------- - Tuple - rows : list of lists - columns: list of strings - schema: dictionary - Has keys: name, field_type, mode, fields, description - """ + job_id = query_reply.job_id + self._print('Job ID: %s\nQuery running...' % job_id) + + while query_reply.state != 'DONE': + self.print_elapsed_seconds(' Elapsed', 's. Waiting...') + + timeout_ms = job_config['query'].get('timeoutMs') + if timeout_ms and timeout_ms < self.get_elapsed_seconds() * 1000: + raise QueryTimeout('Query timeout: {} ms'.format(timeout_ms)) + + timeout_sec = 1.0 + if timeout_ms: + # Wait at most 1 second so we can show progress bar + timeout_sec = min(1.0, timeout_ms / 1000.0) - def _set_common_query_settings(query_job): - if dialect == 'legacy': - query_job.use_legacy_sql = True - elif dialect == 'standard': - query_job.use_legacy_sql = False - - if configuration: - for setting, value in configuration.items(): - setattr(query_job, setting, value) - return query_job - - def sync_query(): - query_job = self.client.run_sync_query( - query, query_parameters=query_parameters) - query_job = _set_common_query_settings(query_job) - if verbose: - print("Query running...") - query_job.run() - if not query_job._properties.get("jobComplete", False): - raise QueryTimeout("Sync query timed out") - if verbose: - print("Query done.") - if query_job._properties.get("cacheHit", False): - print("Cache hit.") - else: - bytes_billed = int(query_job._properties - .get("totalBytesProcessed", 0)) - bytes_processed = int(query_job._properties - .get("totalBytesBilled", 0)) - print("Total bytes billed (processed): %s (%s)" % - (self.sizeof_fmt(bytes_billed), - self.sizeof_fmt(bytes_processed))) - print("\nRetrieving results...") - return query_job, None - - def async_query(): - query_job = self.client.run_async_query( - str(uuid.uuid4()), - query, - query_parameters=query_parameters) - query_job = _set_common_query_settings(query_job) - if verbose: - print("Query running...") - query_job.begin() try: - query_results = query_job.results().fetch_data() - except AttributeError: - query_results = query_job.result().fetch_data() - if verbose: - print("Query done.") - if query_job._properties["statistics"]["query"].get("cacheHit", - False): - print("Cache hit.") - elif ("statistics" in query_job._properties and - "query" in query_job._properties["statistics"]): - bytes_billed = int(query_job - ._properties["statistics"]["query"] - .get("totalBytesProcessed", 0)) - bytes_processed = int(query_job - ._properties["statistics"]["query"] - .get("totalBytesBilled", 0)) - print("Total bytes billed (processed): %s (%s)" % - (self.sizeof_fmt(bytes_billed), - self.sizeof_fmt(bytes_processed))) - print("\nRetrieving results...") - return 
query_results, query_job - - def get_columns_schema(query_results): - schema = [{"name": f.name, - "field_type": f.field_type, - "mode": f.mode, - "fields": f.fields, - "description": f.description} - for f in query_results.schema] - columns = [field["name"] for field in schema] - return columns, schema - - # sync_query code to be removed in future - if async: - query_results, query_job = async_query() - rows = list(query_results) - else: - query_results, query_job = sync_query() - rows = list(query_results.rows) + query_reply.result(timeout=timeout_sec) + except TimeoutError: + # Use our own timeout logic + pass + except self.http_error as ex: + self.process_http_error(ex) + + if self.verbose: + if query_reply.cache_hit: + self._print('Query done.\nCache hit.\n') + else: + bytes_processed = query_reply.total_bytes_processed or 0 + bytes_billed = query_reply.total_bytes_billed or 0 + self._print('Query done.\nProcessed: {} Billed: {}'.format( + self.sizeof_fmt(bytes_processed), + self.sizeof_fmt(bytes_billed))) + self._print('Standard price: ${:,.2f} USD\n'.format( + bytes_billed * self.query_price_for_TB)) - columns, schema = get_columns_schema(query_results) + self._print('Retrieving results...') - if verbose: - print("Got %s rows.") % len(rows) - if query_job: - print("\nTotal time taken %ss" % (datetime.utcnow() - - query_job.created.replace(tzinfo=None)).seconds) - print("Finished at %s." % datetime.now() - .strftime('%Y-%m-%d %H:%M:%S')) + try: + rows_iter = query_reply.result() + except self.http_error as ex: + self.process_http_error(ex) + result_rows = list(rows_iter) + total_rows = rows_iter.total_rows + schema = { + 'fields': [ + field.to_api_repr() + for field in rows_iter.schema], + } - return rows, columns, schema + # print basic query stats + self._print('Got {} rows.\n'.format(total_rows)) + + return schema, result_rows def load_data(self, dataframe, dataset_id, table_id, chunksize): - try: - from googleapiclient.errors import HttpError - except ImportError: - from apiclient.errors import HttpError + from google.cloud.bigquery import LoadJobConfig + from six import StringIO - job_id = uuid.uuid4().hex + destination_table = self.client.dataset(dataset_id).table(table_id) + job_config = LoadJobConfig() + job_config.write_disposition = 'WRITE_APPEND' + job_config.source_format = 'NEWLINE_DELIMITED_JSON' rows = [] remaining_rows = len(dataframe) @@ -655,44 +572,25 @@ def load_data(self, dataframe, dataset_id, table_id, chunksize): self._print("\n\n") for index, row in dataframe.reset_index(drop=True).iterrows(): - row_dict = dict() - row_dict['json'] = json.loads(row.to_json(force_ascii=False, - date_unit='s', - date_format='iso')) - row_dict['insertId'] = job_id + str(index) - rows.append(row_dict) + row_json = row.to_json( + force_ascii=False, date_unit='s', date_format='iso') + rows.append(row_json) remaining_rows -= 1 if (len(rows) % chunksize == 0) or (remaining_rows == 0): - self._print("\rStreaming Insert is {0}% Complete".format( + self._print("\rLoad is {0}% Complete".format( ((total_rows - remaining_rows) * 100) / total_rows)) - body = {'rows': rows} + body = StringIO('{}\n'.format('\n'.join(rows))) try: - response = self.service.tabledata().insertAll( - projectId=self.project_id, - datasetId=dataset_id, - tableId=table_id, - body=body).execute() - except HttpError as ex: + self.client.load_table_from_file( + body, + destination_table, + job_config=job_config).result() + except self.http_error as ex: self.process_http_error(ex) - # For streaming inserts, even if you 
receive a success HTTP - # response code, you'll need to check the insertErrors property - # of the response to determine if the row insertions were - # successful, because it's possible that BigQuery was only - # partially successful at inserting the rows. See the `Success - # HTTP Response Codes - # `__ - # section - - insert_errors = response.get('insertErrors', None) - if insert_errors: - self.process_insert_errors(insert_errors) - - sleep(1) # Maintains the inserts "per second" rate per API rows = [] self._print("\n") @@ -716,23 +614,18 @@ def schema(self, dataset_id, table_id): Fields representing the schema """ - try: - from googleapiclient.errors import HttpError - except ImportError: - from apiclient.errors import HttpError + table_ref = self.client.dataset(dataset_id).table(table_id) try: - remote_schema = self.service.tables().get( - projectId=self.project_id, - datasetId=dataset_id, - tableId=table_id).execute()['schema'] + table = self.client.get_table(table_ref) + remote_schema = table.schema - remote_fields = [{'name': field_remote['name'], - 'type': field_remote['type']} - for field_remote in remote_schema['fields']] + remote_fields = [{'name': field_remote.name, + 'type': field_remote.field_type} + for field_remote in remote_schema] return remote_fields - except HttpError as ex: + except self.http_error as ex: self.process_http_error(ex) def verify_schema(self, dataset_id, table_id, schema): @@ -818,42 +711,33 @@ def _get_credentials_file(): 'PANDAS_GBQ_CREDENTIALS_FILE') -def _create_df(rows, columns, schema, index_col, col_order): - df = DataFrame(data=rows, columns=columns) +def _parse_data(schema, rows): + # see: + # http://pandas.pydata.org/pandas-docs/dev/missing_data.html + # #missing-data-casting-rules-and-indexing + dtype_map = {'FLOAT': np.dtype(float), + 'TIMESTAMP': 'M8[ns]'} - # Manual field type conversion. Inserted to handle tests - # with only null rows, otherwise type conversion works automatically - for field in schema: - if field["field_type"] == 'TIMESTAMP': - if df[field["name"]].isnull().values.all(): - df[field["name"]] = to_datetime(df[field["name"]]) - if field["field_type"] == 'FLOAT': - if df[field["name"]].isnull().values.all(): - df[field["name"]] = to_numeric(df[field["name"]]) + fields = schema['fields'] + col_types = [field['type'] for field in fields] + col_names = [str(field['name']) for field in fields] + col_dtypes = [ + dtype_map.get(field['type'].upper(), object) + for field in fields + ] + print(fields) + page_array = np.zeros((len(rows),), dtype=lzip(col_names, col_dtypes)) + for row_num, entries in enumerate(rows): + for col_num in range(len(col_types)): + field_value = entries[col_num] + page_array[row_num][col_num] = field_value - # Reindex the DataFrame on the provided column - if index_col: - if index_col in df.columns: - df.set_index(index_col, inplace=True) - else: - raise InvalidIndexColumn( - 'Index column "{0}" does not exist in DataFrame.' 
- .format(index_col)) - - # Change the order of columns in the DataFrame based on provided list - if col_order: - if sorted(col_order) == sorted(df.columns): - df = df[col_order] - else: - raise InvalidColumnOrder( - 'Column order does not match this DataFrame.') - return df + return DataFrame(page_array, columns=col_names) def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, - auth_local_webserver=False, dialect='legacy', - query_parameters=(), configuration=None, **kwargs): + auth_local_webserver=False, dialect='legacy', **kwargs): r"""Load data from Google BigQuery using google-cloud-python The main method a user calls to execute a Query in Google BigQuery @@ -863,21 +747,19 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, Documentation is available `here `__ - Authentication via Google Cloud can be performed a number of ways. + Authentication to the Google BigQuery service is via OAuth 2.0. - One method is to generate user credentials via - ``gcloud auth application-default login`` and point to it using an - environment variable: - ``$ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/keyfile.json"`` + - If "private_key" is not provided: - You can also download a service account private key JSON file and pass the - path to the file to the private_key paramater. + By default "application default credentials" are used. - If default credentials are not located and a private key is not passed, - an auth flow will begin where a user can auth via a link or via a pop-up - through which a user can auth with their Google account. This will - generate a user credentials file, which is saved locally and can be re-used - in the future. + If default application credentials are not found or are restrictive, + user account credentials are used. In this case, you will be asked to + grant permissions for product name 'pandas GBQ'. + + - If "private_key" is provided: + + Service account credentials will be used to authenticate. Parameters ---------- @@ -918,42 +800,15 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, compliant with the SQL 2011 standard. For more information see `BigQuery SQL Reference `__ - credentials: credentials object, default None (optional) - If generating credentials on your own, pass in. Otherwise, will attempt - to generate automatically - - .. versionadded:: 0.3.0 - - query_parameters: tuple (optional) Can only be used in Standard SQL - `More info - `__ - Example:: - - gbq.read_gbq("SELECT @param1 + @param2", - query_parameters = (bigquery.ScalarQueryParameter( - 'param1', 'INT64', 1), - bigquery.ScalarQueryParameter( - 'param2', 'INT64', 2))) - - .. versionadded:: 0.3.0 - - configuration : dict (optional) - Due to the [current implementation in Google Cloud Python] only some - configuration settings are able to be set. You can pass them along like - in the following: - `read_gbq(q,configuration={'allow_large_results':True, - 'maximum_billing_tier':2})` - [Example allowable settings]: - allow_large_results, create_disposition, default_dataset, - destination, flatten_results, priority, use_query_cache, - use_legacy_sql, dry_run, write_disposition, udf_resources, - maximum_billing_tier, maximum_bytes_billed - - .. [current implementation in Google Cloud Python] - https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2765 - .. 
[Example allowable settings] - http://google-cloud-python.readthedocs.io/en/latest/_modules/google/cloud/bigquery/job.html?highlight=QueryJobConfig - .. versionadded:: 0.3.0 + + **kwargs : Arbitrary keyword arguments + configuration (dict): query config parameters for job processing. + For example: + + configuration = {'query': {'useQueryCache': False}} + + For more information see `BigQuery SQL Reference + `__ Returns ------- @@ -969,42 +824,48 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, if dialect not in ('legacy', 'standard'): raise ValueError("'{0}' is not valid for dialect".format(dialect)) - if configuration and any(key in configuration for key in - ["query", "copy", "load", "extract"]): - raise ValueError("The Google Cloud BigQuery API handles configuration " - "settings differently. There are now a discrete set " - "of query settings one can set by passing in a " - "dictionary, e.g.: `configuration=" - "{'maximum_billing_tier':2}`. See http://google-cloud" - "-python.readthedocs.io/en/latest/_modules/google/" - "cloud/bigquery/job.html?highlight=QueryJobConfig " - "for allowable paramaters.") - - connector = GbqConnector(project_id=project_id, - reauth=reauth, - auth_local_webserver=auth_local_webserver, - private_key=private_key) - - # Temporary workaround in order to perform timeouts on queries. - # Once Google Cloud Python resolves, differentiation between sync and async - # code will be removed. - if (configuration and "timeout_ms" in configuration): - rows, columns, schema = connector.run_query(query, - dialect, - query_parameters, - configuration, - verbose, - async=False) - else: - rows, columns, schema = connector.run_query(query, - dialect, - query_parameters, - configuration, - verbose) - df = _create_df(rows, columns, schema, index_col, col_order) + connector = GbqConnector( + project_id, reauth=reauth, verbose=verbose, private_key=private_key, + dialect=dialect, auth_local_webserver=auth_local_webserver) + schema, rows = connector.run_query(query, **kwargs) + final_df = _parse_data(schema, rows) + + # Reindex the DataFrame on the provided column + if index_col is not None: + if index_col in final_df.columns: + final_df.set_index(index_col, inplace=True) + else: + raise InvalidIndexColumn( + 'Index column "{0}" does not exist in DataFrame.' + .format(index_col) + ) + + # Change the order of columns in the DataFrame based on provided list + if col_order is not None: + if sorted(col_order) == sorted(final_df.columns): + final_df = final_df[col_order] + else: + raise InvalidColumnOrder( + 'Column order does not match this DataFrame.' 
+ ) + + # cast BOOLEAN and INTEGER columns from object to bool/int + # if they dont have any nulls + type_map = {'BOOLEAN': bool, 'INTEGER': int} + for field in schema['fields']: + if field['type'].upper() in type_map and \ + final_df[field['name']].notnull().all(): + final_df[field['name']] = \ + final_df[field['name']].astype(type_map[field['type'].upper()]) - return df + connector.print_elapsed_seconds( + 'Total time taken', + datetime.now().strftime('s.\nFinished at %Y-%m-%d %H:%M:%S.'), + 0 + ) + + return final_df def to_gbq(dataframe, destination_table, project_id, chunksize=10000, @@ -1152,11 +1013,6 @@ class _Table(GbqConnector): def __init__(self, project_id, dataset_id, reauth=False, verbose=False, private_key=None): - try: - from googleapiclient.errors import HttpError - except ImportError: - from apiclient.errors import HttpError - self.http_error = HttpError self.dataset_id = dataset_id super(_Table, self).__init__(project_id, reauth, verbose, private_key) @@ -1173,18 +1029,16 @@ def exists(self, table_id): boolean true if table exists, otherwise false """ + from google.api_core.exceptions import NotFound + table_ref = self.client.dataset(self.dataset_id).table(table_id) try: - self.service.tables().get( - projectId=self.project_id, - datasetId=self.dataset_id, - tableId=table_id).execute() + self.client.get_table(table_ref) return True + except NotFound: + return False except self.http_error as ex: - if ex.resp.status == 404: - return False - else: - self.process_http_error(ex) + self.process_http_error(ex) def create(self, table_id, schema): """ Create a table in Google BigQuery given a table and schema @@ -1197,6 +1051,8 @@ def create(self, table_id, schema): Use the generate_bq_schema to generate your table schema from a dataframe. 
""" + from google.cloud.bigquery import SchemaField + from google.cloud.bigquery import Table if self.exists(table_id): raise TableCreationError("Table {0} already " @@ -1207,20 +1063,20 @@ def create(self, table_id, schema): _Dataset(self.project_id, private_key=self.private_key).create(self.dataset_id) - body = { - 'schema': schema, - 'tableReference': { - 'tableId': table_id, - 'projectId': self.project_id, - 'datasetId': self.dataset_id - } - } + table_ref = self.client.dataset(self.dataset_id).table(table_id) + table = Table(table_ref) + + for field in schema['fields']: + if 'mode' not in field: + field['mode'] = 'NULLABLE' + + table.schema = [ + SchemaField.from_api_repr(field) + for field in schema['fields'] + ] try: - self.service.tables().insert( - projectId=self.project_id, - datasetId=self.dataset_id, - body=body).execute() + self.client.create_table(table) except self.http_error as ex: self.process_http_error(ex) @@ -1232,30 +1088,25 @@ def delete(self, table_id): table : str Name of table to be deleted """ + from google.api_core.exceptions import NotFound if not self.exists(table_id): raise NotFoundException("Table does not exist") + table_ref = self.client.dataset(self.dataset_id).table(table_id) try: - self.service.tables().delete( - datasetId=self.dataset_id, - projectId=self.project_id, - tableId=table_id).execute() - except self.http_error as ex: + self.client.delete_table(table_ref) + except NotFound: # Ignore 404 error which may occur if table already deleted - if ex.resp.status != 404: - self.process_http_error(ex) + pass + except self.http_error as ex: + self.process_http_error(ex) class _Dataset(GbqConnector): def __init__(self, project_id, reauth=False, verbose=False, private_key=None): - try: - from googleapiclient.errors import HttpError - except ImportError: - from apiclient.errors import HttpError - self.http_error = HttpError super(_Dataset, self).__init__(project_id, reauth, verbose, private_key) @@ -1272,17 +1123,15 @@ def exists(self, dataset_id): boolean true if dataset exists, otherwise false """ + from google.api_core.exceptions import NotFound try: - self.service.datasets().get( - projectId=self.project_id, - datasetId=dataset_id).execute() + self.client.get_dataset(self.client.dataset(dataset_id)) return True + except NotFound: + return False except self.http_error as ex: - if ex.resp.status == 404: - return False - else: - self.process_http_error(ex) + self.process_http_error(ex) def datasets(self): """ Return a list of datasets in Google BigQuery @@ -1298,32 +1147,15 @@ def datasets(self): """ dataset_list = [] - next_page_token = None - first_query = True - while first_query or next_page_token: - first_query = False - - try: - list_dataset_response = self.service.datasets().list( - projectId=self.project_id, - pageToken=next_page_token).execute() - - dataset_response = list_dataset_response.get('datasets') - if dataset_response is None: - dataset_response = [] - - next_page_token = list_dataset_response.get('nextPageToken') - - if dataset_response is None: - dataset_response = [] + try: + dataset_response = self.client.list_datasets() - for row_num, raw_row in enumerate(dataset_response): - dataset_list.append( - raw_row['datasetReference']['datasetId']) + for row in dataset_response: + dataset_list.append(row.dataset_id) - except self.http_error as ex: - self.process_http_error(ex) + except self.http_error as ex: + self.process_http_error(ex) return dataset_list @@ -1335,22 +1167,16 @@ def create(self, dataset_id): dataset : str Name of dataset to 
be written """ + from google.cloud.bigquery import Dataset if self.exists(dataset_id): raise DatasetCreationError("Dataset {0} already " "exists".format(dataset_id)) - body = { - 'datasetReference': { - 'projectId': self.project_id, - 'datasetId': dataset_id - } - } + dataset = Dataset(self.client.dataset(dataset_id)) try: - self.service.datasets().insert( - projectId=self.project_id, - body=body).execute() + self.client.create_dataset(dataset) except self.http_error as ex: self.process_http_error(ex) @@ -1362,20 +1188,20 @@ def delete(self, dataset_id): dataset : str Name of dataset to be deleted """ + from google.api_core.exceptions import NotFound if not self.exists(dataset_id): raise NotFoundException( "Dataset {0} does not exist".format(dataset_id)) try: - self.service.datasets().delete( - datasetId=dataset_id, - projectId=self.project_id).execute() + self.client.delete_dataset(self.client.dataset(dataset_id)) - except self.http_error as ex: + except NotFound: # Ignore 404 error which may occur if dataset already deleted - if ex.resp.status != 404: - self.process_http_error(ex) + pass + except self.http_error as ex: + self.process_http_error(ex) def tables(self, dataset_id): """ List tables in the specific dataset in Google BigQuery @@ -1392,28 +1218,15 @@ def tables(self, dataset_id): """ table_list = [] - next_page_token = None - first_query = True - - while first_query or next_page_token: - first_query = False - try: - list_table_response = self.service.tables().list( - projectId=self.project_id, - datasetId=dataset_id, - pageToken=next_page_token).execute() - - table_response = list_table_response.get('tables') - next_page_token = list_table_response.get('nextPageToken') - - if not table_response: - return table_list + try: + table_response = self.client.list_dataset_tables( + self.client.dataset(dataset_id)) - for row_num, raw_row in enumerate(table_response): - table_list.append(raw_row['tableReference']['tableId']) + for row in table_response: + table_list.append(row.table_id) - except self.http_error as ex: - self.process_http_error(ex) + except self.http_error as ex: + self.process_http_error(ex) return table_list diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 697eb2e1..6a2b8480 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -13,13 +13,10 @@ from pandas import compat from pandas.compat import u, range -from pandas import NaT, DataFrame, to_datetime +from pandas import NaT, DataFrame from pandas_gbq import gbq import pandas.util.testing as tm from pandas.compat.numpy import np_datetime64_compat -from google.cloud import bigquery - -from google.cloud.exceptions import BadRequest, NotFound TABLE_ID = 'new_test' @@ -196,19 +193,17 @@ def test_should_be_able_to_get_valid_credentials(self): credentials = self.sut.get_credentials() assert credentials.valid - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - assert bigquery_service is not None + def test_should_be_able_to_get_a_bigquery_client(self): + bigquery_client = self.sut.get_client() + assert bigquery_client is not None def test_should_be_able_to_get_schema_from_query(self): - result = self.sut.run_query('SELECT 1') - rows, columns, schema = result + schema, pages = self.sut.run_query('SELECT 1') assert schema is not None def test_should_be_able_to_get_results_from_query(self): - results = gbq.read_gbq('SELECT 1', - project_id=_get_project_id()) - assert results is not None + schema, pages = 
self.sut.run_query('SELECT 1') + assert pages is not None def test_get_application_default_credentials_does_not_throw_error(self): if _check_if_can_get_correct_default_credentials(): @@ -261,18 +256,17 @@ def test_should_be_able_to_get_valid_credentials(self): credentials = self.sut.get_credentials() assert credentials.valid - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - assert bigquery_service is not None + def test_should_be_able_to_get_a_bigquery_client(self): + bigquery_client = self.sut.get_client() + assert bigquery_client is not None def test_should_be_able_to_get_schema_from_query(self): - result = self.sut.run_query('SELECT 1') - rows, columns, schema = result + schema, pages = self.sut.run_query('SELECT 1') assert schema is not None def test_should_be_able_to_get_results_from_query(self): - results = gbq.read_gbq('SELECT 1', project_id=_get_project_id()) - assert results is not None + schema, pages = self.sut.run_query('SELECT 1') + assert pages is not None class TestGBQConnectorIntegrationWithServiceAccountKeyContents(object): @@ -293,19 +287,17 @@ def test_should_be_able_to_get_valid_credentials(self): credentials = self.sut.get_credentials() assert credentials.valid - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - assert bigquery_service is not None + def test_should_be_able_to_get_a_bigquery_client(self): + bigquery_client = self.sut.get_client() + assert bigquery_client is not None def test_should_be_able_to_get_schema_from_query(self): - result = self.sut.run_query('SELECT 1') - rows, columns, schema = result + schema, pages = self.sut.run_query('SELECT 1') assert schema is not None def test_should_be_able_to_get_results_from_query(self): - results = gbq.read_gbq('SELECT 1', - project_id=_get_project_id()) - assert results is not None + schema, pages = self.sut.run_query('SELECT 1') + assert pages is not None class GBQUnitTests(object): @@ -524,7 +516,7 @@ def test_should_properly_handle_nullable_integers(self): df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal( - df, DataFrame({'nullable_integer': [1, None]})) + df, DataFrame({'nullable_integer': [1, None]}).astype(object)) def test_should_properly_handle_valid_longs(self): query = 'SELECT 1 << 62 AS valid_long' @@ -540,7 +532,7 @@ def test_should_properly_handle_nullable_longs(self): df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal( - df, DataFrame({'nullable_long': [1 << 62, None]})) + df, DataFrame({'nullable_long': [1 << 62, None]}).astype(object)) def test_should_properly_handle_null_integers(self): query = 'SELECT INTEGER(NULL) AS null_integer' @@ -594,19 +586,16 @@ def test_should_properly_handle_timestamp_unix_epoch(self): query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") AS unix_epoch' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - expected = DataFrame({'unix_epoch': - [np.datetime64('1970-01-01T00:00:00.000000Z')]}) - tm.assert_frame_equal(df, to_datetime(expected.unix_epoch).dt - .tz_localize('UTC').to_frame()) + tm.assert_frame_equal(df, DataFrame( + {'unix_epoch': [np.datetime64('1970-01-01T00:00:00.000000Z')]})) def test_should_properly_handle_arbitrary_timestamp(self): query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") AS valid_timestamp' df = gbq.read_gbq(query, project_id=_get_project_id(), 
private_key=_get_private_key_path()) - expected = DataFrame({'valid_timestamp': - [np.datetime64('2004-09-15T05:00:00.000000Z')]}) - tm.assert_frame_equal(df, to_datetime(expected.valid_timestamp).dt - .tz_localize('UTC').to_frame()) + tm.assert_frame_equal(df, DataFrame({ + 'valid_timestamp': [np.datetime64('2004-09-15T05:00:00.000000Z')] + })) def test_should_properly_handle_null_timestamp(self): query = 'SELECT TIMESTAMP(NULL) AS null_timestamp' @@ -639,7 +628,7 @@ def test_should_properly_handle_nullable_booleans(self): df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal( - df, DataFrame({'nullable_boolean': [True, None]})) + df, DataFrame({'nullable_boolean': [True, None]}).astype(object)) def test_unicode_string_conversion_and_normalization(self): correct_test_datatype = DataFrame( @@ -700,7 +689,7 @@ def test_column_order_plus_index(self): def test_read_gbq_raises_invalid_index_column(self): query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" - col_order = ['string_3', 'string_2', 'string_1'] + col_order = ['string_3', 'string_2'] # Column string_bbb does not exist. Should raise InvalidIndexColumn with pytest.raises(gbq.InvalidIndexColumn): @@ -709,18 +698,18 @@ def test_read_gbq_raises_invalid_index_column(self): private_key=_get_private_key_path()) def test_malformed_query(self): - with pytest.raises(BadRequest): + with pytest.raises(gbq.GenericGBQException): gbq.read_gbq("SELCET * FORM [publicdata:samples.shakespeare]", project_id=_get_project_id(), private_key=_get_private_key_path()) def test_bad_project_id(self): - with pytest.raises(NotFound): + with pytest.raises(gbq.GenericGBQException): gbq.read_gbq("SELECT 1", project_id='001', private_key=_get_private_key_path()) def test_bad_table_name(self): - with pytest.raises(NotFound): + with pytest.raises(gbq.GenericGBQException): gbq.read_gbq("SELECT * FROM [publicdata:samples.nope]", project_id=_get_project_id(), private_key=_get_private_key_path()) @@ -749,15 +738,14 @@ def test_zero_rows(self): ('is_bot', np.dtype(bool)), ('ts', 'M8[ns]')]) expected_result = DataFrame( page_array, columns=['title', 'id', 'is_bot', 'ts']) - tm.assert_frame_equal(expected_result.astype(object), - df.reset_index(drop=True).astype(object)) + tm.assert_frame_equal(df, expected_result) def test_legacy_sql(self): legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10" # Test that a legacy sql statement fails when # setting dialect='standard' - with pytest.raises((RuntimeError, BadRequest)): + with pytest.raises(gbq.GenericGBQException): gbq.read_gbq(legacy_sql, project_id=_get_project_id(), dialect='standard', private_key=_get_private_key_path()) @@ -775,7 +763,7 @@ def test_standard_sql(self): # Test that a standard sql statement fails when using # the legacy SQL dialect (default value) - with pytest.raises((RuntimeError, BadRequest)): + with pytest.raises(gbq.GenericGBQException): gbq.read_gbq(standard_sql, project_id=_get_project_id(), private_key=_get_private_key_path()) @@ -803,25 +791,66 @@ def test_invalid_option_for_sql_dialect(self): def test_query_with_parameters(self): sql_statement = "SELECT @param1 + @param2 AS valid_result" - config = {"use_legacy_sql": False} + config = { + 'query': { + "useLegacySql": False, + "parameterMode": "named", + "queryParameters": [ + { + "name": "param1", + "parameterType": { + "type": "INTEGER" + }, + "parameterValue": { + "value": 1 + } + }, + { + "name": "param2", + "parameterType": { + "type": "INTEGER" + }, + 
"parameterValue": { + "value": 2 + } + } + ] + } + } # Test that a query that relies on parameters fails # when parameters are not supplied via configuration - with pytest.raises((RuntimeError, BadRequest)): + with pytest.raises(ValueError): gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path()) # Test that the query is successful because we have supplied - # the correct query parameters via the 'config' and query_parameters - # option + # the correct query parameters via the 'config' option df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), - configuration=config, - query_parameters=(bigquery.ScalarQueryParameter( - 'param1', 'INT64', 1), - bigquery.ScalarQueryParameter( - 'param2', 'INT64', 2)), - private_key=_get_private_key_path()) + private_key=_get_private_key_path(), + configuration=config) tm.assert_frame_equal(df, DataFrame({'valid_result': [3]})) + def test_query_inside_configuration(self): + query_no_use = 'SELECT "PI_WRONG" AS valid_string' + query = 'SELECT "PI" AS valid_string' + config = { + 'query': { + "query": query, + "useQueryCache": False, + } + } + # Test that it can't pass query both + # inside config and as parameter + with pytest.raises(ValueError): + gbq.read_gbq(query_no_use, project_id=_get_project_id(), + private_key=_get_private_key_path(), + configuration=config) + + df = gbq.read_gbq(None, project_id=_get_project_id(), + private_key=_get_private_key_path(), + configuration=config) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) + def test_configuration_without_query(self): sql_statement = 'SELECT 1' config = { @@ -845,9 +874,31 @@ def test_configuration_without_query(self): private_key=_get_private_key_path(), configuration=config) + def test_configuration_raises_value_error_with_multiple_config(self): + sql_statement = 'SELECT 1' + config = { + 'query': { + "query": sql_statement, + "useQueryCache": False, + }, + 'load': { + "query": sql_statement, + "useQueryCache": False, + } + } + # Test that only ValueError is raised with multiple configurations + with pytest.raises(ValueError): + gbq.read_gbq(sql_statement, project_id=_get_project_id(), + private_key=_get_private_key_path(), + configuration=config) + def test_timeout_configuration(self): sql_statement = 'SELECT 1' - config = {"timeout_ms": 1} + config = { + 'query': { + "timeoutMs": 1 + } + } # Test that QueryTimeout error raises with pytest.raises(gbq.QueryTimeout): gbq.read_gbq(sql_statement, project_id=_get_project_id(), @@ -926,8 +977,6 @@ def test_upload_data(self): gbq.to_gbq(df, self.destination_table + test_id, _get_project_id(), chunksize=10000, private_key=_get_private_key_path()) - sleep(30) # <- Curses Google!!! - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(self.destination_table + test_id), project_id=_get_project_id(), @@ -964,8 +1013,6 @@ def test_upload_data_if_table_exists_append(self): gbq.to_gbq(df, self.destination_table + test_id, _get_project_id(), if_exists='append', private_key=_get_private_key_path()) - sleep(30) # <- Curses Google!!! - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(self.destination_table + test_id), project_id=_get_project_id(), @@ -995,8 +1042,6 @@ def test_upload_subset_columns_if_table_exists_append(self): self.destination_table + test_id, _get_project_id(), if_exists='append', private_key=_get_private_key_path()) - sleep(30) # <- Curses Google!!! 
- result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(self.destination_table + test_id), project_id=_get_project_id(), @@ -1029,8 +1074,6 @@ def test_upload_data_if_table_exists_replace(self): _get_project_id(), if_exists='replace', private_key=_get_private_key_path()) - sleep(30) # <- Curses Google!!! - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(self.destination_table + test_id), project_id=_get_project_id(), @@ -1204,10 +1247,14 @@ def test_verify_schema_ignores_field_mode(self): def test_retrieve_schema(self): # Issue #24 schema function returns the schema in biquery test_id = "15" - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} + test_schema = { + 'fields': [ + {'name': 'A', 'type': 'FLOAT', 'mode': 'NULLABLE'}, + {'name': 'B', 'type': 'FLOAT', 'mode': 'NULLABLE'}, + {'name': 'C', 'type': 'STRING', 'mode': 'NULLABLE'}, + {'name': 'D', 'type': 'TIMESTAMP', 'mode': 'NULLABLE'} + ] + } self.table.create(TABLE_ID + test_id, test_schema) actual = self.sut.schema(self.dataset_prefix + "1", TABLE_ID + test_id) @@ -1364,8 +1411,6 @@ def test_upload_data(self): gbq.to_gbq(df, self.destination_table + test_id, _get_project_id(), chunksize=10000) - sleep(30) # <- Curses Google!!! - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}".format( self.destination_table + test_id), project_id=_get_project_id()) @@ -1422,8 +1467,6 @@ def test_upload_data(self): gbq.to_gbq(df, self.destination_table + test_id, _get_project_id(), chunksize=10000, private_key=_get_private_key_contents()) - sleep(30) # <- Curses Google!!! - result = gbq.read_gbq("SELECT COUNT(*) as num_rows FROM {0}".format( self.destination_table + test_id), project_id=_get_project_id(), diff --git a/requirements.txt b/requirements.txt index f49120c2..88cf967a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,4 @@ pandas -httplib2 -google-api-python-client google-auth -google-auth-httplib2 google-auth-oauthlib -google-cloud-bigquery==0.26.0 +google-cloud-bigquery diff --git a/setup.py b/setup.py index 327c983d..86a40c5e 100644 --- a/setup.py +++ b/setup.py @@ -19,12 +19,9 @@ def readme(): INSTALL_REQUIRES = [ 'pandas', - 'httplib2>=0.9.2', - 'google-api-python-client>=1.6.0', 'google-auth>=1.0.0', - 'google-auth-httplib2>=0.0.1', 'google-auth-oauthlib>=0.0.1', - 'google-cloud-bigquery>=0.26.0,<0.28.0', + 'google-cloud-bigquery>=0.28.0', ] From cd551bb98252c662dc4f98783982ed97e8447204 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Mon, 27 Nov 2017 13:59:39 -0500 Subject: [PATCH 37/42] Indentation --- pandas_gbq/gbq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 15130321..58a3374c 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -476,8 +476,8 @@ def run_query(self, query, **kwargs): if 'query' in config['query']: if query is not None: raise ValueError("Query statement can't be specified " - "inside config while it is specified " - "as parameter") + "inside config while it is specified " + "as parameter") query = config['query']['query'] del config['query']['query'] From a6865e0b0eae3f43b15ec238e4b1a571588d2201 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 27 Nov 2017 12:26:09 -0800 Subject: [PATCH 38/42] Ignore mode property when comparing schemas. 
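
verify_schema() and schema_is_subset() now drop the 'mode' key from both the
local and remote field lists before comparing them, since BigQuery reports a
default mode of 'NULLABLE' for fields where the local schema omits it. A rough
standalone sketch of the comparison rule (the helper name schemas_match is
illustrative only and not part of the library):

    def schemas_match(fields_remote, fields_local):
        # Sort both field lists by name and drop 'mode' before comparing,
        # mirroring GbqConnector.verify_schema in the diff below.
        def normalize(fields):
            return sorted(
                [{k: v for k, v in f.items() if k != 'mode'} for f in fields],
                key=lambda f: f['name'])
        return normalize(fields_remote) == normalize(fields_local)

    # With this rule, these two schemas compare as equal:
    #   [{'name': 'A', 'type': 'FLOAT', 'mode': 'NULLABLE'}]
    #   [{'name': 'A', 'type': 'FLOAT'}]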
--- pandas_gbq/gbq.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 58a3374c..1893462e 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -620,9 +620,11 @@ def schema(self, dataset_id, table_id): table = self.client.get_table(table_ref) remote_schema = table.schema - remote_fields = [{'name': field_remote.name, - 'type': field_remote.field_type} - for field_remote in remote_schema] + remote_fields = [ + field_remote.to_api_repr() for field_remote in remote_schema] + for field in remote_fields: + field['type'] = field['type'].upper() + field['mode'] = field['mode'].upper() return remote_fields except self.http_error as ex: @@ -655,6 +657,14 @@ def verify_schema(self, dataset_id, table_id, schema): key=lambda x: x['name']) fields_local = sorted(schema['fields'], key=lambda x: x['name']) + # Ignore mode when comparing schemas. + for field in fields_local: + if 'mode' in field: + del field['mode'] + for field in fields_remote: + if 'mode' in field: + del field['mode'] + return fields_remote == fields_local def schema_is_subset(self, dataset_id, table_id, schema): @@ -683,6 +693,14 @@ def schema_is_subset(self, dataset_id, table_id, schema): fields_remote = self.schema(dataset_id, table_id) fields_local = schema['fields'] + # Ignore mode when comparing schemas. + for field in fields_local: + if 'mode' in field: + del field['mode'] + for field in fields_remote: + if 'mode' in field: + del field['mode'] + return all(field in fields_remote for field in fields_local) def delete_and_recreate_table(self, dataset_id, table_id, table_schema): From c636783f1d8d775640f54269b143e56f2dd3e4c6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 27 Nov 2017 12:52:28 -0800 Subject: [PATCH 39/42] Document new dependency on google-cloud-bigquery. --- docs/source/changelog.rst | 5 +++++ docs/source/install.rst | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index dc35067e..0a5661fe 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,6 +1,11 @@ Changelog ========= +0.3.0 / 2017-??-?? +------------------ + +- Use the `google-cloud-bigquery `__ library for API calls instead of ``google-api-client`` and ``httplib2``. (:issue:`93`) + 0.2.1 / 2017-??-?? ------------------ diff --git a/docs/source/install.rst b/docs/source/install.rst index 2b701fd2..98f2d79d 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -37,8 +37,10 @@ Dependencies This module requires following additional dependencies: -- `httplib2 `__: HTTP client -- `google-api-python-client `__: Google's API client - `google-auth `__: authentication and authorization for Google's API - `google-auth-oauthlib `__: integration with `oauthlib `__ for end-user authentication -- `google-auth-httplib2 `__: adapter to use ``httplib2`` HTTP client with ``google-auth`` +- `google-cloud-bigquery `__: Google Cloud client library for BigQuery + +.. note:: + + The dependency on `google-cloud-bigquery `__ is new in version 0.3.0 of ``pandas-gbq``. \ No newline at end of file From e959571a05ad48cfb42152d4e550a2b1a78b23f2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 29 Nov 2017 16:21:17 -0800 Subject: [PATCH 40/42] Document dependencies for previous verions. Also says which libraries are no longer required, for easier upgrades. 
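
For readers upgrading an existing environment, a quick way to confirm that the
new client library is importable (purely illustrative; this check is not part
of pandas-gbq):

    # Requires google-cloud-bigquery >= 0.28.0, per setup.py in this series.
    try:
        from google.cloud import bigquery
    except ImportError:
        raise ImportError(
            'pandas-gbq 0.3.0+ needs google-cloud-bigquery; the old '
            'httplib2/google-api-python-client stack is no longer used')
    print(bigquery.__version__)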
--- docs/source/install.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/source/install.rst b/docs/source/install.rst index 98f2d79d..c64c7939 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -43,4 +43,11 @@ This module requires following additional dependencies: .. note:: - The dependency on `google-cloud-bigquery `__ is new in version 0.3.0 of ``pandas-gbq``. \ No newline at end of file + The dependency on `google-cloud-bigquery `__ is new in version 0.3.0 of ``pandas-gbq``. + Versions less than 0.3.0 required the following dependencies: + + - `httplib2 `__: HTTP client (no longer required) + - `google-api-python-client `__: Google's API client (no longer required, replaced by `google-cloud-bigquery `__:) + - `google-auth `__: authentication and authorization for Google's API + - `google-auth-oauthlib `__: integration with `oauthlib `__ for end-user authentication + - `google-auth-httplib2 `__: adapter to use ``httplib2`` HTTP client with ``google-auth`` (no longer required) From 6448abb94eed3a2ba84f5720cc3e660b143d3bfe Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 8 Dec 2017 09:58:31 -0800 Subject: [PATCH 41/42] Remove print statement used for debugging. --- pandas_gbq/gbq.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 3f60f5f0..46a246e5 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -742,7 +742,6 @@ def _parse_data(schema, rows): dtype_map.get(field['type'].upper(), object) for field in fields ] - print(fields) page_array = np.zeros((len(rows),), dtype=lzip(col_names, col_dtypes)) for row_num, entries in enumerate(rows): for col_num in range(len(col_types)): From 26d64316711bc2104e009dfc3021a1e01d154c2d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 20 Dec 2017 10:15:07 -0800 Subject: [PATCH 42/42] Add deps and StreamingInsertError to changelog. --- docs/source/changelog.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index a876840e..b6684582 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -4,7 +4,9 @@ Changelog 0.3.0 / 2017-??-?? ------------------ -- Use the `google-cloud-bigquery `__ library for API calls instead of ``google-api-client`` and ``httplib2``. (:issue:`93`) +- Use the `google-cloud-bigquery `__ library for API calls. The ``google-cloud-bigquery`` package is a new dependency, and dependencies on ``google-api-python-client`` and ``httplib2`` are removed. See the `installation guide `__ for more details. (:issue:`93`) +- :func:`to_gbq` now uses a load job instead of the streaming API. (:issue:`75`) +- Remove ``StreamingInsertError`` class, as it is no longer used by :func:`to_gbq`. (:issue:`75`) 0.2.1 / 2017-11-27 ------------------
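
A condensed sketch of the load-job approach that replaces the streaming-insert
path noted in the changelog entry above (independent of pandas-gbq internals:
the function name load_frame and the client and df arguments are placeholders,
and the real load_data() implementation earlier in this series additionally
chunks rows and reports progress):

    from io import StringIO
    from google.cloud import bigquery

    def load_frame(client, df, dataset_id, table_id):
        # One load job per call, instead of streaming rows with
        # tabledata().insertAll(); rows travel as newline-delimited JSON.
        job_config = bigquery.LoadJobConfig()
        job_config.write_disposition = 'WRITE_APPEND'
        job_config.source_format = 'NEWLINE_DELIMITED_JSON'
        body = StringIO(
            df.to_json(orient='records', lines=True, date_format='iso'))
        table_ref = client.dataset(dataset_id).table(table_id)
        client.load_table_from_file(
            body, table_ref, job_config=job_config).result()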