From e1fbb072947d025fd491c13513faa581e7926a97 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Sat, 30 Jul 2016 11:45:31 -0400 Subject: [PATCH] Allow users to specify whether gbq should use standard SQL #13615 --- doc/source/io.rst | 7 ++++++ doc/source/whatsnew/v0.19.0.txt | 6 +++++ pandas/io/gbq.py | 20 ++++++++++++--- pandas/io/tests/test_gbq.py | 44 +++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 4 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index ee5734aaf9494..2866371cce61a 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4482,6 +4482,13 @@ destination DataFrame as well as a preferred column order as follows: You can toggle the verbose output via the ``verbose`` flag which defaults to ``True``. +.. note:: + + The ``dialect`` argument can be used to indicate whether to use BigQuery's ``'legacy'`` SQL + or BigQuery's ``'standard'`` SQL (beta). The default value is ``'legacy'``. For more information + on BigQuery's standard SQL, see `BigQuery SQL Reference + <https://cloud.google.com/bigquery/sql-reference/>`__ + .. _io.bigquery_writer: diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 64e6bc0ab307c..07b127cac942a 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -301,6 +301,12 @@ For ``MultiIndex``, values are dropped if any level is missing by default. Speci ``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`) +.. _whatsnew_0190.gbq: + +Google BigQuery Enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- The :func:`pandas.io.gbq.read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the :ref:`docs <io.bigquery>` for more details (:issue:`13615`). + ..
_whatsnew_0190.enhancements.other: Other enhancements diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 140f5cc6bb6e3..94def5c265195 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -145,13 +145,14 @@ class GbqConnector(object): scope = 'https://www.googleapis.com/auth/bigquery' def __init__(self, project_id, reauth=False, verbose=False, - private_key=None): + private_key=None, dialect='legacy'): _check_google_client_version() _test_google_api_imports() self.project_id = project_id self.reauth = reauth self.verbose = verbose self.private_key = private_key + self.dialect = dialect self.credentials = self.get_credentials() self.service = self.get_service() @@ -334,7 +335,8 @@ def run_query(self, query): job_data = { 'configuration': { 'query': { - 'query': query + 'query': query, + 'useLegacySql': self.dialect == 'legacy' # 'allowLargeResults', 'createDisposition', # 'preserveNulls', destinationTable, useQueryCache } @@ -563,7 +565,7 @@ def _parse_entry(field_value, field_type): def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=True, private_key=None): + reauth=False, verbose=True, private_key=None, dialect='legacy'): """Load data from Google BigQuery. THIS IS AN EXPERIMENTAL LIBRARY @@ -601,6 +603,12 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. jupyter iPython notebook on remote host) + dialect : {'legacy', 'standard'}, default 'legacy' + 'legacy' : Use BigQuery's legacy SQL dialect. + 'standard' : Use BigQuery's standard SQL (beta), which is + compliant with the SQL 2011 standard. 
For more information + see `BigQuery SQL Reference + <https://cloud.google.com/bigquery/sql-reference/>`__ Returns ------- df: DataFrame DataFrame representing results of query """ _test_google_api_imports() if not project_id: raise TypeError("Missing required parameter: project_id") + if dialect not in ('legacy', 'standard'): + raise ValueError("'{0}' is not valid for dialect".format(dialect)) + connector = GbqConnector(project_id, reauth=reauth, verbose=verbose, - private_key=private_key) + private_key=private_key, + dialect=dialect) schema, pages = connector.run_query(query) dataframe_list = [] while len(pages) > 0: diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 278c5d7215624..0d8512ffb5524 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -557,6 +557,50 @@ def test_zero_rows(self): expected_result = DataFrame(page_array, columns=['title', 'id']) self.assert_frame_equal(df, expected_result) + def test_legacy_sql(self): + legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10" + + # Test that a legacy sql statement fails when + # setting dialect='standard' + with tm.assertRaises(gbq.GenericGBQException): + gbq.read_gbq(legacy_sql, project_id=PROJECT_ID, + dialect='standard') + + # Test that a legacy sql statement succeeds when + # setting dialect='legacy' + df = gbq.read_gbq(legacy_sql, project_id=PROJECT_ID, + dialect='legacy') + self.assertEqual(len(df.drop_duplicates()), 10) + + def test_standard_sql(self): + standard_sql = "SELECT DISTINCT id FROM " \ "`publicdata.samples.wikipedia` LIMIT 10" + + # Test that a standard sql statement fails when using + # the legacy SQL dialect (default value) + with tm.assertRaises(gbq.GenericGBQException): + gbq.read_gbq(standard_sql, project_id=PROJECT_ID) + + # Test that a standard sql statement succeeds when + # setting dialect='standard' + df = gbq.read_gbq(standard_sql, project_id=PROJECT_ID, + dialect='standard') + self.assertEqual(len(df.drop_duplicates()), 10) + + def 
test_invalid_option_for_sql_dialect(self): + sql_statement = "SELECT DISTINCT id FROM " \ + "`publicdata.samples.wikipedia` LIMIT 10" + + # Test that an invalid option for `dialect` raises ValueError + with tm.assertRaises(ValueError): + gbq.read_gbq(sql_statement, project_id=PROJECT_ID, + dialect='invalid') + + # Test that a correct option for dialect succeeds + # to make sure ValueError was due to invalid dialect + gbq.read_gbq(sql_statement, project_id=PROJECT_ID, + dialect='standard') + class TestToGBQIntegration(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015