
Commit e1fbb07

Allow users to specify whether gbq should use standard SQL pandas-dev#13615
1 parent 768bf49 commit e1fbb07

4 files changed: +73 -4 lines changed

doc/source/io.rst (+7)

@@ -4482,6 +4482,13 @@ destination DataFrame as well as a preferred column order as follows:
 
 You can toggle the verbose output via the ``verbose`` flag which defaults to ``True``.
 
+.. note::
+
+    The ``dialect`` argument can be used to indicate whether to use BigQuery's ``'legacy'`` SQL
+    or BigQuery's ``'standard'`` SQL (beta). The default value is ``'legacy'``. For more information
+    on BigQuery's standard SQL, see `BigQuery SQL Reference
+    <https://cloud.google.com/bigquery/sql-reference/>`__
+
 .. _io.bigquery_writer:
 
 
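For context, a minimal usage sketch of the new argument (illustrative only; the project id below is a placeholder and not part of this commit):

    from pandas.io import gbq

    # Default behaviour is unchanged: queries run against BigQuery's legacy SQL.
    df = gbq.read_gbq("SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10",
                      project_id="my-project-id")

    # Opt in to standard SQL (beta) by passing dialect='standard'; note the
    # backtick-quoted table name used by the standard dialect.
    df = gbq.read_gbq("SELECT id FROM `publicdata.samples.wikipedia` LIMIT 10",
                      project_id="my-project-id", dialect='standard')
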
doc/source/whatsnew/v0.19.0.txt (+6)

@@ -301,6 +301,12 @@ For ``MultiIndex``, values are dropped if any level is missing by default. Speci
 
 ``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`)
 
+.. _whatsnew_0170.gbq:
+
+Google BigQuery Enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+- The :func:`pandas.io.gbq.read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the :ref:`docs <io.bigquery_reader>` for more details (:issue:`13615`).
+
 .. _whatsnew_0190.enhancements.other:
 
 Other enhancements

pandas/io/gbq.py (+16 -4)

@@ -145,13 +145,14 @@ class GbqConnector(object):
     scope = 'https://www.googleapis.com/auth/bigquery'
 
     def __init__(self, project_id, reauth=False, verbose=False,
-                 private_key=None):
+                 private_key=None, dialect='legacy'):
         _check_google_client_version()
         _test_google_api_imports()
        self.project_id = project_id
         self.reauth = reauth
         self.verbose = verbose
         self.private_key = private_key
+        self.dialect = dialect
         self.credentials = self.get_credentials()
         self.service = self.get_service()
 
@@ -334,7 +335,8 @@ def run_query(self, query):
         job_data = {
             'configuration': {
                 'query': {
-                    'query': query
+                    'query': query,
+                    'useLegacySql': self.dialect == 'legacy'
                     # 'allowLargeResults', 'createDisposition',
                     # 'preserveNulls', destinationTable, useQueryCache
                 }
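This change maps the user-facing dialect directly onto the BigQuery jobs API: ``useLegacySql`` is True only when ``dialect == 'legacy'``. A sketch of the resulting query configuration for each value (illustrative only; the query strings are placeholders):

    # dialect='legacy' (the default)
    {'configuration': {'query': {'query': "SELECT 1", 'useLegacySql': True}}}

    # dialect='standard'
    {'configuration': {'query': {'query': "SELECT 1", 'useLegacySql': False}}}
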
@@ -563,7 +565,7 @@ def _parse_entry(field_value, field_type):
 
 
 def read_gbq(query, project_id=None, index_col=None, col_order=None,
-             reauth=False, verbose=True, private_key=None):
+             reauth=False, verbose=True, private_key=None, dialect='legacy'):
     """Load data from Google BigQuery.
 
     THIS IS AN EXPERIMENTAL LIBRARY
@@ -601,6 +603,12 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
         Service account private key in JSON format. Can be file path
         or string contents. This is useful for remote server
         authentication (eg. jupyter iPython notebook on remote host)
+    dialect : {'legacy', 'standard'}, default 'legacy'
+        'legacy' : Use BigQuery's legacy SQL dialect.
+        'standard' : Use BigQuery's standard SQL (beta), which is
+        compliant with the SQL 2011 standard. For more information
+        see `BigQuery SQL Reference
+        <https://cloud.google.com/bigquery/sql-reference/>`__
 
     Returns
     -------
@@ -612,8 +620,12 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
     if not project_id:
         raise TypeError("Missing required parameter: project_id")
 
+    if dialect not in ('legacy', 'standard'):
+        raise ValueError("'{0}' is not valid for dialect".format(dialect))
+
     connector = GbqConnector(project_id, reauth=reauth, verbose=verbose,
-                             private_key=private_key)
+                             private_key=private_key,
+                             dialect=dialect)
     schema, pages = connector.run_query(query)
     dataframe_list = []
     while len(pages) > 0:
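The new guard means an unsupported dialect value fails fast with ValueError before a connector is built or any request is sent. A quick sketch (not part of the commit; the project id is a placeholder):

    from pandas.io import gbq

    try:
        gbq.read_gbq("SELECT 1", project_id="my-project-id", dialect="sql92")
    except ValueError as exc:
        print(exc)  # "'sql92' is not valid for dialect"
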

pandas/io/tests/test_gbq.py (+44)

@@ -557,6 +557,50 @@ def test_zero_rows(self):
         expected_result = DataFrame(page_array, columns=['title', 'id'])
         self.assert_frame_equal(df, expected_result)
 
+    def test_legacy_sql(self):
+        legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10"
+
+        # Test that a legacy sql statement fails when
+        # setting dialect='standard'
+        with tm.assertRaises(gbq.GenericGBQException):
+            gbq.read_gbq(legacy_sql, project_id=PROJECT_ID,
+                         dialect='standard')
+
+        # Test that a legacy sql statement succeeds when
+        # setting dialect='legacy'
+        df = gbq.read_gbq(legacy_sql, project_id=PROJECT_ID,
+                          dialect='legacy')
+        self.assertEqual(len(df.drop_duplicates()), 10)
+
+    def test_standard_sql(self):
+        standard_sql = "SELECT DISTINCT id FROM " \
+                       "`publicdata.samples.wikipedia` LIMIT 10"
+
+        # Test that a standard sql statement fails when using
+        # the legacy SQL dialect (default value)
+        with tm.assertRaises(gbq.GenericGBQException):
+            gbq.read_gbq(standard_sql, project_id=PROJECT_ID)
+
+        # Test that a standard sql statement succeeds when
+        # setting dialect='standard'
+        df = gbq.read_gbq(standard_sql, project_id=PROJECT_ID,
+                          dialect='standard')
+        self.assertEqual(len(df.drop_duplicates()), 10)
+
+    def test_invalid_option_for_sql_dialect(self):
+        sql_statement = "SELECT DISTINCT id FROM " \
+                        "`publicdata.samples.wikipedia` LIMIT 10"
+
+        # Test that an invalid option for `dialect` raises ValueError
+        with tm.assertRaises(ValueError):
+            gbq.read_gbq(sql_statement, project_id=PROJECT_ID,
+                         dialect='invalid')
+
+        # Test that a correct option for dialect succeeds
+        # to make sure ValueError was due to invalid dialect
+        gbq.read_gbq(sql_statement, project_id=PROJECT_ID,
+                     dialect='standard')
+
 
 class TestToGBQIntegration(tm.TestCase):
     # Changes to BigQuery table schema may take up to 2 minutes as of May 2015