diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 72e7373d0dd33..60c3e4df8d129 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -19,6 +19,11 @@ Other Enhancements - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`) - Added support for reading from Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`) +- :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to + reflect changes from the `Pandas-GBQ library version 0.5.0 + <https://pandas-gbq.readthedocs.io/en/latest/changelog.html#changelog-0-5-0>`__. + (:issue:`21627`) + .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0bf5acf14294a..b553cfdc72c92 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1102,37 +1102,27 @@ def to_dict(self, orient='dict', into=dict): else: raise ValueError("orient '{o}' not understood".format(o=orient)) - def to_gbq(self, destination_table, project_id, chunksize=None, - verbose=None, reauth=False, if_exists='fail', private_key=None, - auth_local_webserver=False, table_schema=None): + def to_gbq(self, destination_table, project_id=None, chunksize=None, + reauth=False, if_exists='fail', private_key=None, + auth_local_webserver=False, table_schema=None, location=None, + progress_bar=True, verbose=None): """ Write a DataFrame to a Google BigQuery table. This function requires the `pandas-gbq package <https://pandas-gbq.readthedocs.io>`__. - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If ``private_key`` is provided, the library loads the JSON service - account credentials and uses those to authenticate. - - - If no ``private_key`` is provided, the library tries `application - default credentials`_. - - .. 
_application default credentials: - https://cloud.google.com/docs/authentication/production#providing_credentials_to_your_application - - - If application default credentials are not found or cannot be used - with BigQuery, the library authenticates with user account - credentials. In this case, you will be asked to grant permissions - for product name 'pandas GBQ'. + See the `How to authenticate with Google BigQuery + <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__ + guide for authentication instructions. Parameters ---------- destination_table : str - Name of table to be written, in the form 'dataset.tablename'. - project_id : str - Google BigQuery Account project ID. + Name of table to be written, in the form ``dataset.tablename``. + project_id : str, optional + Google BigQuery Account project ID. Optional when available from + the environment. chunksize : int, optional Number of rows to be inserted in each chunk from the dataframe. Set to ``None`` to load the whole dataframe at once. @@ -1170,8 +1160,21 @@ def to_gbq(self, destination_table, project_id, chunksize=None, BigQuery API documentation on available names of a field. *New in version 0.3.1 of pandas-gbq*. - verbose : boolean, deprecated - *Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module + location : str, optional + Location where the load job should run. See the `BigQuery locations + documentation + <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a + list of available locations. The location must match that of the + target dataset. + + *New in version 0.5.0 of pandas-gbq*. + progress_bar : bool, default True + Use the library ``tqdm`` to show the progress bar for the upload, + chunk by chunk. + + *New in version 0.5.0 of pandas-gbq*. + verbose : bool, deprecated + Deprecated in Pandas-GBQ 0.4.0. Use the `logging module to adjust verbosity instead <https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__. 
@@ -1182,10 +1185,12 @@ def to_gbq(self, destination_table, project_id, chunksize=None, """ from pandas.io import gbq return gbq.to_gbq( - self, destination_table, project_id, chunksize=chunksize, - verbose=verbose, reauth=reauth, if_exists=if_exists, - private_key=private_key, auth_local_webserver=auth_local_webserver, - table_schema=table_schema) + self, destination_table, project_id=project_id, + chunksize=chunksize, reauth=reauth, + if_exists=if_exists, private_key=private_key, + auth_local_webserver=auth_local_webserver, + table_schema=table_schema, location=location, + progress_bar=progress_bar, verbose=verbose) @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index c7c16598ee432..87a0e4d5d1747 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -22,34 +22,26 @@ def _try_import(): def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=None, private_key=None, dialect='legacy', - **kwargs): + reauth=False, private_key=None, auth_local_webserver=False, + dialect='legacy', location=None, configuration=None, + verbose=None): """ Load data from Google BigQuery. This function requires the `pandas-gbq package <https://pandas-gbq.readthedocs.io>`__. - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If "private_key" is not provided: - - By default "application default credentials" are used. - - If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. - - - If "private_key" is provided: - - Service account credentials will be used to authenticate. + See the `How to authenticate with Google BigQuery + <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__ + guide for authentication instructions. 
Parameters ---------- query : str SQL-Like Query to return data values. - project_id : str - Google BigQuery Account project ID. 
+ project_id : str, optional + Google BigQuery Account project ID. Optional when available from + the environment. index_col : str, optional Name of result column to use for index in results DataFrame. col_order : list(str), optional @@ -62,6 +54,16 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. Jupyter/IPython notebook on remote host). + auth_local_webserver : bool, default False + Use the `local webserver flow`_ instead of the `console flow`_ + when getting user credentials. + + .. _local webserver flow: + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. _console flow: + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + + *New in version 0.2.0 of pandas-gbq*. dialect : str, default 'legacy' SQL syntax dialect to use. Value can be one of: @@ -74,19 +76,26 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, compliant with the SQL 2011 standard. For more information see `BigQuery Standard SQL Reference <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__. - verbose : boolean, deprecated - *Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module - to adjust verbosity instead - <https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__. - kwargs : dict - Arbitrary keyword arguments. - configuration (dict): query config parameters for job processing. + location : str, optional + Location where the query job should run. See the `BigQuery locations + documentation + <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a + list of available locations. The location must match that of any + datasets used in the query. 
+ + *New in version 0.5.0 of pandas-gbq*. + configuration : dict, optional + Query config parameters for job processing. 
For example: configuration = {'query': {'useQueryCache': False}} - For more information see `BigQuery SQL Reference - <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__ + For more information see `BigQuery REST API Reference + <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__. + verbose : bool, deprecated + Deprecated in Pandas-GBQ 0.4.0. Use the `logging module + to adjust verbosity instead + <https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__. Returns ------- @@ -100,20 +109,21 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, """ pandas_gbq = _try_import() return pandas_gbq.read_gbq( - query, project_id=project_id, - index_col=index_col, col_order=col_order, - reauth=reauth, verbose=verbose, - private_key=private_key, - dialect=dialect, - **kwargs) + query, project_id=project_id, index_col=index_col, + col_order=col_order, reauth=reauth, verbose=verbose, + private_key=private_key, auth_local_webserver=auth_local_webserver, + dialect=dialect, location=location, configuration=configuration) -def to_gbq(dataframe, destination_table, project_id, chunksize=None, +def to_gbq(dataframe, destination_table, project_id=None, chunksize=None, verbose=None, reauth=False, if_exists='fail', private_key=None, - auth_local_webserver=False, table_schema=None): + auth_local_webserver=False, table_schema=None, location=None, + progress_bar=True): pandas_gbq = _try_import() return pandas_gbq.to_gbq( - dataframe, destination_table, project_id, chunksize=chunksize, - verbose=verbose, reauth=reauth, if_exists=if_exists, - private_key=private_key, auth_local_webserver=auth_local_webserver, - table_schema=table_schema) + dataframe, destination_table, project_id=project_id, + chunksize=chunksize, verbose=verbose, reauth=reauth, + if_exists=if_exists, private_key=private_key, + auth_local_webserver=auth_local_webserver, + table_schema=table_schema, location=location, + 
progress_bar=progress_bar) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 58a84ad4d47f8..dc6c319bb3366 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -2,7 +2,6 @@ from datetime 
import datetime import pytz import platform -from time import sleep import os import numpy as np @@ -48,16 +47,18 @@ def _in_travis_environment(): def _get_project_id(): if _in_travis_environment(): return os.environ.get('GBQ_PROJECT_ID') - else: - return PROJECT_ID + return PROJECT_ID or os.environ.get('GBQ_PROJECT_ID') def _get_private_key_path(): if _in_travis_environment(): return os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci', 'travis_gbq.json']) - else: - return PRIVATE_KEY_JSON_PATH + + private_key_path = PRIVATE_KEY_JSON_PATH + if not private_key_path: + private_key_path = os.environ.get('GBQ_GOOGLE_APPLICATION_CREDENTIALS') + return private_key_path def clean_gbq_environment(private_key=None): @@ -123,11 +124,9 @@ def test_roundtrip(self): test_size = 20001 df = make_mixed_dataframe_v2(test_size) - df.to_gbq(destination_table, _get_project_id(), chunksize=10000, + df.to_gbq(destination_table, _get_project_id(), chunksize=None, private_key=_get_private_key_path()) - sleep(30) # <- Curses Google!!! - result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(),