diff --git a/doc/source/conf.py b/doc/source/conf.py
index 43c7c23c5e20d..965b537c15ce5 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -350,6 +350,7 @@
 intersphinx_mapping = {
     'statsmodels': ('http://www.statsmodels.org/devel/', None),
     'matplotlib': ('http://matplotlib.org/', None),
+    'pandas-gbq': ('https://pandas-gbq.readthedocs.io/en/latest/', None),
     'python': ('https://docs.python.org/3/', None),
     'numpy': ('https://docs.scipy.org/doc/numpy/', None),
     'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None),
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index ce63cb2473bc4..4f05a6f108add 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -404,7 +404,10 @@ Other Enhancements
 - :func:`read_html` now accepts a ``displayed_only`` keyword argument to control whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`)
 - zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`)
 - :class:`DataFrame` and :class:`Series` now support matrix multiplication (```@```) operator (:issue:`10259`) for Python>=3.5
-
+- Updated the ``to_gbq`` and ``read_gbq`` signatures and documentation to reflect
+  changes from the pandas-gbq library version 0.4.0. Added an intersphinx mapping
+  to the pandas-gbq library. (:issue:`20564`)
+
 .. _whatsnew_0230.api_breaking:
 
 Backwards incompatible API changes
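For context, the mapping added to ``conf.py`` above is what lets the ``pandas_gbq.to_gbq`` and ``pandas_gbq.read_gbq`` entries in the new See Also sections below resolve to the pandas-gbq documentation. A minimal sketch of the mechanism, reduced to just the pieces this change touches:

```python
# Sketch of how sphinx.ext.intersphinx consumes the new mapping: Sphinx
# fetches an objects.inv inventory from each listed site and uses it to
# resolve cross-references such as :func:`pandas_gbq.read_gbq`.
extensions = ['sphinx.ext.intersphinx']

intersphinx_mapping = {
    # Each value is (base URL, inventory); None means "fetch objects.inv
    # from the base URL".
    'pandas-gbq': ('https://pandas-gbq.readthedocs.io/en/latest/', None),
}
```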
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 9626079660771..af6b64057e358 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1116,60 +1116,90 @@ def to_dict(self, orient='dict', into=dict):
         else:
             raise ValueError("orient '%s' not understood" % orient)
 
-    def to_gbq(self, destination_table, project_id, chunksize=10000,
-               verbose=True, reauth=False, if_exists='fail', private_key=None):
-        """Write a DataFrame to a Google BigQuery table.
-
-        The main method a user calls to export pandas DataFrame contents to
-        Google BigQuery table.
+    def to_gbq(self, destination_table, project_id, chunksize=None,
+               verbose=None, reauth=False, if_exists='fail', private_key=None,
+               auth_local_webserver=False, table_schema=None):
+        """
+        Write a DataFrame to a Google BigQuery table.
 
-        Google BigQuery API Client Library v2 for Python is used.
-        Documentation is available `here
-        <https://developers.google.com/api-client-library/python/apis/bigquery/v2>`__
+        This function requires the `pandas-gbq package
+        <https://pandas-gbq.readthedocs.io>`__.
 
         Authentication to the Google BigQuery service is via OAuth 2.0.
 
-        - If "private_key" is not provided:
+        - If ``private_key`` is provided, the library loads the JSON service
+          account credentials and uses those to authenticate.
 
-          By default "application default credentials" are used.
+        - If no ``private_key`` is provided, the library tries `application
+          default credentials`_.
 
-          If default application credentials are not found or are restrictive,
-          user account credentials are used. In this case, you will be asked to
-          grant permissions for product name 'pandas GBQ'.
+          .. _application default credentials:
+              https://cloud.google.com/docs/authentication/production#providing_credentials_to_your_application
 
-        - If "private_key" is provided:
-
-          Service account credentials will be used to authenticate.
+          - If application default credentials are not found or cannot be used
+            with BigQuery, the library authenticates with user account
+            credentials. In this case, you will be asked to grant permissions
+            for product name 'pandas GBQ'.
 
         Parameters
         ----------
-        dataframe : DataFrame
-            DataFrame to be written
-        destination_table : string
-            Name of table to be written, in the form 'dataset.tablename'
+        destination_table : str
+            Name of table to be written, in the form 'dataset.tablename'.
         project_id : str
             Google BigQuery Account project ID.
-        chunksize : int (default 10000)
+        chunksize : int, optional
             Number of rows to be inserted in each chunk from the dataframe.
-        verbose : boolean (default True)
-            Show percentage complete
-        reauth : boolean (default False)
+            Set to ``None`` to load the whole dataframe at once.
+        reauth : bool, default False
             Force Google BigQuery to reauthenticate the user. This is useful
             if multiple accounts are used.
-        if_exists : {'fail', 'replace', 'append'}, default 'fail'
-            'fail': If table exists, do nothing.
-            'replace': If table exists, drop it, recreate it, and insert data.
-            'append': If table exists, insert data. Create if does not exist.
-        private_key : str (optional)
+        if_exists : str, default 'fail'
+            Behavior when the destination table exists. Value can be one of:
+
+            ``'fail'``
+                If table exists, do nothing.
+            ``'replace'``
+                If table exists, drop it, recreate it, and insert data.
+            ``'append'``
+                If table exists, insert data. Create if does not exist.
+        private_key : str, optional
             Service account private key in JSON format. Can be file path
             or string contents. This is useful for remote server
-            authentication (eg. Jupyter/IPython notebook on remote host)
-        """
+            authentication (e.g. Jupyter/IPython notebook on remote host).
+        auth_local_webserver : bool, default False
+            Use the `local webserver flow`_ instead of the `console flow`_
+            when getting user credentials.
+
+            .. _local webserver flow:
+                http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
+            .. _console flow:
+                http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
+
+            *New in version 0.2.0 of pandas-gbq*.
+        table_schema : list of dicts, optional
+            List of BigQuery table fields to which the DataFrame columns
+            conform, e.g. ``[{'name': 'col1', 'type': 'STRING'},...]``.
+            If a schema is not provided, it will be generated according to
+            the dtypes of the DataFrame columns. See the BigQuery API
+            documentation on available names of a field.
+
+            *New in version 0.3.1 of pandas-gbq*.
+        verbose : bool, deprecated
+            *Deprecated in version 0.4.0 of pandas-gbq.* Use the `logging module
+            to adjust verbosity instead
+            <https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__.
+
+        See Also
+        --------
+        pandas_gbq.to_gbq : This function in the pandas-gbq library.
+        pandas.read_gbq : Read a DataFrame from Google BigQuery.
+        """
         from pandas.io import gbq
-        return gbq.to_gbq(self, destination_table, project_id=project_id,
-                          chunksize=chunksize, verbose=verbose, reauth=reauth,
-                          if_exists=if_exists, private_key=private_key)
+        return gbq.to_gbq(
+            self, destination_table, project_id, chunksize=chunksize,
+            verbose=verbose, reauth=reauth, if_exists=if_exists,
+            private_key=private_key, auth_local_webserver=auth_local_webserver,
+            table_schema=table_schema)
 
     @classmethod
     def from_records(cls, data, index=None, exclude=None, columns=None,
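To make the new keywords concrete, here is a minimal usage sketch of the updated method. The project, dataset, and column names are hypothetical, and running it requires the pandas-gbq package plus valid Google Cloud credentials:

```python
import pandas as pd

df = pd.DataFrame({'name': ['alice', 'bob'], 'score': [10, 20]})

# Hypothetical destination table and project ID.
df.to_gbq(
    'my_dataset.scores',      # destination table as 'dataset.tablename'
    'my-project-id',          # Google BigQuery project ID
    chunksize=None,           # new default: upload the whole frame at once
    if_exists='append',       # insert into the table, creating it if missing
    table_schema=[            # optional explicit schema (pandas-gbq >= 0.3.1)
        {'name': 'name', 'type': 'STRING'},
        {'name': 'score', 'type': 'INTEGER'},
    ],
)
```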
+ """ from pandas.io import gbq - return gbq.to_gbq(self, destination_table, project_id=project_id, - chunksize=chunksize, verbose=verbose, reauth=reauth, - if_exists=if_exists, private_key=private_key) + return gbq.to_gbq( + self, destination_table, project_id, chunksize=chunksize, + verbose=verbose, reauth=reauth, if_exists=if_exists, + private_key=private_key, auth_local_webserver=auth_local_webserver, + table_schema=table_schema) @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index f9bc6ae1a5451..236d70609e76c 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -22,12 +22,10 @@ def _try_import(): def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=True, private_key=None, dialect='legacy', + reauth=False, verbose=None, private_key=None, dialect='legacy', **kwargs): - r"""Load data from Google BigQuery. - - The main method a user calls to execute a Query in Google BigQuery - and read results into a pandas DataFrame. + """ + Load data from Google BigQuery. This function requires the `pandas-gbq package `__. @@ -49,32 +47,39 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, Parameters ---------- query : str - SQL-Like Query to return data values + SQL-Like Query to return data values. project_id : str Google BigQuery Account project ID. - index_col : str (optional) - Name of result column to use for index in results DataFrame - col_order : list(str) (optional) + index_col : str, optional + Name of result column to use for index in results DataFrame. + col_order : list(str), optional List of BigQuery column names in the desired order for results - DataFrame - reauth : boolean (default False) + DataFrame. + reauth : boolean, default False Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. - verbose : boolean (default True) - Verbose output - private_key : str (optional) + private_key : str, optional Service account private key in JSON format. Can be file path or string contents. This is useful for remote server - authentication (eg. Jupyter/IPython notebook on remote host) - - dialect : {'legacy', 'standard'}, default 'legacy' - 'legacy' : Use BigQuery's legacy SQL dialect. - 'standard' : Use BigQuery's standard SQL, which is - compliant with the SQL 2011 standard. For more information - see `BigQuery SQL Reference - `__ - - `**kwargs` : Arbitrary keyword arguments + authentication (eg. Jupyter/IPython notebook on remote host). + dialect : str, default 'legacy' + SQL syntax dialect to use. Value can be one of: + + ``'legacy'`` + Use BigQuery's legacy SQL dialect. For more information see + `BigQuery Legacy SQL Reference + `__. + ``'standard'`` + Use BigQuery's standard SQL, which is + compliant with the SQL 2011 standard. For more information + see `BigQuery Standard SQL Reference + `__. + verbose : boolean, deprecated + *Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module + to adjust verbosity instead + `__. + kwargs : dict + Arbitrary keyword arguments. configuration (dict): query config parameters for job processing. For example: @@ -86,8 +91,12 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, Returns ------- df: DataFrame - DataFrame representing results of query + DataFrame representing results of query. + See Also + -------- + pandas_gbq.read_gbq : This function in the pandas-gbq library. + pandas.DataFrame.to_gbq : Write a DataFrame to Google BigQuery. 
""" pandas_gbq = _try_import() return pandas_gbq.read_gbq( @@ -99,10 +108,12 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, **kwargs) -def to_gbq(dataframe, destination_table, project_id, chunksize=10000, - verbose=True, reauth=False, if_exists='fail', private_key=None): +def to_gbq(dataframe, destination_table, project_id, chunksize=None, + verbose=None, reauth=False, if_exists='fail', private_key=None, + auth_local_webserver=False, table_schema=None): pandas_gbq = _try_import() - pandas_gbq.to_gbq(dataframe, destination_table, project_id, - chunksize=chunksize, - verbose=verbose, reauth=reauth, - if_exists=if_exists, private_key=private_key) + return pandas_gbq.to_gbq( + dataframe, destination_table, project_id, chunksize=chunksize, + verbose=verbose, reauth=reauth, if_exists=if_exists, + private_key=private_key, auth_local_webserver=auth_local_webserver, + table_schema=table_schema)