From ea523875eafd07db1ece1ecab008c79abf6609c6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 30 Mar 2018 16:49:32 -0700 Subject: [PATCH 01/13] DOC: intersphinx to pandas-gbq Delegates more of the behavior and documentation for `to_gbq` and `read_gbq` methods to the `pandas-gbq` library. This duplicate documentation was getting out of sync. --- doc/source/conf.py | 1 + pandas/core/frame.py | 57 ++++------------------------- pandas/io/gbq.py | 86 ++++++++------------------------------------ 3 files changed, 23 insertions(+), 121 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 43c7c23c5e20d..965b537c15ce5 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -350,6 +350,7 @@ intersphinx_mapping = { 'statsmodels': ('http://www.statsmodels.org/devel/', None), 'matplotlib': ('http://matplotlib.org/', None), + 'pandas-gbq': ('https://pandas-gbq.readthedocs.io/en/latest/', None), 'python': ('https://docs.python.org/3/', None), 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None), diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9626079660771..6294c3bf0efe4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1116,60 +1116,17 @@ def to_dict(self, orient='dict', into=dict): else: raise ValueError("orient '%s' not understood" % orient) - def to_gbq(self, destination_table, project_id, chunksize=10000, - verbose=True, reauth=False, if_exists='fail', private_key=None): - """Write a DataFrame to a Google BigQuery table. - - The main method a user calls to export pandas DataFrame contents to - Google BigQuery table. - - Google BigQuery API Client Library v2 for Python is used. - Documentation is available `here - `__ - - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If "private_key" is not provided: - - By default "application default credentials" are used. 
- - If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. - - - If "private_key" is provided: + def to_gbq(self, *args, **kwargs): + """ + Write a DataFrame to a Google BigQuery table. - Service account credentials will be used to authenticate. + This function requires the `pandas-gbq package + `__. - Parameters - ---------- - dataframe : DataFrame - DataFrame to be written - destination_table : string - Name of table to be written, in the form 'dataset.tablename' - project_id : str - Google BigQuery Account project ID. - chunksize : int (default 10000) - Number of rows to be inserted in each chunk from the dataframe. - verbose : boolean (default True) - Show percentage complete - reauth : boolean (default False) - Force Google BigQuery to reauthenticate the user. This is useful - if multiple accounts are used. - if_exists : {'fail', 'replace', 'append'}, default 'fail' - 'fail': If table exists, do nothing. - 'replace': If table exists, drop it, recreate it, and insert data. - 'append': If table exists, insert data. Create if does not exist. - private_key : str (optional) - Service account private key in JSON format. Can be file path - or string contents. This is useful for remote server - authentication (eg. 
Jupyter/IPython notebook on remote host) + See: :meth:`pandas_gbq.to_gbq` """ - from pandas.io import gbq - return gbq.to_gbq(self, destination_table, project_id=project_id, - chunksize=chunksize, verbose=verbose, reauth=reauth, - if_exists=if_exists, private_key=private_key) + return gbq.to_gbq(self, *args, **kwargs) @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index f9bc6ae1a5451..4753b218aec94 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -21,88 +21,32 @@ def _try_import(): return pandas_gbq -def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=True, private_key=None, dialect='legacy', - **kwargs): - r"""Load data from Google BigQuery. - - The main method a user calls to execute a Query in Google BigQuery - and read results into a pandas DataFrame. +def read_gbq(*args, **kwargs): + """ + Load data from Google BigQuery. This function requires the `pandas-gbq package `__. - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If "private_key" is not provided: - - By default "application default credentials" are used. - - If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. - - - If "private_key" is provided: - - Service account credentials will be used to authenticate. - - Parameters - ---------- - query : str - SQL-Like Query to return data values - project_id : str - Google BigQuery Account project ID. - index_col : str (optional) - Name of result column to use for index in results DataFrame - col_order : list(str) (optional) - List of BigQuery column names in the desired order for results - DataFrame - reauth : boolean (default False) - Force Google BigQuery to reauthenticate the user. This is useful - if multiple accounts are used. 
- verbose : boolean (default True) - Verbose output - private_key : str (optional) - Service account private key in JSON format. Can be file path - or string contents. This is useful for remote server - authentication (eg. Jupyter/IPython notebook on remote host) - - dialect : {'legacy', 'standard'}, default 'legacy' - 'legacy' : Use BigQuery's legacy SQL dialect. - 'standard' : Use BigQuery's standard SQL, which is - compliant with the SQL 2011 standard. For more information - see `BigQuery SQL Reference - `__ - - `**kwargs` : Arbitrary keyword arguments - configuration (dict): query config parameters for job processing. - For example: - - configuration = {'query': {'useQueryCache': False}} - - For more information see `BigQuery SQL Reference - `__ + See :meth:`pandas_gbq.read_gbq`. Returns ------- df: DataFrame DataFrame representing results of query - """ pandas_gbq = _try_import() - return pandas_gbq.read_gbq( - query, project_id=project_id, - index_col=index_col, col_order=col_order, - reauth=reauth, verbose=verbose, - private_key=private_key, - dialect=dialect, - **kwargs) + return pandas_gbq.read_gbq(*args, **kwargs) -def to_gbq(dataframe, destination_table, project_id, chunksize=10000, - verbose=True, reauth=False, if_exists='fail', private_key=None): +def to_gbq(*args, **kwargs): + """ + Write a DataFrame to a Google BigQuery table. + + This function requires the `pandas-gbq package + `__. + + See :meth:`pandas_gbq.to_gbq`. + """ pandas_gbq = _try_import() - pandas_gbq.to_gbq(dataframe, destination_table, project_id, - chunksize=chunksize, - verbose=verbose, reauth=reauth, - if_exists=if_exists, private_key=private_key) + pandas_gbq.to_gbq(*args, **kwargs) From b1b14790d86e9f2c4babdf84256646d4fbbf03fc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Apr 2018 11:14:10 -0700 Subject: [PATCH 02/13] DOC: add detailed docs for read_gbq, to_gbq back. 
--- pandas/core/frame.py | 73 +++++++++++++++++++++- pandas/io/gbq.py | 143 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 206 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6294c3bf0efe4..3c8e00af5a5d8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1116,17 +1116,84 @@ def to_dict(self, orient='dict', into=dict): else: raise ValueError("orient '%s' not understood" % orient) - def to_gbq(self, *args, **kwargs): + def to_gbq( + self, destination_table, project_id, chunksize=10000, + verbose=True, reauth=False, if_exists='fail', private_key=None, + **kwargs): """ Write a DataFrame to a Google BigQuery table. This function requires the `pandas-gbq package `__. - See: :meth:`pandas_gbq.to_gbq` + Authentication to the Google BigQuery service is via OAuth 2.0. + + - If "private_key" is not provided: + + By default "application default credentials" are used. + + If default application credentials are not found or are restrictive, + user account credentials are used. In this case, you will be asked to + grant permissions for product name 'pandas GBQ'. + + - If "private_key" is provided: + + Service account credentials will be used to authenticate. + + Parameters + ---------- + destination_table : string + Name of table to be written, in the form 'dataset.tablename'. + project_id : str + Google BigQuery Account project ID. + chunksize : int (default 10000) + Number of rows to be inserted in each chunk from the dataframe. + Set to ``None`` to load the whole dataframe at once. + verbose : boolean (default True) + Show percentage complete. + reauth : boolean (default False) + Force Google BigQuery to reauthenticate the user. This is useful + if multiple accounts are used. + if_exists : {'fail', 'replace', 'append'}, default 'fail' + Behavior when the destination table exists. + 'fail': If table exists, do nothing. + 'replace': If table exists, drop it, recreate it, and insert data. 
+ 'append': If table exists, insert data. Create if does not exist. + private_key : str (optional) + Service account private key in JSON format. Can be file path + or string contents. This is useful for remote server + authentication (eg. Jupyter/IPython notebook on remote host). + kwargs : dict + Arbitrary keyword arguments. + + auth_local_webserver (boolean): default False + Use the [local webserver flow] instead of the [console flow] when + getting user credentials. + + .. [local webserver flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. [console flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + .. versionadded:: pandas-gbq 0.2.0 + table_schema (list of dicts): + List of BigQuery table fields to which according DataFrame columns + conform to, e.g. `[{'name': 'col1', 'type': 'STRING'},...]`. If + schema is not provided, it will be generated according to dtypes + of DataFrame columns. See BigQuery API documentation on available + names of a field. + .. 
versionadded:: pandas-gbq 0.3.1 + + See Also + -------- + pandas_gbq.to_gbq + pandas.io.to_gbq """ from pandas.io import gbq - return gbq.to_gbq(self, *args, **kwargs) + return gbq.to_gbq( + self, destination_table, project_id, chunksize=chunksize, + verbose=verbose, reauth=reauth, if_exists=if_exists, + private_key=private_key, **kwargs) + @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 4753b218aec94..f306aa0806ad8 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -21,32 +21,161 @@ def _try_import(): return pandas_gbq -def read_gbq(*args, **kwargs): +def read_gbq( + query, project_id=None, index_col=None, col_order=None, reauth=False, + verbose=True, private_key=None, dialect='legacy', **kwargs): """ Load data from Google BigQuery. This function requires the `pandas-gbq package `__. - See :meth:`pandas_gbq.read_gbq`. + Authentication to the Google BigQuery service is via OAuth 2.0. + + - If "private_key" is not provided: + + By default "application default credentials" are used. + + If default application credentials are not found or are restrictive, + user account credentials are used. In this case, you will be asked to + grant permissions for product name 'pandas GBQ'. + + - If "private_key" is provided: + + Service account credentials will be used to authenticate. + + Parameters + ---------- + query : str + SQL-Like Query to return data values. + project_id : str + Google BigQuery Account project ID. + index_col : str (optional) + Name of result column to use for index in results DataFrame. + col_order : list(str) (optional) + List of BigQuery column names in the desired order for results + DataFrame. + reauth : boolean (default False) + Force Google BigQuery to reauthenticate the user. This is useful + if multiple accounts are used. + verbose : boolean (default True) + Verbose output. + private_key : str (optional) + Service account private key in JSON format. 
Can be file path + or string contents. This is useful for remote server + authentication (eg. Jupyter/IPython notebook on remote host). + dialect : {'legacy', 'standard'}, default 'legacy' + SQL syntax dialect to use. + 'legacy' : Use BigQuery's legacy SQL dialect. + 'standard' : Use BigQuery's standard SQL, which is + compliant with the SQL 2011 standard. For more information + see `BigQuery SQL Reference + `__. + kwargs : dict + Arbitrary keyword arguments. + configuration (dict): query config parameters for job processing. + For example: + + configuration = {'query': {'useQueryCache': False}} + + For more information see `BigQuery SQL Reference + `__ Returns ------- df: DataFrame - DataFrame representing results of query + DataFrame representing results of query. + + See Also + -------- + pandas_gbq.read_gbq """ pandas_gbq = _try_import() - return pandas_gbq.read_gbq(*args, **kwargs) + return pandas_gbq.read_gbq( + query, project_id=project_id, + index_col=index_col, col_order=col_order, + reauth=reauth, verbose=verbose, + private_key=private_key, + dialect=dialect, + **kwargs) -def to_gbq(*args, **kwargs): +def to_gbq( + dataframe, destination_table, project_id, chunksize=10000, + verbose=True, reauth=False, if_exists='fail', private_key=None, + **kwargs): """ Write a DataFrame to a Google BigQuery table. This function requires the `pandas-gbq package `__. - See :meth:`pandas_gbq.to_gbq`. + Authentication to the Google BigQuery service is via OAuth 2.0. + + - If "private_key" is not provided: + + By default "application default credentials" are used. + + If default application credentials are not found or are restrictive, + user account credentials are used. In this case, you will be asked to + grant permissions for product name 'pandas GBQ'. + + - If "private_key" is provided: + + Service account credentials will be used to authenticate. + + Parameters + ---------- + dataframe : DataFrame + DataFrame to be written. 
+ destination_table : string + Name of table to be written, in the form 'dataset.tablename'. + project_id : str + Google BigQuery Account project ID. + chunksize : int (default 10000) + Number of rows to be inserted in each chunk from the dataframe. + Set to ``None`` to load the whole dataframe at once. + verbose : boolean (default True) + Show percentage complete. + reauth : boolean (default False) + Force Google BigQuery to reauthenticate the user. This is useful + if multiple accounts are used. + if_exists : {'fail', 'replace', 'append'}, default 'fail' + Behavior when the destination table exists. + 'fail': If table exists, do nothing. + 'replace': If table exists, drop it, recreate it, and insert data. + 'append': If table exists, insert data. Create if does not exist. + private_key : str (optional) + Service account private key in JSON format. Can be file path + or string contents. This is useful for remote server + authentication (eg. Jupyter/IPython notebook on remote host). + kwargs : dict + Arbitrary keyword arguments. + + auth_local_webserver (boolean): default False + Use the [local webserver flow] instead of the [console flow] when + getting user credentials. + + .. [local webserver flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. [console flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + .. versionadded:: pandas-gbq 0.2.0 + table_schema (list of dicts): + List of BigQuery table fields to which according DataFrame columns + conform to, e.g. `[{'name': 'col1', 'type': 'STRING'},...]`. If + schema is not provided, it will be generated according to dtypes + of DataFrame columns. See BigQuery API documentation on available + names of a field. + .. 
versionadded:: pandas-gbq 0.3.1 + + See Also + -------- + pandas_gbq.to_gbq + pandas.DataFrame.to_gbq """ pandas_gbq = _try_import() - pandas_gbq.to_gbq(*args, **kwargs) + pandas_gbq.to_gbq( + dataframe, destination_table, project_id, chunksize=chunksize, + verbose=verbose, reauth=reauth, if_exists=if_exists, + private_key=private_key, **kwargs) From 042922784aa5fbf73927cdbdb57b4eae715eaede Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Apr 2018 11:15:50 -0700 Subject: [PATCH 03/13] CLN: line lengths. --- pandas/core/frame.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3c8e00af5a5d8..0dc5cef1cc70f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1167,8 +1167,8 @@ def to_gbq( Arbitrary keyword arguments. auth_local_webserver (boolean): default False - Use the [local webserver flow] instead of the [console flow] when - getting user credentials. + Use the [local webserver flow] instead of the [console flow] + when getting user credentials. .. [local webserver flow] http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server @@ -1176,11 +1176,11 @@ def to_gbq( http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console .. versionadded:: pandas-gbq 0.2.0 table_schema (list of dicts): - List of BigQuery table fields to which according DataFrame columns - conform to, e.g. `[{'name': 'col1', 'type': 'STRING'},...]`. If - schema is not provided, it will be generated according to dtypes - of DataFrame columns. See BigQuery API documentation on available - names of a field. + List of BigQuery table fields to which according DataFrame + columns conform to, e.g. `[{'name': 'col1', 'type': + 'STRING'},...]`. 
If schema is not provided, it will be + generated according to dtypes of DataFrame columns. See + BigQuery API documentation on available names of a field. .. versionadded:: pandas-gbq 0.3.1 See Also From bee847bbd7a9b2c74ddf643c267d1e444d71479a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Apr 2018 11:18:03 -0700 Subject: [PATCH 04/13] DOC: remove dead link to pandas.io.to_gbq --- pandas/core/frame.py | 1 - pandas/io/gbq.py | 70 -------------------------------------------- 2 files changed, 71 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0dc5cef1cc70f..08aa160a7ee10 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1186,7 +1186,6 @@ def to_gbq( See Also -------- pandas_gbq.to_gbq - pandas.io.to_gbq """ from pandas.io import gbq return gbq.to_gbq( diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index f306aa0806ad8..8d97b125165ff 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -104,76 +104,6 @@ def to_gbq( dataframe, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', private_key=None, **kwargs): - """ - Write a DataFrame to a Google BigQuery table. - - This function requires the `pandas-gbq package - `__. - - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If "private_key" is not provided: - - By default "application default credentials" are used. - - If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. - - - If "private_key" is provided: - - Service account credentials will be used to authenticate. - - Parameters - ---------- - dataframe : DataFrame - DataFrame to be written. - destination_table : string - Name of table to be written, in the form 'dataset.tablename'. - project_id : str - Google BigQuery Account project ID. 
- chunksize : int (default 10000) - Number of rows to be inserted in each chunk from the dataframe. - Set to ``None`` to load the whole dataframe at once. - verbose : boolean (default True) - Show percentage complete. - reauth : boolean (default False) - Force Google BigQuery to reauthenticate the user. This is useful - if multiple accounts are used. - if_exists : {'fail', 'replace', 'append'}, default 'fail' - Behavior when the destination table exists. - 'fail': If table exists, do nothing. - 'replace': If table exists, drop it, recreate it, and insert data. - 'append': If table exists, insert data. Create if does not exist. - private_key : str (optional) - Service account private key in JSON format. Can be file path - or string contents. This is useful for remote server - authentication (eg. Jupyter/IPython notebook on remote host). - kwargs : dict - Arbitrary keyword arguments. - - auth_local_webserver (boolean): default False - Use the [local webserver flow] instead of the [console flow] when - getting user credentials. - - .. [local webserver flow] - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. [console flow] - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console - .. versionadded:: pandas-gbq 0.2.0 - table_schema (list of dicts): - List of BigQuery table fields to which according DataFrame columns - conform to, e.g. `[{'name': 'col1', 'type': 'STRING'},...]`. If - schema is not provided, it will be generated according to dtypes - of DataFrame columns. See BigQuery API documentation on available - names of a field. - .. 
versionadded:: pandas-gbq 0.3.1 - - See Also - -------- - pandas_gbq.to_gbq - pandas.DataFrame.to_gbq - """ pandas_gbq = _try_import() pandas_gbq.to_gbq( dataframe, destination_table, project_id, chunksize=chunksize, From 2e5b14829b34d9fc785f036b3be67daa63e2996f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Apr 2018 11:22:32 -0700 Subject: [PATCH 05/13] DOC: update versionadded string for pandas-gbq --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 08aa160a7ee10..4f2e3f781d4a0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1174,14 +1174,14 @@ def to_gbq( http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server .. [console flow] http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console - .. versionadded:: pandas-gbq 0.2.0 + .. versionadded:: pandas-gbq-0.2.0 table_schema (list of dicts): List of BigQuery table fields to which according DataFrame columns conform to, e.g. `[{'name': 'col1', 'type': 'STRING'},...]`. If schema is not provided, it will be generated according to dtypes of DataFrame columns. See BigQuery API documentation on available names of a field. - .. versionadded:: pandas-gbq 0.3.1 + .. versionadded:: pandas-gbq-0.3.1 See Also -------- From cb178d9e711f27fcbfc650c1f221df07e085a726 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 3 Apr 2018 13:53:34 -0700 Subject: [PATCH 06/13] Add pandas-gbq args to function signature directly. 
--- pandas/core/frame.py | 46 +++++++++++++++++++++++--------------------- pandas/io/gbq.py | 9 +++++---- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4f2e3f781d4a0..d29f514420bb0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1119,7 +1119,7 @@ def to_dict(self, orient='dict', into=dict): def to_gbq( self, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', private_key=None, - **kwargs): + auth_local_webserver=False, table_schema=None): """ Write a DataFrame to a Google BigQuery table. @@ -1142,6 +1142,8 @@ def to_gbq( Parameters ---------- + dataframe : DataFrame + DataFrame to be written to Google BigQuery. destination_table : string Name of table to be written, in the form 'dataset.tablename'. project_id : str @@ -1163,35 +1165,35 @@ def to_gbq( Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. Jupyter/IPython notebook on remote host). - kwargs : dict - Arbitrary keyword arguments. - - auth_local_webserver (boolean): default False - Use the [local webserver flow] instead of the [console flow] - when getting user credentials. - - .. [local webserver flow] - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. [console flow] - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console - .. versionadded:: pandas-gbq-0.2.0 - table_schema (list of dicts): - List of BigQuery table fields to which according DataFrame - columns conform to, e.g. `[{'name': 'col1', 'type': - 'STRING'},...]`. If schema is not provided, it will be - generated according to dtypes of DataFrame columns. See - BigQuery API documentation on available names of a field. - .. 
versionadded:: pandas-gbq-0.3.1 + auth_local_webserver : boolean (default False) + Use the [local webserver flow] instead of the [console flow] + when getting user credentials. + + .. [local webserver flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. [console flow] + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + + *New in version 0.2.0 of pandas-gbq*. + table_schema : list of dicts (optional) + List of BigQuery table fields to which according DataFrame + columns conform to, e.g. `[{'name': 'col1', 'type': + 'STRING'},...]`. If schema is not provided, it will be + generated according to dtypes of DataFrame columns. See + BigQuery API documentation on available names of a field. + + *New in version 0.3.1 of pandas-gbq*. See Also -------- - pandas_gbq.to_gbq + pandas_gbq.to_gbq : This function in the pandas-gbq library. """ from pandas.io import gbq return gbq.to_gbq( self, destination_table, project_id, chunksize=chunksize, verbose=verbose, reauth=reauth, if_exists=if_exists, - private_key=private_key, **kwargs) + private_key=private_key, auth_local_webserver=auth_local_webserver, + table_schema=table_schema) @classmethod diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 8d97b125165ff..fd80f23d8a9c9 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -88,7 +88,7 @@ def read_gbq( See Also -------- - pandas_gbq.read_gbq + pandas_gbq.read_gbq : This function in the pandas-gbq library. 
""" pandas_gbq = _try_import() return pandas_gbq.read_gbq( @@ -103,9 +103,10 @@ def read_gbq( def to_gbq( dataframe, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', private_key=None, - **kwargs): + auth_local_webserver=False, table_schema=None): pandas_gbq = _try_import() - pandas_gbq.to_gbq( + return pandas_gbq.to_gbq( dataframe, destination_table, project_id, chunksize=chunksize, verbose=verbose, reauth=reauth, if_exists=if_exists, - private_key=private_key, **kwargs) + private_key=private_key, auth_local_webserver=auth_local_webserver, + table_schema=table_schema) From f09d38e861adb4a6bb7708d1742611ede2b5127c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 3 Apr 2018 14:08:13 -0700 Subject: [PATCH 07/13] DOC: cross-reference to_gbq and read_gbq --- pandas/core/frame.py | 1 + pandas/io/gbq.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d29f514420bb0..fcab178e2edf1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1187,6 +1187,7 @@ def to_gbq( See Also -------- pandas_gbq.to_gbq : This function in the pandas-gbq library. + pandas.read_gbq : Read a DataFrame from Google BigQuery. """ from pandas.io import gbq return gbq.to_gbq( diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index fd80f23d8a9c9..0d53559960beb 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -89,6 +89,7 @@ def read_gbq( See Also -------- pandas_gbq.read_gbq : This function in the pandas-gbq library. + pandas.DataFrame.to_gbq : Write a DataFrame to Google BigQuery. """ pandas_gbq = _try_import() return pandas_gbq.read_gbq( From b6fdf378805faa2044c13bbf2793422a9b4c0610 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 4 Apr 2018 12:49:24 -0700 Subject: [PATCH 08/13] Fix lint error. 
--- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fcab178e2edf1..f1a16ed3352a2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1196,7 +1196,6 @@ def to_gbq( private_key=private_key, auth_local_webserver=auth_local_webserver, table_schema=table_schema) - @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, coerce_float=False, nrows=None): From 37b2a08af9ced867af4c969b588a9a8086a2e510 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 4 Apr 2018 14:55:04 -0700 Subject: [PATCH 09/13] DOC: render BigQuery auth flows as links Also, adjust line breaks per review comments. --- pandas/core/frame.py | 65 +++++++++++++++++++++++--------------------- pandas/io/gbq.py | 13 ++++----- 2 files changed, 40 insertions(+), 38 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f1a16ed3352a2..80ef93236fd06 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1116,10 +1116,9 @@ def to_dict(self, orient='dict', into=dict): else: raise ValueError("orient '%s' not understood" % orient) - def to_gbq( - self, destination_table, project_id, chunksize=10000, - verbose=True, reauth=False, if_exists='fail', private_key=None, - auth_local_webserver=False, table_schema=None): + def to_gbq(self, destination_table, project_id, chunksize=None, + verbose=True, reauth=False, if_exists='fail', private_key=None, + auth_local_webserver=False, table_schema=None): """ Write a DataFrame to a Google BigQuery table. @@ -1128,57 +1127,61 @@ def to_gbq( Authentication to the Google BigQuery service is via OAuth 2.0. - - If "private_key" is not provided: + - If ``private_key`` is provided, the library loads the JSON service + account credentials and uses those to authenticate. - By default "application default credentials" are used. + - If no ``private_key`` is provided, the library tries `application + default credentials`_. 
- If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. + .. _application default credentials: + https://cloud.google.com/docs/authentication/production#providing_credentials_to_your_application - - If "private_key" is provided: - - Service account credentials will be used to authenticate. + - If application default credentials are not found or cannot be used + with BigQuery, the library authenticates with user account + credentials. In this case, you will be asked to grant permissions + for product name 'pandas GBQ'. Parameters ---------- - dataframe : DataFrame - DataFrame to be written to Google BigQuery. - destination_table : string + destination_table : str Name of table to be written, in the form 'dataset.tablename'. project_id : str Google BigQuery Account project ID. - chunksize : int (default 10000) + chunksize : int, optional Number of rows to be inserted in each chunk from the dataframe. Set to ``None`` to load the whole dataframe at once. - verbose : boolean (default True) + verbose : bool, default True Show percentage complete. - reauth : boolean (default False) + reauth : bool, default False Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. - if_exists : {'fail', 'replace', 'append'}, default 'fail' - Behavior when the destination table exists. - 'fail': If table exists, do nothing. - 'replace': If table exists, drop it, recreate it, and insert data. - 'append': If table exists, insert data. Create if does not exist. - private_key : str (optional) + if_exists : str, default 'fail' + Behavior when the destination table exists. Value can be one of: + + ``'fail'`` + If table exists, do nothing. + ``'replace'`` + If table exists, drop it, recreate it, and insert data. + ``'append'`` + If table exists, insert data. Create if does not exist. 
+ private_key : str, optional Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. Jupyter/IPython notebook on remote host). - auth_local_webserver : boolean (default False) - Use the [local webserver flow] instead of the [console flow] + auth_local_webserver : bool, default False + Use the `local webserver flow`_ instead of the `console flow`_ when getting user credentials. - .. [local webserver flow] + .. _local webserver flow: http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. [console flow] + .. _console flow: http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console *New in version 0.2.0 of pandas-gbq*. - table_schema : list of dicts (optional) + table_schema : list of dicts, optional List of BigQuery table fields to which according DataFrame - columns conform to, e.g. `[{'name': 'col1', 'type': - 'STRING'},...]`. If schema is not provided, it will be + columns conform to, e.g. ``[{'name': 'col1', 'type': + 'STRING'},...]``. If schema is not provided, it will be generated according to dtypes of DataFrame columns. See BigQuery API documentation on available names of a field. diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 0d53559960beb..ab37e8c2fa27b 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -21,9 +21,9 @@ def _try_import(): return pandas_gbq -def read_gbq( - query, project_id=None, index_col=None, col_order=None, reauth=False, - verbose=True, private_key=None, dialect='legacy', **kwargs): +def read_gbq(query, project_id=None, index_col=None, col_order=None, + reauth=False, verbose=True, private_key=None, dialect='legacy', + **kwargs): """ Load data from Google BigQuery. 
@@ -101,10 +101,9 @@ def read_gbq( **kwargs) -def to_gbq( - dataframe, destination_table, project_id, chunksize=10000, - verbose=True, reauth=False, if_exists='fail', private_key=None, - auth_local_webserver=False, table_schema=None): +def to_gbq(dataframe, destination_table, project_id, chunksize=None, + verbose=True, reauth=False, if_exists='fail', private_key=None, + auth_local_webserver=False, table_schema=None): pandas_gbq = _try_import() return pandas_gbq.to_gbq( dataframe, destination_table, project_id, chunksize=chunksize, From f6c38f0eecc68fa0671641279e248dd3a9fd4ab7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 5 Apr 2018 11:53:18 -0700 Subject: [PATCH 10/13] DOC: pandas-gbq to whatsnew --- doc/source/whatsnew/v0.23.0.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ce63cb2473bc4..33abfe5b6e948 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -924,6 +924,9 @@ read the `NumFOCUS blogpost`_ recapping the sprint. to functions, methods and classes. (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) - Added a reference to :func:`DataFrame.assign` in the concatenate section of the merging documentation (:issue:`18665`) +- Updated ``to_gbq`` and ``read_gbq`` documentation to reflect changes from + the Pandas-GBQ library. Adds intersphinx mapping to Pandas-GBQ library. + (:issue:`20564`) .. 
_whatsnew_0230.bug_fixes: From 0889d6d960df8b1e55b3ab2ff6267838fd9e7606 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 5 Apr 2018 14:55:34 -0700 Subject: [PATCH 11/13] DOC: sync pandas-gbq changes from 0.4.0 (deprecate verbosity) --- pandas/core/frame.py | 8 +++++--- pandas/io/gbq.py | 37 ++++++++++++++++++++++--------------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 80ef93236fd06..af6b64057e358 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1117,7 +1117,7 @@ def to_dict(self, orient='dict', into=dict): raise ValueError("orient '%s' not understood" % orient) def to_gbq(self, destination_table, project_id, chunksize=None, - verbose=True, reauth=False, if_exists='fail', private_key=None, + verbose=None, reauth=False, if_exists='fail', private_key=None, auth_local_webserver=False, table_schema=None): """ Write a DataFrame to a Google BigQuery table. @@ -1150,8 +1150,6 @@ def to_gbq(self, destination_table, project_id, chunksize=None, chunksize : int, optional Number of rows to be inserted in each chunk from the dataframe. Set to ``None`` to load the whole dataframe at once. - verbose : bool, default True - Show percentage complete. reauth : bool, default False Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. @@ -1186,6 +1184,10 @@ def to_gbq(self, destination_table, project_id, chunksize=None, BigQuery API documentation on available names of a field. *New in version 0.3.1 of pandas-gbq*. + verbose : boolean, deprecated + *Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module + to adjust verbosity instead + <https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__.
See Also -------- diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index ab37e8c2fa27b..236d70609e76c 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -22,7 +22,7 @@ def _try_import(): def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=True, private_key=None, dialect='legacy', + reauth=False, verbose=None, private_key=None, dialect='legacy', **kwargs): """ Load data from Google BigQuery. @@ -50,27 +50,34 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, SQL-Like Query to return data values. project_id : str Google BigQuery Account project ID. - index_col : str (optional) + index_col : str, optional Name of result column to use for index in results DataFrame. - col_order : list(str) (optional) + col_order : list(str), optional List of BigQuery column names in the desired order for results DataFrame. - reauth : boolean (default False) + reauth : boolean, default False Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. - verbose : boolean (default True) - Verbose output. - private_key : str (optional) + private_key : str, optional Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. Jupyter/IPython notebook on remote host). - dialect : {'legacy', 'standard'}, default 'legacy' - SQL syntax dialect to use. - 'legacy' : Use BigQuery's legacy SQL dialect. - 'standard' : Use BigQuery's standard SQL, which is - compliant with the SQL 2011 standard. For more information - see `BigQuery SQL Reference - <https://cloud.google.com/bigquery/sql-reference/>`__. + dialect : str, default 'legacy' + SQL syntax dialect to use. Value can be one of: + + ``'legacy'`` + Use BigQuery's legacy SQL dialect. For more information see + `BigQuery Legacy SQL Reference + <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__. + ``'standard'`` + Use BigQuery's standard SQL, which is + compliant with the SQL 2011 standard. For more information + see `BigQuery Standard SQL Reference + <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
+ verbose : boolean, deprecated + *Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module + to adjust verbosity instead + <https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__. kwargs : dict Arbitrary keyword arguments. configuration (dict): query config parameters for job processing. @@ -102,7 +109,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, def to_gbq(dataframe, destination_table, project_id, chunksize=None, - verbose=True, reauth=False, if_exists='fail', private_key=None, + verbose=None, reauth=False, if_exists='fail', private_key=None, auth_local_webserver=False, table_schema=None): pandas_gbq = _try_import() return pandas_gbq.to_gbq( From aa99d472d6d5d1e951cc1ef8073e43d10b2a5d79 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 5 Apr 2018 16:20:54 -0700 Subject: [PATCH 12/13] DOC: clarify Pandas-GBQ version in whatsnew. --- doc/source/whatsnew/v0.23.0.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 33abfe5b6e948..ff2ba77922261 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -925,8 +925,8 @@ read the `NumFOCUS blogpost`_ recapping the sprint. (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) - Added a reference to :func:`DataFrame.assign` in the concatenate section of the merging documentation (:issue:`18665`) - Updated ``to_gbq`` and ``read_gbq`` documentation to reflect changes from - the Pandas-GBQ library. Adds intersphinx mapping to Pandas-GBQ library. - (:issue:`20564`) + the Pandas-GBQ library version 0.4.0. Adds intersphinx mapping to Pandas-GBQ + library. (:issue:`20564`) ..
_whatsnew_0230.bug_fixes: From 1ab09342b807dc46191745be5c2647c9f7cdbacf Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 9 Apr 2018 10:39:21 +0200 Subject: [PATCH 13/13] move whatsnew --- doc/source/whatsnew/v0.23.0.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ff2ba77922261..4f05a6f108add 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -404,7 +404,10 @@ Other Enhancements - :func:`read_html` now accepts a ``displayed_only`` keyword argument to controls whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`) - zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`) - :class:`DataFrame` and :class:`Series` now support matrix multiplication (```@```) operator (:issue:`10259`) for Python>=3.5 - +- Updated ``to_gbq`` and ``read_gbq`` signature and documentation to reflect changes from + the Pandas-GBQ library version 0.4.0. Adds intersphinx mapping to Pandas-GBQ + library. (:issue:`20564`) + .. _whatsnew_0230.api_breaking: Backwards incompatible API changes @@ -924,9 +927,6 @@ read the `NumFOCUS blogpost`_ recapping the sprint. to functions, methods and classes. (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) - Added a reference to :func:`DataFrame.assign` in the concatenate section of the merging documentation (:issue:`18665`) -- Updated ``to_gbq`` and ``read_gbq`` documentation to reflect changes from - the Pandas-GBQ library version 0.4.0. Adds intersphinx mapping to Pandas-GBQ - library. (:issue:`20564`) .. _whatsnew_0230.bug_fixes: