""" Google BigQuery support """
def _try_import():
# since pandas is a dependency of pandas-gbq
# we need to import on first use
try:
import pandas_gbq
except ImportError:
# give a nice error message
raise ImportError("Load data from Google BigQuery\n"
"\n"
"the pandas-gbq package is not installed\n"
"see the docs: https://pandas-gbq.readthedocs.io\n"
"\n"
"you can install via pip or conda:\n"
"pip install pandas-gbq\n"
"conda install pandas-gbq -c conda-forge\n")
return pandas_gbq


def read_gbq(query, project_id=None, index_col=None, col_order=None,
             reauth=False, verbose=True, private_key=None, dialect='legacy',
             **kwargs):
    """
    Load data from Google BigQuery.

    This function requires the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__.

    Authentication to the Google BigQuery service is via OAuth 2.0.

    - If ``private_key`` is not provided:

      By default "application default credentials" are used.

      If default application credentials are not found or are restrictive,
      user account credentials are used. In this case, you will be asked to
      grant permissions for product name 'pandas GBQ'.

    - If ``private_key`` is provided:

      Service account credentials will be used to authenticate.

    Parameters
    ----------
    query : str
        SQL-like query to return data values.
    project_id : str
        Google BigQuery account project ID.
    index_col : str, optional
        Name of the result column to use for the index in the results
        DataFrame.
    col_order : list(str), optional
        List of BigQuery column names in the desired order for the results
        DataFrame.
    reauth : boolean, default False
        Force Google BigQuery to re-authenticate the user. This is useful
        if multiple accounts are used.
    verbose : boolean, default True
        Verbose output.
    private_key : str, optional
        Service account private key in JSON format. Can be a file path
        or string contents. This is useful for remote server
        authentication (e.g. a Jupyter/IPython notebook on a remote host).
    dialect : {'legacy', 'standard'}, default 'legacy'
        SQL syntax dialect to use.

        'legacy' : Use BigQuery's legacy SQL dialect.

        'standard' : Use BigQuery's standard SQL, which is
        compliant with the SQL 2011 standard. For more information
        see `BigQuery SQL Reference
        <https://cloud.google.com/bigquery/sql-reference/>`__.
    **kwargs
        Arbitrary keyword arguments.

        configuration (dict): query config parameters for job processing.
        For example:

            configuration = {'query': {'useQueryCache': False}}

        For more information see the `BigQuery REST API Reference
        <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__.

    Returns
    -------
    df : DataFrame
        DataFrame representing the results of the query.

    See Also
    --------
    pandas_gbq.read_gbq : This function in the pandas-gbq library.
    pandas.DataFrame.to_gbq : Write a DataFrame to Google BigQuery.
    """
    pandas_gbq = _try_import()
    return pandas_gbq.read_gbq(
        query, project_id=project_id,
        index_col=index_col, col_order=col_order,
        reauth=reauth, verbose=verbose,
        private_key=private_key,
        dialect=dialect,
        **kwargs)
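
# A minimal usage sketch for read_gbq, assuming pandas-gbq is installed and
# that "my-project" is a billable GCP project you can access (the project id
# and the public dataset below are illustrative, not part of this module):
#
#     df = read_gbq(
#         "SELECT name, SUM(number) AS total "
#         "FROM `bigquery-public-data.usa_names.usa_1910_2013` "
#         "GROUP BY name",
#         project_id="my-project",
#         dialect="standard",
#         configuration={"query": {"useQueryCache": False}},
#     )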


def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
           verbose=True, reauth=False, if_exists='fail', private_key=None,
           auth_local_webserver=False, table_schema=None):
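    """
    Write a DataFrame to a Google BigQuery table.

    This function requires the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__, to which all arguments are
    forwarded. Paraphrasing the pandas-gbq documentation:
    ``destination_table`` is given as ``'dataset.tablename'``, ``chunksize``
    is the number of rows inserted per upload chunk, and ``if_exists`` is one
    of ``'fail'``, ``'replace'`` or ``'append'``.

    See Also
    --------
    pandas_gbq.to_gbq : This function in the pandas-gbq library.
    pandas.read_gbq : Read a DataFrame from Google BigQuery.
    """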
    pandas_gbq = _try_import()
    return pandas_gbq.to_gbq(
        dataframe, destination_table, project_id, chunksize=chunksize,
        verbose=verbose, reauth=reauth, if_exists=if_exists,
        private_key=private_key, auth_local_webserver=auth_local_webserver,
        table_schema=table_schema)
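
# A minimal usage sketch for to_gbq (the table and project names are
# hypothetical):
#
#     to_gbq(df, "my_dataset.my_table", project_id="my-project",
#            if_exists="append")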