Skip to content

Commit 42c3eaf

Browse files
shahar1 and Taragolis authored
Fix BigQuery connection and add docs (#38430)
Co-authored-by: Andrey Anshin <[email protected]>
1 parent a92c47b commit 42c3eaf

File tree

4 files changed

+128
-17
lines changed

4 files changed

+128
-17
lines changed

airflow/providers/google/cloud/hooks/bigquery.py

+59-17
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import uuid
2929
from copy import deepcopy
3030
from datetime import datetime, timedelta
31+
from functools import cached_property
3132
from typing import TYPE_CHECKING, Any, Iterable, Mapping, NoReturn, Sequence, Union, cast
3233

3334
from aiohttp import ClientSession as ClientSession
@@ -103,14 +104,49 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
103104
conn_type = "gcpbigquery"
104105
hook_name = "Google Bigquery"
105106

107+
@classmethod
def get_connection_form_widgets(cls) -> dict[str, Any]:
    """Return connection widgets to add to connection form.

    Starts from the widgets provided by the parent hook and adds the
    BigQuery-specific fields:

    * ``use_legacy_sql`` -- boolean field, defaults to ``True``.
    * ``location`` -- free-text field.
    * ``priority`` -- text field, defaults to ``"INTERACTIVE"``; only
      ``"INTERACTIVE"`` and ``"BATCH"`` pass validation.
    * ``api_resource_configs`` -- text field validated as JSON.
    * ``labels`` -- text field validated as JSON.

    :return: mapping of field name to form field.
    """
    # UI-related imports are kept local to this method (presumably so the hook
    # module can be imported without the web UI dependencies -- confirm).
    from flask_appbuilder.fieldwidgets import BS3TextFieldWidget
    from flask_babel import lazy_gettext
    from wtforms import validators
    from wtforms.fields.simple import BooleanField, StringField

    from airflow.www.validators import ValidJson

    connection_form_widgets = super().get_connection_form_widgets()
    connection_form_widgets["use_legacy_sql"] = BooleanField(lazy_gettext("Use Legacy SQL"), default=True)
    connection_form_widgets["location"] = StringField(
        lazy_gettext("Location"), widget=BS3TextFieldWidget()
    )
    connection_form_widgets["priority"] = StringField(
        lazy_gettext("Priority"),
        default="INTERACTIVE",
        widget=BS3TextFieldWidget(),
        validators=[validators.AnyOf(["INTERACTIVE", "BATCH"])],
    )
    connection_form_widgets["api_resource_configs"] = StringField(
        lazy_gettext("API Resource Configs"), widget=BS3TextFieldWidget(), validators=[ValidJson()]
    )
    # The "labels" widget is registered exactly once; the original assigned the
    # identical field twice in a row, the second assignment being a no-op overwrite.
    connection_form_widgets["labels"] = StringField(
        lazy_gettext("Labels"), widget=BS3TextFieldWidget(), validators=[ValidJson()]
    )
    return connection_form_widgets
138+
139+
@classmethod
def get_ui_field_behaviour(cls) -> dict[str, Any]:
    """Return custom field behaviour.

    No BigQuery-specific customisation is applied here: the value produced
    by the parent class is returned unchanged.
    """
    inherited_behaviour = super().get_ui_field_behaviour()
    return inherited_behaviour
143+
106144
def __init__(
107145
self,
108-
gcp_conn_id: str = GoogleBaseHook.default_conn_name,
109146
use_legacy_sql: bool = True,
110147
location: str | None = None,
111148
priority: str = "INTERACTIVE",
112149
api_resource_configs: dict | None = None,
113-
impersonation_chain: str | Sequence[str] | None = None,
114150
impersonation_scopes: str | Sequence[str] | None = None,
115151
labels: dict | None = None,
116152
**kwargs,
@@ -120,18 +156,25 @@ def __init__(
120156
"The `delegate_to` parameter has been deprecated before and finally removed in this version"
121157
" of Google Provider. You MUST convert it to `impersonate_chain`"
122158
)
123-
super().__init__(
124-
gcp_conn_id=gcp_conn_id,
125-
impersonation_chain=impersonation_chain,
126-
)
127-
self.use_legacy_sql = use_legacy_sql
128-
self.location = location
129-
self.priority = priority
159+
super().__init__(**kwargs)
160+
self.use_legacy_sql: bool = self._get_field("use_legacy_sql", use_legacy_sql)
161+
self.location: str | None = self._get_field("location", location)
162+
self.priority: str = self._get_field("priority", priority)
130163
self.running_job_id: str | None = None
131-
self.api_resource_configs: dict = api_resource_configs or {}
132-
self.labels = labels
133-
self.credentials_path = "bigquery_hook_credentials.json"
134-
self.impersonation_scopes = impersonation_scopes
164+
self.api_resource_configs: dict = self._get_field("api_resource_configs", api_resource_configs or {})
165+
self.labels = self._get_field("labels", labels or {})
166+
self.impersonation_scopes: str | Sequence[str] | None = impersonation_scopes
167+
168+
# NOTE: ``cached_property`` wraps the deprecated function, so the result is
# stored on the instance after the first access; later reads of the attribute
# come from the cache and do not call the wrapped function again.
@cached_property
@deprecated(
    category=AirflowProviderDeprecationWarning,
    reason=(
        "`BigQueryHook.credentials_path` property is deprecated and will be removed in the future. "
        "This property used for obtaining credentials path but no longer in actual use. "
    ),
)
def credentials_path(self) -> str:
    """Return the fixed legacy credentials file name (deprecated).

    Kept only for backward compatibility; the value is a constant and is
    not derived from the connection.
    """
    return "bigquery_hook_credentials.json"
135178

136179
def get_conn(self) -> BigQueryConnection:
137180
"""Get a BigQuery PEP 249 connection object."""
@@ -172,18 +215,17 @@ def get_uri(self) -> str:
172215
"""Override from ``DbApiHook`` for ``get_sqlalchemy_engine()``."""
173216
return f"bigquery://{self.project_id}"
174217

175-
def get_sqlalchemy_engine(self, engine_kwargs=None):
218+
def get_sqlalchemy_engine(self, engine_kwargs: dict | None = None):
176219
"""Create an SQLAlchemy engine object.
177220
178221
:param engine_kwargs: Kwargs used in :func:`~sqlalchemy.create_engine`.
179222
"""
180223
if engine_kwargs is None:
181224
engine_kwargs = {}
182-
extras = self.get_connection(self.gcp_conn_id).extra_dejson
183-
credentials_path = get_field(extras, "key_path")
225+
credentials_path = get_field(self.extras, "key_path")
184226
if credentials_path:
185227
return create_engine(self.get_uri(), credentials_path=credentials_path, **engine_kwargs)
186-
keyfile_dict = get_field(extras, "keyfile_dict")
228+
keyfile_dict = get_field(self.extras, "keyfile_dict")
187229
if keyfile_dict:
188230
keyfile_content = keyfile_dict if isinstance(keyfile_dict, dict) else json.loads(keyfile_dict)
189231
return create_engine(self.get_uri(), credentials_info=keyfile_content, **engine_kwargs)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
.. Licensed to the Apache Software Foundation (ASF) under one
2+
or more contributor license agreements. See the NOTICE file
3+
distributed with this work for additional information
4+
regarding copyright ownership. The ASF licenses this file
5+
to you under the Apache License, Version 2.0 (the
6+
"License"); you may not use this file except in compliance
7+
with the License. You may obtain a copy of the License at
8+
9+
.. http://www.apache.org/licenses/LICENSE-2.0
10+
11+
.. Unless required by applicable law or agreed to in writing,
12+
software distributed under the License is distributed on an
13+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
KIND, either express or implied. See the License for the
15+
specific language governing permissions and limitations
16+
under the License.
17+
18+
19+
20+
.. _howto/connection:gcpbigquery:
21+
22+
Google Cloud BigQuery Connection
23+
================================
24+
25+
The Google Cloud BigQuery connection type enables integration with Google Cloud BigQuery.
26+
As it is built on top of the Google Cloud Connection (i.e., the BigQuery hook inherits from the
27+
GCP base hook), the basic authentication methods and parameters are exactly the same as the Google Cloud Connection.
28+
Extra parameters that are specific to BigQuery will be covered in this document.
29+
30+
31+
Configuring the Connection
32+
--------------------------
33+
.. note::
34+
Please refer to :ref:`Google Cloud Connection docs<howto/connection:gcp:configuring_the_connection>`
35+
for information regarding the basic authentication parameters.
36+
37+
Impersonation Scopes
    Optional scope, or list of scopes, to use for the impersonated account.
38+
39+
40+
Use Legacy SQL
41+
Whether or not the connection should utilize legacy SQL.
42+
43+
Location
44+
One of `BigQuery locations <https://cloud.google.com/bigquery/docs/locations>`_ where the dataset resides.
45+
If None, it utilizes the default location configured in the BigQuery service.
46+
47+
Priority
48+
Should be either "INTERACTIVE" or "BATCH",
49+
see `running queries docs <https://cloud.google.com/bigquery/docs/running-queries>`_.
50+
Interactive query jobs are jobs that BigQuery runs on demand.
51+
Batch query jobs are jobs that BigQuery waits to run until idle compute resources are available.
52+
53+
API Resource Configs
54+
A dictionary containing parameters for configuring the Google BigQuery Jobs API.
55+
These configurations are applied according to the specifications outlined in the
56+
`BigQuery Jobs API documentation <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs>`_.
57+
For example, you can specify configurations such as ``{"query": {"useQueryCache": false}}`` (the value must be valid JSON).
58+
This parameter is useful when you need to provide additional parameters that are not directly supported by the
59+
BigQueryHook.
60+
61+
Labels
62+
A dictionary of labels to be applied on the BigQuery job.

docs/apache-airflow-providers-google/connections/gcp.rst

+2
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ For example:
8282
8383
export AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT='google-cloud-platform://'
8484
85+
.. _howto/connection:gcp:configuring_the_connection:
86+
8587
Configuring the Connection
8688
--------------------------
8789

tests/providers/google/cloud/hooks/test_bigquery.py

+5
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ def test_delegate_to_runtime_error():
7777

7878
@pytest.mark.db_test
7979
class TestBigQueryHookMethods(_BigQueryBaseTestClass):
80+
def test_credentials_path_derprecation(self):
    """Accessing ``credentials_path`` warns about deprecation and yields the legacy file name."""
    expected_path = "bigquery_hook_credentials.json"
    with pytest.warns(AirflowProviderDeprecationWarning):
        observed_path = self.hook.credentials_path
    assert observed_path == expected_path
84+
8085
@mock.patch("airflow.providers.google.cloud.hooks.bigquery.BigQueryConnection")
8186
@mock.patch("airflow.providers.google.cloud.hooks.bigquery.BigQueryHook._authorize")
8287
@mock.patch("airflow.providers.google.cloud.hooks.bigquery.build")

0 commit comments

Comments
 (0)