Skip to content

Commit f37772a

Browse files
shahar1, lucasfcnunes, eladkal, jbampton
authored
GoogleDriveHook: Add folder_id param to upload_file (#29477)
* add folder_id param to upload_file

---------

Co-authored-by: Lucas Fernando Nunes <[email protected]>
Co-authored-by: eladkal <[email protected]>
Co-authored-by: John Bampton <[email protected]>
1 parent 0222f7d commit f37772a

File tree

4 files changed

+48
-14
lines changed

4 files changed

+48
-14
lines changed

airflow/providers/google/suite/hooks/drive.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,9 @@ def get_conn(self) -> Any:
7272
self._conn = build("drive", self.api_version, http=http_authorized, cache_discovery=False)
7373
return self._conn
7474

75-
def _ensure_folders_exists(self, path: str) -> str:
75+
def _ensure_folders_exists(self, path: str, folder_id: str) -> str:
7676
service = self.get_conn()
77-
current_parent = "root"
77+
current_parent = folder_id
7878
folders = path.split("/")
7979
depth = 0
8080
# First tries to enter directories
@@ -88,7 +88,13 @@ def _ensure_folders_exists(self, path: str) -> str:
8888
]
8989
result = (
9090
service.files()
91-
.list(q=" and ".join(conditions), spaces="drive", fields="files(id, name)")
91+
.list(
92+
q=" and ".join(conditions),
93+
spaces="drive",
94+
fields="files(id, name)",
95+
includeItemsFromAllDrives=True,
96+
supportsAllDrives=True,
97+
)
9298
.execute(num_retries=self.num_retries)
9399
)
94100
files = result.get("files", [])
@@ -110,7 +116,11 @@ def _ensure_folders_exists(self, path: str) -> str:
110116
}
111117
file = (
112118
service.files()
113-
.create(body=file_metadata, fields="id")
119+
.create(
120+
body=file_metadata,
121+
fields="id",
122+
supportsAllDrives=True,
123+
)
114124
.execute(num_retries=self.num_retries)
115125
)
116126
self.log.info("Created %s directory", current_folder)
@@ -202,6 +212,7 @@ def upload_file(
202212
remote_location: str,
203213
chunk_size: int = 100 * 1024 * 1024,
204214
resumable: bool = False,
215+
folder_id: str = "root",
205216
) -> str:
206217
"""
207218
Uploads a file that is available locally to a Google Drive service.
@@ -215,14 +226,15 @@ def upload_file(
215226
or to -1.
216227
:param resumable: True if this is a resumable upload. False means upload
217228
in a single request.
229+
:param folder_id: The base/root folder id for remote_location (part of the drive URL of a folder).
218230
:return: File ID
219231
"""
220232
service = self.get_conn()
221233
directory_path, _, file_name = remote_location.rpartition("/")
222234
if directory_path:
223-
parent = self._ensure_folders_exists(directory_path)
235+
parent = self._ensure_folders_exists(path=directory_path, folder_id=folder_id)
224236
else:
225-
parent = "root"
237+
parent = folder_id
226238

227239
file_metadata = {"name": file_name, "parents": [parent]}
228240
media = MediaFileUpload(local_location, chunksize=chunk_size, resumable=resumable)

airflow/providers/google/suite/transfers/local_to_drive.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class LocalFilesystemToGoogleDriveOperator(BaseOperator):
6363
If set as a sequence, the identities from the list must grant
6464
Service Account Token Creator IAM role to the directly preceding identity, with first
6565
account from the list granting this role to the originating account
66+
:param folder_id: The base/root folder id for each local path in the Drive folder
6667
:return: Remote file ids after upload
6768
"""
6869

@@ -82,6 +83,7 @@ def __init__(
8283
resumable: bool = False,
8384
delegate_to: str | None = None,
8485
impersonation_chain: str | Sequence[str] | None = None,
86+
folder_id: str = "root",
8587
**kwargs,
8688
) -> None:
8789
super().__init__(**kwargs)
@@ -94,6 +96,7 @@ def __init__(
9496
self.resumable = resumable
9597
self.delegate_to = delegate_to
9698
self.impersonation_chain = impersonation_chain
99+
self.folder_id = folder_id
97100

98101
def execute(self, context: Context) -> list[str]:
99102
hook = GoogleDriveHook(
@@ -113,6 +116,7 @@ def execute(self, context: Context) -> list[str]:
113116
remote_location=str(Path(self.drive_folder) / Path(local_path).name),
114117
chunk_size=self.chunk_size,
115118
resumable=self.resumable,
119+
folder_id=self.folder_id,
116120
)
117121

118122
remote_file_ids.append(remote_file_id)

tests/providers/google/suite/hooks/test_drive.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,19 +53,21 @@ def test_ensure_folders_exists_when_no_folder_exists(self, mock_get_conn):
5353
{"id": "ID_4"},
5454
]
5555

56-
result_value = self.gdrive_hook._ensure_folders_exists("AAA/BBB/CCC/DDD")
56+
result_value = self.gdrive_hook._ensure_folders_exists(path="AAA/BBB/CCC/DDD", folder_id="root")
5757

5858
mock_get_conn.assert_has_calls(
5959
[
6060
mock.call()
6161
.files()
6262
.list(
63+
fields="files(id, name)",
64+
includeItemsFromAllDrives=True,
6365
q=(
6466
"trashed=false and mimeType='application/vnd.google-apps.folder' "
6567
"and name='AAA' and 'root' in parents"
6668
),
6769
spaces="drive",
68-
fields="files(id, name)",
70+
supportsAllDrives=True,
6971
),
7072
mock.call()
7173
.files()
@@ -76,6 +78,7 @@ def test_ensure_folders_exists_when_no_folder_exists(self, mock_get_conn):
7678
"parents": ["root"],
7779
},
7880
fields="id",
81+
supportsAllDrives=True,
7982
),
8083
mock.call()
8184
.files()
@@ -86,6 +89,7 @@ def test_ensure_folders_exists_when_no_folder_exists(self, mock_get_conn):
8689
"parents": ["ID_1"],
8790
},
8891
fields="id",
92+
supportsAllDrives=True,
8993
),
9094
mock.call()
9195
.files()
@@ -96,6 +100,7 @@ def test_ensure_folders_exists_when_no_folder_exists(self, mock_get_conn):
96100
"parents": ["ID_2"],
97101
},
98102
fields="id",
103+
supportsAllDrives=True,
99104
),
100105
mock.call()
101106
.files()
@@ -106,6 +111,7 @@ def test_ensure_folders_exists_when_no_folder_exists(self, mock_get_conn):
106111
"parents": ["ID_3"],
107112
},
108113
fields="id",
114+
supportsAllDrives=True,
109115
),
110116
],
111117
any_order=True,
@@ -125,20 +131,22 @@ def test_ensure_folders_exists_when_some_folders_exists(self, mock_get_conn):
125131
{"id": "ID_4"},
126132
]
127133

128-
result_value = self.gdrive_hook._ensure_folders_exists("AAA/BBB/CCC/DDD")
134+
result_value = self.gdrive_hook._ensure_folders_exists(path="AAA/BBB/CCC/DDD", folder_id="root")
129135

130136
mock_get_conn.assert_has_calls(
131137
[
132138
*[
133139
mock.call()
134140
.files()
135141
.list(
142+
fields="files(id, name)",
143+
includeItemsFromAllDrives=True,
136144
q=(
137145
"trashed=false and mimeType='application/vnd.google-apps.folder' "
138146
f"and name='{d}' and '{key}' in parents"
139147
),
140148
spaces="drive",
141-
fields="files(id, name)",
149+
supportsAllDrives=True,
142150
)
143151
for d, key in [("AAA", "root"), ("BBB", "ID_1"), ("CCC", "ID_2")]
144152
],
@@ -151,6 +159,7 @@ def test_ensure_folders_exists_when_some_folders_exists(self, mock_get_conn):
151159
"parents": ["ID_2"],
152160
},
153161
fields="id",
162+
supportsAllDrives=True,
154163
),
155164
mock.call()
156165
.files()
@@ -161,6 +170,7 @@ def test_ensure_folders_exists_when_some_folders_exists(self, mock_get_conn):
161170
"parents": ["ID_3"],
162171
},
163172
fields="id",
173+
supportsAllDrives=True,
164174
),
165175
],
166176
any_order=True,
@@ -177,20 +187,22 @@ def test_ensure_folders_exists_when_all_folders_exists(self, mock_get_conn):
177187
{"files": [{"id": "ID_4"}]},
178188
]
179189

180-
result_value = self.gdrive_hook._ensure_folders_exists("AAA/BBB/CCC/DDD")
190+
result_value = self.gdrive_hook._ensure_folders_exists(path="AAA/BBB/CCC/DDD", folder_id="root")
181191

182192
mock_get_conn.assert_has_calls(
183193
[
184194
*[
185195
mock.call()
186196
.files()
187197
.list(
198+
fields="files(id, name)",
199+
includeItemsFromAllDrives=True,
188200
q=(
189201
"trashed=false and mimeType='application/vnd.google-apps.folder' "
190202
f"and name='{d}' and '{key}' in parents"
191203
),
192204
spaces="drive",
193-
fields="files(id, name)",
205+
supportsAllDrives=True,
194206
)
195207
for d, key in [("AAA", "root"), ("BBB", "ID_1"), ("CCC", "ID_2"), ("DDD", "ID_3")]
196208
],
@@ -327,7 +339,7 @@ def test_upload_file_to_subdirectory(
327339

328340
return_value = self.gdrive_hook.upload_file("local_path", "AA/BB/CC/remote_path")
329341

330-
mock_ensure_folders_exists.assert_called_once_with("AA/BB/CC")
342+
mock_ensure_folders_exists.assert_called_once_with(path="AA/BB/CC", folder_id="root")
331343
mock_get_conn.assert_has_calls(
332344
[
333345
mock.call()

tests/providers/google/suite/transfers/test_local_to_drive.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ def test_execute(self, mock_hook):
3333
context = {}
3434
mock_hook.return_value.upload_file.return_value = REMOTE_FILE_IDS
3535
op = LocalFilesystemToGoogleDriveOperator(
36-
task_id="test_task", local_paths=LOCAL_PATHS, drive_folder=DRIVE_FOLDER, gcp_conn_id=GCP_CONN_ID
36+
task_id="test_task",
37+
local_paths=LOCAL_PATHS,
38+
drive_folder=DRIVE_FOLDER,
39+
gcp_conn_id=GCP_CONN_ID,
40+
folder_id="some_folder_id",
3741
)
3842
op.execute(context)
3943

@@ -43,12 +47,14 @@ def test_execute(self, mock_hook):
4347
remote_location="test_folder/test1",
4448
chunk_size=100 * 1024 * 1024,
4549
resumable=False,
50+
folder_id="some_folder_id",
4651
),
4752
mock.call(
4853
local_location="test2",
4954
remote_location="test_folder/test2",
5055
chunk_size=100 * 1024 * 1024,
5156
resumable=False,
57+
folder_id="some_folder_id",
5258
),
5359
]
5460
mock_hook.return_value.upload_file.assert_has_calls(calls)

0 commit comments

Comments
 (0)