Skip to content

Commit 6ac1ed6

Browse files
axe312ger and wardpeet authored
fix: ensure remote file downloads are queued in all cases (#34414)
* fix: move queue from remote file node creation to remote file fetching
* reduce number of concurrent requests per CPU core to 50
* rename worker function to mark it as worker
* improve typings
* refactor: remove GATSBY_EXPERIMENTAL_REMOTE_FILE_PLACEHOLDER flag
* fix typing?

Co-authored-by: Ward Peeters <[email protected]>
1 parent 201c181 commit 6ac1ed6

File tree

4 files changed

+72
-115
lines changed

4 files changed

+72
-115
lines changed

packages/gatsby-core-utils/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
"@babel/runtime": "^7.15.4",
3333
"ci-info": "2.0.0",
3434
"configstore": "^5.0.1",
35+
"fastq": "^1.13.0",
3536
"file-type": "^16.5.3",
3637
"fs-extra": "^10.0.0",
3738
"got": "^11.8.3",

packages/gatsby-core-utils/src/fetch-remote-file.ts

+58-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import {
1010
} from "./filename-utils"
1111
import type { IncomingMessage } from "http"
1212
import type { GatsbyCache } from "gatsby"
13+
import Queue from "fastq"
14+
import type { queue, done } from "fastq"
1315

1416
export interface IFetchRemoteFileOptions {
1517
url: string
@@ -72,9 +74,64 @@ const ERROR_CODES_TO_RETRY = [
7274
`ERR_GOT_REQUEST_ERROR`,
7375
]
7476

77+
/********************
78+
* Queue Management *
79+
********************/
80+
81+
const GATSBY_CONCURRENT_DOWNLOAD = process.env.GATSBY_CONCURRENT_DOWNLOAD
82+
? parseInt(process.env.GATSBY_CONCURRENT_DOWNLOAD, 10) || 0
83+
: 50
84+
85+
const q: queue<IFetchRemoteFileOptions, string> = Queue(
86+
fetchWorker,
87+
GATSBY_CONCURRENT_DOWNLOAD
88+
)
89+
90+
/**
91+
* fetchWorker
92+
* --
93+
* Handles fetch requests that are pushed into the queue
94+
*/
95+
async function fetchWorker(
96+
task: IFetchRemoteFileOptions,
97+
cb: done<string>
98+
): Promise<void> {
99+
try {
100+
const node = await fetchFile(task)
101+
return void cb(null, node)
102+
} catch (e) {
103+
return void cb(e)
104+
}
105+
}
106+
107+
/**
108+
* pushTask
109+
* --
110+
* Pushes a task into the queue
111+
*
112+
* Promisify a task in the queue
113+
* @param {IFetchRemoteFileOptions} task
114+
* @return {Promise<string>}
115+
*/
116+
async function pushTask(task: IFetchRemoteFileOptions): Promise<string> {
117+
return new Promise((resolve, reject) => {
118+
q.push(task, (err, node) => {
119+
if (!err) {
120+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
121+
resolve(node!)
122+
} else {
123+
reject(err)
124+
}
125+
})
126+
})
127+
}
75128
let fetchCache = new Map()
76129
let latestBuildId = ``
77130

131+
/***************************
132+
* Fetch remote file logic *
133+
***************************/
134+
78135
export async function fetchRemoteFile(
79136
args: IFetchRemoteFileOptions
80137
): Promise<string> {
@@ -91,7 +148,7 @@ export async function fetchRemoteFile(
91148
}
92149

93150
// Create file fetch promise and store it into cache
94-
const fetchPromise = fetchFile(args)
151+
const fetchPromise = pushTask(args)
95152
fetchCache.set(args.url, fetchPromise)
96153

97154
return fetchPromise.catch(err => {

packages/gatsby-source-filesystem/package.json

-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
"dependencies": {
1010
"@babel/runtime": "^7.15.4",
1111
"chokidar": "^3.5.2",
12-
"fastq": "^1.13.0",
1312
"file-type": "^16.5.3",
1413
"fs-extra": "^10.0.0",
1514
"gatsby-core-utils": "^3.7.0-next.0",

packages/gatsby-source-filesystem/src/create-remote-file-node.js

+13-113
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,6 @@
1-
const fs = require(`fs-extra`)
2-
const {
3-
createContentDigest,
4-
fetchRemoteFile,
5-
createFilePath,
6-
} = require(`gatsby-core-utils`)
7-
const path = require(`path`)
1+
const { fetchRemoteFile } = require(`gatsby-core-utils`)
82
const { isWebUri } = require(`valid-url`)
9-
const Queue = require(`fastq`)
103
const { createFileNode } = require(`./create-file-node`)
11-
const { getRemoteFileExtension } = require(`./utils`)
12-
13-
let showFlagWarning = !!process.env.GATSBY_EXPERIMENTAL_REMOTE_FILE_PLACEHOLDER
144

155
/********************
166
* Type Definitions *
@@ -46,41 +36,6 @@ let showFlagWarning = !!process.env.GATSBY_EXPERIMENTAL_REMOTE_FILE_PLACEHOLDER
4636
* @param {Reporter} [options.reporter]
4737
*/
4838

49-
/********************
50-
* Queue Management *
51-
********************/
52-
53-
const GATSBY_CONCURRENT_DOWNLOAD = process.env.GATSBY_CONCURRENT_DOWNLOAD
54-
? parseInt(process.env.GATSBY_CONCURRENT_DOWNLOAD, 10) || 0
55-
: 200
56-
57-
const queue = Queue(pushToQueue, GATSBY_CONCURRENT_DOWNLOAD)
58-
59-
/**
60-
* @callback {Queue~queueCallback}
61-
* @param {*} error
62-
* @param {*} result
63-
*/
64-
65-
/**
66-
* pushToQueue
67-
* --
68-
* Handle tasks that are pushed in to the Queue
69-
*
70-
*
71-
* @param {CreateRemoteFileNodePayload} task
72-
* @param {Queue~queueCallback} cb
73-
* @return {Promise<null>}
74-
*/
75-
async function pushToQueue(task, cb) {
76-
try {
77-
const node = await processRemoteNode(task)
78-
return cb(null, node)
79-
} catch (e) {
80-
return cb(e)
81-
}
82-
}
83-
8439
/******************
8540
* Core Functions *
8641
******************/
@@ -104,25 +59,14 @@ async function processRemoteNode({
10459
ext,
10560
name,
10661
}) {
107-
let filename
108-
if (process.env.GATSBY_EXPERIMENTAL_REMOTE_FILE_PLACEHOLDER) {
109-
filename = await fetchPlaceholder({
110-
fromPath: process.env.GATSBY_EXPERIMENTAL_REMOTE_FILE_PLACEHOLDER,
111-
url,
112-
cache,
113-
ext,
114-
name,
115-
})
116-
} else {
117-
filename = await fetchRemoteFile({
118-
url,
119-
cache,
120-
auth,
121-
httpHeaders,
122-
ext,
123-
name,
124-
})
125-
}
62+
const filename = await fetchRemoteFile({
63+
url,
64+
cache,
65+
auth,
66+
httpHeaders,
67+
ext,
68+
name,
69+
})
12670

12771
// Create the file node.
12872
const fileNode = await createFileNode(filename, createNodeId, {})
@@ -138,42 +82,10 @@ async function processRemoteNode({
13882
return fileNode
13983
}
14084

141-
async function fetchPlaceholder({ fromPath, url, cache, ext, name }) {
142-
const pluginCacheDir = cache.directory
143-
const digest = createContentDigest(url)
144-
145-
if (!ext) {
146-
ext = getRemoteFileExtension(url)
147-
}
148-
149-
const filename = createFilePath(path.join(pluginCacheDir, digest), name, ext)
150-
fs.copySync(fromPath, filename)
151-
return filename
152-
}
153-
15485
/**
15586
* Index of promises resolving to File node from remote url
15687
*/
15788
const processingCache = {}
158-
/**
159-
* pushTask
160-
* --
161-
* pushes a task in to the Queue and the processing cache
162-
*
163-
* Promisfy a task in queue
164-
* @param {CreateRemoteFileNodePayload} task
165-
* @return {Promise<Object>}
166-
*/
167-
const pushTask = task =>
168-
new Promise((resolve, reject) => {
169-
queue.push(task, (err, node) => {
170-
if (!err) {
171-
resolve(node)
172-
} else {
173-
reject(`failed to process ${task.url}\n${err}`)
174-
}
175-
})
176-
})
17789

17890
/***************
17991
* Entry Point *
@@ -202,20 +114,6 @@ module.exports = function createRemoteFileNode({
202114
ext = null,
203115
name = null,
204116
}) {
205-
if (showFlagWarning) {
206-
showFlagWarning = false
207-
// Note: This will use a placeholder image as the default for every file that is downloaded through this API.
208-
// That may break certain cases, in particular when the file is not meant to be an image or when the image
209-
// is expected to be of a particular type that is other than the placeholder. This API is meant to bypass
210-
// the remote download for local testing only.
211-
console.info(
212-
`GATSBY_EXPERIMENTAL_REMOTE_FILE_PLACEHOLDER: Any file downloaded by \`createRemoteFileNode\` will use the same placeholder image and skip the remote fetch. Note: This is an experimental flag that can change/disappear at any point.`
213-
)
214-
console.info(
215-
`GATSBY_EXPERIMENTAL_REMOTE_FILE_PLACEHOLDER: File to use: \`${process.env.GATSBY_EXPERIMENTAL_REMOTE_FILE_PLACEHOLDER}\``
216-
)
217-
}
218-
219117
// validation of the input
220118
// without this it's notoriously easy to pass in the wrong `createNodeId`
221119
// see gatsbyjs/gatsby#6643
@@ -245,11 +143,13 @@ module.exports = function createRemoteFileNode({
245143

246144
if (!url || isWebUri(url) === undefined) {
247145
return Promise.reject(
248-
`url passed to createRemoteFileNode is either missing or not a proper web uri: ${url}`
146+
new Error(
147+
`url passed to createRemoteFileNode is either missing or not a proper web uri: ${url}`
148+
)
249149
)
250150
}
251151

252-
const fileDownloadPromise = pushTask({
152+
const fileDownloadPromise = processRemoteNode({
253153
url,
254154
cache,
255155
createNode,

0 commit comments

Comments
 (0)