Skip to content

Commit a7d6614

Browse files
authored
fix(gatsby-source-filesystem): fix broken stream with gzipped files (#28913) (#28995)
(cherry picked from commit a8b516f)
1 parent c15af99 commit a7d6614

File tree

7 files changed

+413
-18
lines changed

7 files changed

+413
-18
lines changed

packages/gatsby-source-filesystem/package.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
"@babel/cli": "^7.12.1",
2626
"@babel/core": "^7.12.3",
2727
"babel-preset-gatsby-package": "^0.10.0",
28-
"cross-env": "^7.0.3"
28+
"cross-env": "^7.0.3",
29+
"msw": "^0.25.0"
2930
},
3031
"homepage": "https://github.com/gatsbyjs/gatsby/tree/master/packages/gatsby-source-filesystem#readme",
3132
"keywords": [
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
import * as path from "path"
2+
import * as zlib from "zlib"
3+
import * as os from "os"
4+
import { rest } from "msw"
5+
import { setupServer } from "msw/node"
6+
import { Writable } from "stream"
7+
import got from "got"
8+
import createRemoteFileNode from "../create-remote-file-node"
9+
10+
const fs = jest.requireActual(`fs-extra`)
11+
12+
const gotStream = jest.spyOn(got, `stream`)
13+
const urlCount = new Map()
14+
15+
/**
 * Resolves with the size in bytes that `fs.stat` reports for a file.
 *
 * @param {string} file File path on disk
 * @returns {Promise<number>} The `size` field of the file's stat result
 */
async function getFileSize(file) {
  const { size } = await fs.stat(file)

  return size
}
20+
21+
/**
 * A utility to help create file responses for the msw handlers.
 * - A url with an `attempts` query param gets a truncated body for that many
 *   requests (counted per origin + pathname) before the full file is delivered
 * - `maxBytes` is the offset of the last source byte included in a truncated
 *   body; the truncation is applied before any compression
 * - `contentLength=false` leaves the content length out of the result
 *
 * @param {string} file File path on disk
 * @param {Object} req The request object from msw
 * @param {{ compress?: boolean }} options When `compress` is set the body is deflated with zlib
 * @returns {Promise<{ content: Buffer, contentLength: number | undefined }>}
 *   The body to send and the byte length of the complete (optionally deflated) file
 */
async function getFileContent(file, req, options = {}) {
  const cacheKey = req.url.origin + req.url.pathname
  const maxRetry = req.url.searchParams.get(`attempts`)
  const maxBytes = req.url.searchParams.get(`maxBytes`)
  const currentRetryCount = urlCount.get(cacheKey) || 0
  urlCount.set(cacheKey, currentRetryCount + 1)

  // The advertised length always describes the complete payload, even when
  // the body below is truncated. That mismatch is what marks a download as
  // incomplete.
  let fileContentBuffer = await fs.readFile(file)
  if (options.compress) {
    fileContentBuffer = zlib.deflateSync(fileContentBuffer)
  }

  const content = await new Promise((resolve, reject) => {
    const fileStream = fs.createReadStream(file, {
      // `end` is inclusive, so a truncated body holds bytes 0..maxBytes
      end:
        currentRetryCount < Number(maxRetry)
          ? Number(maxBytes)
          : Number.MAX_SAFE_INTEGER,
    })

    const result = []
    const writableStream = new Writable({
      write(chunk, encoding, next) {
        result.push(chunk)

        next()
      },
    })

    writableStream.on(`finish`, () => {
      resolve(Buffer.concat(result))
    })

    // pipe() does not forward errors, so every stream in the chain needs its
    // own listener; otherwise a failure would leave this promise pending.
    fileStream.on(`error`, reject)
    writableStream.on(`error`, reject)

    let stream = fileStream
    if (options.compress) {
      stream = fileStream.pipe(zlib.createDeflate())
      stream.on(`error`, reject)
    }

    stream.pipe(writableStream)
  })

  return {
    content,
    contentLength:
      req.url.searchParams.get(`contentLength`) === `false`
        ? undefined
        : fileContentBuffer.length,
  }
}
79+
80+
/**
 * Builds an msw GET handler that answers `url` with a fixture file read
 * through getFileContent, so the `attempts`, `maxBytes` and `contentLength`
 * query params of the request are honoured.
 *
 * @param {string} url Url to intercept
 * @param {string} fixture Fixture path relative to this test file
 * @param {{ contentType: string, compress?: boolean }} options Response
 *   content type and whether the body is compressed
 */
function createFixtureHandler(url, fixture, { contentType, compress = false }) {
  return rest.get(url, async (req, res, ctx) => {
    const { content, contentLength } = await getFileContent(
      path.join(__dirname, fixture),
      req,
      { compress }
    )

    const transformers = [ctx.set(`Content-Type`, contentType)]
    if (compress) {
      transformers.push(ctx.set(`content-encoding`, `gzip`))
    }
    transformers.push(
      ctx.set(`Content-Length`, contentLength),
      ctx.status(200),
      ctx.body(content)
    )

    return res(...transformers)
  })
}

// Request interception layer used by the tests below.
const server = setupServer(
  createFixtureHandler(
    `http://external.com/logo.svg`,
    `./fixtures/gatsby-logo.svg`,
    { contentType: `image/svg+xml` }
  ),
  createFixtureHandler(
    `http://external.com/logo-gzip.svg`,
    `./fixtures/gatsby-logo.svg`,
    { contentType: `image/svg+xml`, compress: true }
  ),
  // The fixture is a JPEG, so advertise it as one
  createFixtureHandler(
    `http://external.com/dog.jpg`,
    `./fixtures/dog-thumbnail.jpg`,
    { contentType: `image/jpeg` }
  )
)
125+
126+
/**
 * Creates a stand-in cache object: mocked `get`/`set` functions plus a
 * freshly created temporary directory exposed as `directory`.
 */
function createMockCache() {
  const prefix = path.join(os.tmpdir(), `gatsby-source-filesystem-`)
  const directory = fs.mkdtempSync(prefix)

  return { get: jest.fn(), set: jest.fn(), directory }
}
137+
138+
// Mocked reporter: a jest mock function that returns an empty object.
const reporter = jest.fn(() => ({}))
141+
142+
describe(`create-remote-file-node`, () => {
  let cache

  // Resolves a fixture file name to its path next to this test file
  const fixturePath = name => path.join(__dirname, `./fixtures/${name}`)

  // Calls createRemoteFileNode for `url` with the shared cache/reporter and
  // fresh createNode/createNodeId mocks
  const downloadFile = url =>
    createRemoteFileNode({
      url,
      store: {},
      getCache: () => cache,
      createNode: jest.fn(),
      createNodeId: jest.fn(),
      reporter,
    })

  beforeAll(() => {
    cache = createMockCache()
    // Establish requests interception layer before all tests.
    server.listen()
  })
  afterAll(() => {
    if (cache) {
      fs.removeSync(cache.directory)
    }

    // Clean up after all tests are done, preventing this
    // interception layer from affecting irrelevant tests.
    server.close()
  })

  beforeEach(() => {
    // Every test starts with no recorded got.stream calls and no per-url
    // attempt counts
    gotStream.mockClear()
    urlCount.clear()
  })

  it(`downloads and creates a file`, async () => {
    const fileNode = await downloadFile(`http://external.com/logo.svg`)

    expect(fileNode.base).toBe(`logo.svg`)
    expect(fileNode.size).toBe(await getFileSize(fixturePath(`gatsby-logo.svg`)))
    expect(gotStream).toBeCalledTimes(1)
  })

  it(`downloads and creates a gzip file`, async () => {
    const fileNode = await downloadFile(`http://external.com/logo-gzip.svg`)

    expect(fileNode.base).toBe(`logo-gzip.svg`)
    expect(fileNode.size).toBe(await getFileSize(fixturePath(`gatsby-logo.svg`)))
    expect(gotStream).toBeCalledTimes(1)
  })

  it(`downloads and creates a binary file`, async () => {
    const fileNode = await downloadFile(`http://external.com/dog.jpg`)

    expect(fileNode.base).toBe(`dog.jpg`)
    expect(fileNode.size).toBe(
      await getFileSize(fixturePath(`dog-thumbnail.jpg`))
    )
    expect(gotStream).toBeCalledTimes(1)
  })

  it(`doesn't retry when no content-length is given`, async () => {
    const fileNode = await downloadFile(
      `http://external.com/logo-gzip.svg?attempts=1&maxBytes=300&contentLength=false`
    )

    expect(fileNode.base).toBe(`logo-gzip.svg`)
    // The truncated body is kept because nothing signals it is incomplete
    expect(fileNode.size).not.toBe(
      await getFileSize(fixturePath(`gatsby-logo.svg`))
    )
    expect(gotStream).toBeCalledTimes(1)
  })

  describe(`retries the download`, () => {
    it(`Retries when gzip compression file is incomplete`, async () => {
      const fileNode = await downloadFile(
        `http://external.com/logo-gzip.svg?attempts=1&maxBytes=300`
      )

      expect(fileNode.base).toBe(`logo-gzip.svg`)
      expect(fileNode.size).toBe(
        await getFileSize(fixturePath(`gatsby-logo.svg`))
      )
      expect(gotStream).toBeCalledTimes(2)
    })

    it(`Retries when binary file is incomplete`, async () => {
      const fileNode = await downloadFile(
        `http://external.com/dog.jpg?attempts=1&maxBytes=300`
      )

      expect(fileNode.base).toBe(`dog.jpg`)
      expect(fileNode.size).toBe(
        await getFileSize(fixturePath(`dog-thumbnail.jpg`))
      )
      expect(gotStream).toBeCalledTimes(2)
    })
  })
})

packages/gatsby-source-filesystem/src/__tests__/create-remote-file-node.js

+9-6
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,7 @@ describe(`create-remote-file-node`, () => {
100100
describe(`valid url`, () => {
101101
let uuid = 0
102102

103-
const setup = (
104-
args = {},
105-
type = `response`,
106-
response = { statusCode: 200 }
107-
) => {
103+
const setup = (args = {}, response = { statusCode: 200 }) => {
108104
const url = `https://images.whatever.com/real-image-trust-me-${uuid}.png`
109105

110106
const gotMock = {
@@ -121,14 +117,21 @@ describe(`create-remote-file-node`, () => {
121117
got.stream.mockReturnValueOnce({
122118
pipe: jest.fn(() => gotMock),
123119
on: jest.fn((mockType, mockCallback) => {
124-
if (mockType === type) {
120+
if (mockType === `response`) {
125121
// got throws on 404/500 so we mimic this behaviour
126122
if (response.statusCode === 404) {
127123
throw new Error(`Response code 404 (Not Found)`)
128124
}
129125

130126
mockCallback(response)
131127
}
128+
if (mockType === `downloadProgress`) {
129+
mockCallback({
130+
progress: 1,
131+
transferred: 1,
132+
total: 1,
133+
})
134+
}
132135

133136
return gotMock
134137
}),
Loading
Loading

packages/gatsby-source-filesystem/src/create-remote-file-node.js

+11-3
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,14 @@ const requestRemoteNode = (url, headers, tmpFilename, httpOpts, attempt = 1) =>
159159
},
160160
...httpOpts,
161161
})
162+
163+
let haveAllBytesBeenWritten = false
164+
responseStream.on(`downloadProgress`, progress => {
165+
if (progress.transferred === progress.total || progress.total === null) {
166+
haveAllBytesBeenWritten = true
167+
}
168+
})
169+
162170
const fsWriteStream = fs.createWriteStream(tmpFilename)
163171
responseStream.pipe(fsWriteStream)
164172

@@ -180,12 +188,12 @@ const requestRemoteNode = (url, headers, tmpFilename, httpOpts, attempt = 1) =>
180188

181189
responseStream.on(`response`, response => {
182190
resetTimeout()
183-
const contentLength =
184-
response.headers && Number(response.headers[`content-length`])
185191

186192
fsWriteStream.on(`finish`, () => {
193+
fsWriteStream.close()
194+
187195
// We have an incomplete download
188-
if (contentLength && contentLength !== fsWriteStream.bytesWritten) {
196+
if (!haveAllBytesBeenWritten) {
189197
fs.removeSync(tmpFilename)
190198

191199
if (attempt < INCOMPLETE_RETRY_LIMIT) {

0 commit comments

Comments
 (0)