Skip to content

Commit 9d322a4

Browse files
authored
refactor(gatsby): new dirty tracking implementation for queries (#27504)
* Remove dead code
* Consistently use job hash for static-query identification on the frontend (and queryId in node)
* New reducer to track the state of queries
* tmp: output newly calculated dirty queries (still using old calculation for actual query running to compare)
* Add new `QUERY_START` action
* Remove redundant component-data-dependencies reducer (now handled in the queries reducer)
* Actually use the new query tracking (and remove the old one)
* Fix data-tracking test
* Shape of tracked component state should match component reducer
* remove page-component machine (as we track query state in `queries` reducer now)
* Remove DELETE_COMPONENTS_DEPENDENCIES action
* Cleanup
* Cleanup
* Re-enable previously skipped test
* Cleanup
* Do not re-run queries with babel extraction errors
* WIP: tests for the queries reducer
* Track babel errors per component (not per page/static query)
* tests for the queries reducer
* rename test
* Cleanup / update snapshots
* Add missing snapshot
* fix integration tests?
* Revert "fix integration tests?" This reverts commit 066a4e1
* Restore DELETE_COMPONENTS_DEPENDENCIES as a no-op for BC
* Take into account deletePage/createPage pattern in onCreatePage
* Update test snapshot
* Do not mark page query as dirty when component has babel errors
* Use flag constants vs. literal values in tests
* Rename FLAG_ERROR_BABEL to FLAG_ERROR_EXTRACTION
1 parent e824841 commit 9d322a4

23 files changed

+1322
-954
lines changed

packages/gatsby/src/bootstrap/page-hot-reloader.ts

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import { emitter, store } from "../redux"
22
import apiRunnerNode from "../utils/api-runner-node"
33
import { boundActionCreators } from "../redux/actions"
4-
const { deletePage, deleteComponentsDependencies } = boundActionCreators
4+
const { deletePage } = boundActionCreators
55
import report from "gatsby-cli/lib/reporter"
66
import {
77
ICreateNodeAction,
@@ -39,7 +39,6 @@ const runCreatePages = async (): Promise<void> => {
3939
page.updatedAt < timestamp &&
4040
page.path !== `/404.html`
4141
) {
42-
deleteComponentsDependencies([page.path])
4342
deletePage(page)
4443
}
4544
})

packages/gatsby/src/query/__tests__/data-tracking.js

+13-11
Original file line number | Diff line number | Diff line change
@@ -200,14 +200,18 @@ const setup = async ({ restart = isFirstRun, clearCache = false } = {}) => {
200200
})
201201

202202
Object.entries(staticQueries).forEach(([id, query]) => {
203-
store.dispatch({
204-
type: `REPLACE_STATIC_QUERY`,
205-
payload: {
206-
id: `sq--${id}`,
207-
hash: `sq--${id}`,
208-
query,
209-
},
210-
})
203+
// Mimic real code behavior by only calling this action when static query text changes
204+
const lastQuery = mockPersistedState.staticQueryComponents?.get(`sq--${id}`)
205+
if (lastQuery?.query !== query) {
206+
store.dispatch({
207+
type: `REPLACE_STATIC_QUERY`,
208+
payload: {
209+
id: `sq--${id}`,
210+
hash: `sq--${id}`,
211+
query,
212+
},
213+
})
214+
}
211215
})
212216

213217
const queryIds = queryUtil.calcInitialDirtyQueryIds(store.getState())
@@ -1238,9 +1242,7 @@ describe(`query caching between builds`, () => {
12381242
expect(staticQueriesThatRan).toEqual([])
12391243
}, 999999)
12401244

1241-
// TO-DO: this is known issue - we always rerun queries for pages with no dependencies
1242-
// this mean that we will retry to rerun them every time we restart gatsby
1243-
it.skip(`rerunning should not run any queries (with restart)`, async () => {
1245+
it(`rerunning should not run any queries (with restart)`, async () => {
12441246
const {
12451247
pathsOfPagesWithQueriesThatRan,
12461248
staticQueriesThatRan,

packages/gatsby/src/query/index.js

+28-277
Original file line number | Diff line number | Diff line change
@@ -1,152 +1,36 @@
1-
// @flow
2-
31
const _ = require(`lodash`)
4-
const Queue = require(`better-queue`)
5-
// const convertHrtime = require(`convert-hrtime`)
6-
const { store, emitter } = require(`../redux`)
7-
const { boundActionCreators } = require(`../redux/actions`)
8-
const report = require(`gatsby-cli/lib/reporter`)
2+
const { store } = require(`../redux`)
3+
const { hasFlag, FLAG_ERROR_EXTRACTION } = require(`../redux/reducers/queries`)
94
const queryQueue = require(`./queue`)
10-
const { GraphQLRunner } = require(`./graphql-runner`)
11-
const pageDataUtil = require(`../utils/page-data`)
12-
13-
const seenIdsWithoutDataDependencies = new Set()
14-
let queuedDirtyActions = []
15-
const extractedQueryIds = new Set()
16-
17-
// Remove pages from seenIdsWithoutDataDependencies when they're deleted
18-
// so their query will be run again if they're created again.
19-
emitter.on(`DELETE_PAGE`, action => {
20-
seenIdsWithoutDataDependencies.delete(action.payload.path)
21-
})
22-
23-
emitter.on(`CREATE_NODE`, action => {
24-
queuedDirtyActions.push(action)
25-
})
26-
27-
emitter.on(`DELETE_NODE`, action => {
28-
queuedDirtyActions.push({ payload: action.payload })
29-
})
305

31-
// ///////////////////////////////////////////////////////////////////
32-
// Calculate dirty static/page queries
33-
34-
const popExtractedQueries = () => {
35-
const queries = [...extractedQueryIds]
36-
extractedQueryIds.clear()
37-
return queries
38-
}
39-
40-
const findIdsWithoutDataDependencies = state => {
41-
const allTrackedIds = new Set()
42-
const boundAddToTrackedIds = allTrackedIds.add.bind(allTrackedIds)
43-
state.componentDataDependencies.nodes.forEach(dependenciesOnNode => {
44-
dependenciesOnNode.forEach(boundAddToTrackedIds)
45-
})
46-
state.componentDataDependencies.connections.forEach(
47-
dependenciesOnConnection => {
48-
dependenciesOnConnection.forEach(boundAddToTrackedIds)
6+
/**
7+
* Calculates the set of dirty query IDs (page.paths, or staticQuery.id's).
8+
*
9+
* Dirty state is tracked in `queries` reducer, here we simply filter
10+
* them from all tracked queries.
11+
*/
12+
const calcDirtyQueryIds = state => {
13+
const { trackedQueries, trackedComponents, deletedQueries } = state.queries
14+
15+
const queriesWithBabelErrors = new Set()
16+
for (const component of trackedComponents.values()) {
17+
if (hasFlag(component.errors, FLAG_ERROR_EXTRACTION)) {
18+
for (const queryId of component.pages) {
19+
queriesWithBabelErrors.add(queryId)
20+
}
4921
}
50-
)
51-
52-
// Get list of paths not already tracked and run the queries for these
53-
// paths.
54-
const notTrackedIds = new Set(
55-
[
56-
...Array.from(state.pages.values(), p => p.path),
57-
...[...state.staticQueryComponents.values()].map(c => c.id),
58-
].filter(
59-
x => !allTrackedIds.has(x) && !seenIdsWithoutDataDependencies.has(x)
60-
)
61-
)
62-
63-
// Add new IDs to our seen array so we don't keep trying to run queries for them.
64-
// Pages without queries can't be tracked.
65-
for (const notTrackedId of notTrackedIds) {
66-
seenIdsWithoutDataDependencies.add(notTrackedId)
6722
}
68-
69-
return notTrackedIds
70-
}
71-
72-
const popNodeQueries = state => {
73-
const actions = _.uniq(queuedDirtyActions, a => a.payload.id)
74-
const uniqDirties = actions.reduce((dirtyIds, action) => {
75-
const node = action.payload
76-
77-
if (!node || !node.id || !node.internal.type) return dirtyIds
78-
79-
// Find components that depend on this node so are now dirty.
80-
if (state.componentDataDependencies.nodes.has(node.id)) {
81-
state.componentDataDependencies.nodes.get(node.id).forEach(n => {
82-
if (n) {
83-
dirtyIds.add(n)
84-
}
85-
})
23+
// Note: trackedQueries contains both - page and static query ids
24+
const dirtyQueryIds = []
25+
for (const [queryId, query] of trackedQueries) {
26+
if (deletedQueries.has(queryId)) {
27+
continue
8628
}
87-
88-
// Find connections that depend on this node so are now invalid.
89-
if (state.componentDataDependencies.connections.has(node.internal.type)) {
90-
state.componentDataDependencies.connections
91-
.get(node.internal.type)
92-
.forEach(n => {
93-
if (n) {
94-
dirtyIds.add(n)
95-
}
96-
})
29+
if (query.dirty > 0 && !queriesWithBabelErrors.has(queryId)) {
30+
dirtyQueryIds.push(queryId)
9731
}
98-
99-
return dirtyIds
100-
}, new Set())
101-
102-
boundActionCreators.deleteComponentsDependencies([...uniqDirties])
103-
104-
queuedDirtyActions = []
105-
return uniqDirties
106-
}
107-
108-
const popNodeAndDepQueries = state => {
109-
const nodeQueries = popNodeQueries(state)
110-
111-
const noDepQueries = findIdsWithoutDataDependencies(state)
112-
113-
return _.uniq([...nodeQueries, ...noDepQueries])
114-
}
115-
116-
/**
117-
* Calculates the set of dirty query IDs (page.paths, or
118-
* staticQuery.hash's). These are queries that:
119-
*
120-
* - depend on nodes or node collections (via
121-
* `actions.createPageDependency`) that have changed.
122-
* - do NOT have node dependencies. Since all queries should return
123-
* data, then this implies that node dependencies have not been
124-
* tracked, and therefore these queries haven't been run before
125-
* - have been recently extracted (see `./query-watcher.js`)
126-
*
127-
* Note, this function pops queries off internal queues, so it's up
128-
* to the caller to reference the results
129-
*/
130-
131-
const calcDirtyQueryIds = state =>
132-
_.union(popNodeAndDepQueries(state), popExtractedQueries())
133-
134-
/**
135-
* Same as `calcDirtyQueryIds`, except that we only include extracted
136-
* queries that depend on nodes or haven't been run yet. We do this
137-
* because the page component reducer/machine always enqueues
138-
* extractedQueryIds but during bootstrap we may not want to run those
139-
* page queries if their data hasn't changed since the last time we
140-
* ran Gatsby.
141-
*/
142-
const calcInitialDirtyQueryIds = state => {
143-
const nodeAndNoDepQueries = popNodeAndDepQueries(state)
144-
145-
const extractedQueriesThatNeedRunning = _.intersection(
146-
popExtractedQueries(),
147-
nodeAndNoDepQueries
148-
)
149-
return _.union(extractedQueriesThatNeedRunning, nodeAndNoDepQueries)
32+
}
33+
return dirtyQueryIds
15034
}
15135

15236
/**
@@ -176,37 +60,14 @@ const createStaticQueryJob = (state, queryId) => {
17660
const component = state.staticQueryComponents.get(queryId)
17761
const { hash, id, query, componentPath } = component
17862
return {
179-
id: hash,
63+
id: queryId,
18064
hash,
18165
query,
18266
componentPath,
18367
context: { path: id },
18468
}
18569
}
18670

187-
/**
188-
* Creates activity object which:
189-
* - creates actual progress activity if there are any queries that need to be run
190-
* - creates activity-like object that just cancels pending activity if there are no queries to run
191-
*/
192-
const createQueryRunningActivity = (queryJobsCount, parentSpan) => {
193-
if (queryJobsCount) {
194-
const activity = report.createProgress(`run queries`, queryJobsCount, 0, {
195-
id: `query-running`,
196-
parentSpan,
197-
})
198-
activity.start()
199-
return activity
200-
} else {
201-
return {
202-
done: () => {
203-
report.completeActivity(`query-running`)
204-
},
205-
tick: () => {},
206-
}
207-
}
208-
}
209-
21071
const processStaticQueries = async (
21172
queryIds,
21273
{ state, activity, graphqlRunner, graphqlTracing }
@@ -258,120 +119,10 @@ const createPageQueryJob = (state, page) => {
258119
}
259120
}
260121

261-
// ///////////////////////////////////////////////////////////////////
262-
// Listener for gatsby develop
263-
264-
// Initialized via `startListening`
265-
let listenerQueue
266-
267-
/**
268-
* Run any dirty queries. See `calcQueries` for what constitutes a
269-
* dirty query
270-
*/
271-
const runQueuedQueries = () => {
272-
if (listenerQueue) {
273-
const state = store.getState()
274-
const { staticQueryIds, pageQueryIds } = groupQueryIds(
275-
calcDirtyQueryIds(state)
276-
)
277-
const pages = _.filter(pageQueryIds.map(id => state.pages.get(id)))
278-
const queryJobs = [
279-
...staticQueryIds.map(id => createStaticQueryJob(state, id)),
280-
...pages.map(page => createPageQueryJob(state, page)),
281-
]
282-
listenerQueue.push(queryJobs)
283-
}
284-
}
285-
286-
/**
287-
* Starts a background process that processes any dirty queries
288-
* whenever one of the following occurs:
289-
*
290-
* 1. A node has changed (but only after the api call has finished
291-
* running)
292-
* 2. A component query (e.g. by editing a React Component) has
293-
* changed
294-
*
295-
* For what constitutes a dirty query, see `calcQueries`
296-
*/
297-
298-
const startListeningToDevelopQueue = ({ graphqlTracing } = {}) => {
299-
// We use a queue to process batches of queries so that they are
300-
// processed consecutively
301-
let graphqlRunner = null
302-
const developQueue = queryQueue.createDevelopQueue(() => {
303-
if (!graphqlRunner) {
304-
graphqlRunner = new GraphQLRunner(store, { graphqlTracing })
305-
}
306-
return graphqlRunner
307-
})
308-
listenerQueue = new Queue((queryJobs, callback) => {
309-
const activity = createQueryRunningActivity(queryJobs.length)
310-
311-
const onFinish = (...arg) => {
312-
pageDataUtil.enqueueFlush()
313-
activity.done()
314-
return callback(...arg)
315-
}
316-
317-
return queryQueue
318-
.processBatch(developQueue, queryJobs, activity)
319-
.then(() => onFinish(null))
320-
.catch(onFinish)
321-
})
322-
323-
emitter.on(`API_RUNNING_START`, () => {
324-
report.pendingActivity({ id: `query-running` })
325-
})
326-
327-
emitter.on(`API_RUNNING_QUEUE_EMPTY`, runQueuedQueries)
328-
;[
329-
`DELETE_CACHE`,
330-
`CREATE_NODE`,
331-
`DELETE_NODE`,
332-
`DELETE_NODES`,
333-
`SET_SCHEMA_COMPOSER`,
334-
`SET_SCHEMA`,
335-
`ADD_FIELD_TO_NODE`,
336-
`ADD_CHILD_NODE_TO_PARENT_NODE`,
337-
].forEach(eventType => {
338-
emitter.on(eventType, event => {
339-
graphqlRunner = null
340-
})
341-
})
342-
}
343-
344-
const enqueueExtractedQueryId = pathname => {
345-
extractedQueryIds.add(pathname)
346-
}
347-
348-
const getPagesForComponent = componentPath => {
349-
const state = store.getState()
350-
return [...state.pages.values()].filter(
351-
p => p.componentPath === componentPath
352-
)
353-
}
354-
355-
const enqueueExtractedPageComponent = componentPath => {
356-
const pages = getPagesForComponent(componentPath)
357-
// Remove page data dependencies before re-running queries because
358-
// the changing of the query could have changed the data dependencies.
359-
// Re-running the queries will add back data dependencies.
360-
boundActionCreators.deleteComponentsDependencies(
361-
pages.map(p => p.path || p.id)
362-
)
363-
pages.forEach(page => enqueueExtractedQueryId(page.path))
364-
runQueuedQueries()
365-
}
366-
367122
module.exports = {
368-
calcInitialDirtyQueryIds,
123+
calcInitialDirtyQueryIds: calcDirtyQueryIds,
369124
calcDirtyQueryIds,
370125
processPageQueries,
371126
processStaticQueries,
372127
groupQueryIds,
373-
startListeningToDevelopQueue,
374-
runQueuedQueries,
375-
enqueueExtractedQueryId,
376-
enqueueExtractedPageComponent,
377128
}

0 commit comments

Comments
 (0)