Skip to content

Commit 5278e1e

Browse files
authored
feat(gatsby): switch from arrays to node iterators (#31718)
1 parent 3ee9008 commit 5278e1e

File tree

11 files changed

+223
-100
lines changed

11 files changed

+223
-100
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import { IGatsbyIterable } from "../types"
2+
3+
/**
 * Lazy, chainable wrapper around a sequence of values.
 *
 * The wrapped source may be one of:
 *  - a raw `Iterator` (e.g. `map.values()` or a generator object). This is the
 *    original calling convention; such a source can only be consumed once.
 *  - an `Iterable` (array, Set, Map, ...). A fresh iterator is requested from
 *    it on every pass, so the wrapper can be iterated repeatedly.
 *  - a factory returning an `Iterable`. It is invoked on every pass.
 *
 * `map`, `filter` and `concat` do no work until their result is iterated.
 * Their results can be iterated again whenever the underlying source can.
 */
export class GatsbyIterable<T> implements IGatsbyIterable<T> {
  constructor(
    private readonly source: Iterator<T> | Iterable<T> | (() => Iterable<T>)
  ) {}

  /**
   * Starts a pass over the sequence.
   *
   * @returns a fresh iterator for iterable/factory sources, or the wrapped
   * iterator itself when the instance was built from a raw iterator.
   */
  [Symbol.iterator](): Iterator<T> {
    const source = this.source
    if (typeof source === `function`) {
      return source()[Symbol.iterator]()
    }
    // Built-in iterators (Map#values(), generators) are iterable and return
    // themselves here, which keeps the original single-pass behavior for them.
    return isIterable(source) ? source[Symbol.iterator]() : source
  }

  /**
   * @param other values to yield after this sequence is exhausted
   * @returns a lazy sequence of this sequence's values followed by `other`'s
   */
  concat<U>(other: Iterable<U>): GatsbyIterable<T | U> {
    return new GatsbyIterable<T | U>(() => concatSequence<T, U>(this, other))
  }

  /**
   * @param fn transformation applied to each value when the result is iterated
   * @returns a lazy sequence of the transformed values
   */
  map<U>(fn: (entry: T) => U): GatsbyIterable<U> {
    return new GatsbyIterable<U>(() => mapSequence(this, fn))
  }

  /**
   * @param predicate values for which this returns a truthy result are kept
   * @returns a lazy sequence of the values that passed the predicate
   */
  filter(predicate: (entry: T) => unknown): GatsbyIterable<T> {
    return new GatsbyIterable<T>(() => filterSequence(this, predicate))
  }

  /**
   * Eagerly consumes the sequence, invoking `callback` once per value.
   * The callback's return value is ignored.
   */
  forEach(callback: (entry: T) => unknown): void {
    for (const value of this) {
      callback(value)
    }
  }
}

/** Tells an iterable source apart from a raw iterator that only has `next()`. */
function isIterable<T>(value: Iterator<T> | Iterable<T>): value is Iterable<T> {
  return typeof (value as Iterable<T>)[Symbol.iterator] === `function`
}

/** Yields `fn(value)` for every value of `source`. */
function* mapSequence<T, U>(
  source: Iterable<T>,
  fn: (arg: T) => U
): Generator<U> {
  for (const value of source) {
    yield fn(value)
  }
}

/** Yields only the values of `source` for which `predicate` is truthy. */
function* filterSequence<T>(
  source: Iterable<T>,
  predicate: (arg: T) => unknown
): Generator<T> {
  for (const value of source) {
    if (predicate(value)) {
      yield value
    }
  }
}

/** Yields every value of `first`, then every value of `second`. */
function* concatSequence<T, U = T>(
  first: Iterable<T>,
  second: Iterable<U>
): Generator<U | T> {
  yield* first
  yield* second
}

packages/gatsby/src/datastore/in-memory/in-memory-datastore.ts

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,32 @@
1-
import { IDataStore } from "../types"
1+
import { IDataStore, IGatsbyIterable } from "../types"
22
import { store } from "../../redux"
33
import { IGatsbyNode } from "../../redux/types"
4+
import { GatsbyIterable } from "../common/iterable"
45

56
/**
67
* @deprecated
78
*/
89
function getNodes(): Array<IGatsbyNode> {
// Copies every node held in the redux store's `nodes` collection into a new array.
// Diff note: the removed lines ("-") returned [] when `nodes` was missing; the
// added lines ("+") produce the same empty array by falling back to an empty Map.
9-
const nodes = store.getState().nodes
10-
if (nodes) {
11-
return Array.from(nodes.values())
12-
} else {
13-
return []
14-
}
10+
const nodes = store.getState().nodes ?? new Map()
11+
return Array.from(nodes.values())
1512
}
1613

1714
/**
1815
* @deprecated
1916
*/
2017
function getNodesByType(type: string): Array<IGatsbyNode> {
// Copies the nodes registered under `type` in the redux `nodesByType` map into a new array.
// Diff note: the removed lines ("-") returned [] for an unknown type; the added
// lines ("+") produce the same empty array by falling back to an empty Map.
21-
const nodes = store.getState().nodesByType.get(type)
22-
if (nodes) {
23-
return Array.from(nodes.values())
24-
} else {
25-
return []
26-
}
18+
const nodes = store.getState().nodesByType.get(type) ?? new Map()
19+
return Array.from(nodes.values())
20+
}
21+
22+
/**
 * Exposes the nodes in redux state as a GatsbyIterable without first copying
 * them into an array. When the `nodes` collection is missing, an empty Map
 * stands in, so the result is simply an empty sequence.
 */
function iterateNodes(): IGatsbyIterable<IGatsbyNode> {
  const { nodes } = store.getState()
  const values = (nodes ?? new Map()).values()
  return new GatsbyIterable(values)
}
26+
27+
/**
 * Exposes the nodes registered under `type` in the redux `nodesByType` map as
 * a GatsbyIterable without first copying them into an array. A type with no
 * entry falls back to an empty Map, so the result is an empty sequence.
 */
function iterateNodesByType(type: string): IGatsbyIterable<IGatsbyNode> {
  const nodesOfType = store.getState().nodesByType.get(type)
  const values = (nodesOfType ?? new Map()).values()
  return new GatsbyIterable(values)
}
2831

2932
function getNode(id: string): IGatsbyNode | undefined {
@@ -59,6 +62,8 @@ export function setupInMemoryStore(): IDataStore {
5962
getTypes,
6063
countNodes,
6164
ready,
65+
iterateNodes,
66+
iterateNodesByType,
6267

6368
// deprecated:
6469
getNodes,

packages/gatsby/src/datastore/in-memory/indexing.ts

Lines changed: 61 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { store } from "../../redux"
22
import { IGatsbyNode } from "../../redux/types"
33
import { IDbQueryElemMatch } from "../common/query"
4-
import { getNodes, getNodesByType } from "../"
4+
import { getDataStore } from "../"
55

66
// Only list supported ops here. "CacheableFilterOp"
77
export type FilterOp = // TODO: merge with DbComparator ?
@@ -205,19 +205,23 @@ export const ensureIndexByQuery = (
205205
// it's probably faster to loop through all nodes. Perhaps. Maybe.
206206

207207
if (nodeTypeNames.length === 1) {
208-
getNodesByType(nodeTypeNames[0]).forEach(node => {
209-
addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
210-
})
208+
getDataStore()
209+
.iterateNodesByType(nodeTypeNames[0])
210+
.forEach(node => {
211+
addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
212+
})
211213
} else {
212214
// Here we must first filter for the node type
213215
// This loop is expensive at scale (!)
214-
getNodes().forEach(node => {
215-
if (!nodeTypeNames.includes(node.internal.type)) {
216-
return
217-
}
216+
getDataStore()
217+
.iterateNodes()
218+
.forEach(node => {
219+
if (!nodeTypeNames.includes(node.internal.type)) {
220+
return
221+
}
218222

219-
addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
220-
})
223+
addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
224+
})
221225
}
222226

223227
postIndexingMetaSetup(filterCache, op)
@@ -245,22 +249,9 @@ export function ensureEmptyFilterCache(
245249
})
246250

247251
if (nodeTypeNames.length === 1) {
248-
getNodesByType(nodeTypeNames[0]).forEach(node => {
249-
if (!node.__gatsby_resolved) {
250-
const typeName = node.internal.type
251-
const resolvedNodes = resolvedNodesCache.get(typeName)
252-
const resolved = resolvedNodes?.get(node.id)
253-
if (resolved !== undefined) {
254-
node.__gatsby_resolved = resolved
255-
}
256-
}
257-
orderedByCounter.push(node)
258-
})
259-
} else {
260-
// Here we must first filter for the node type
261-
// This loop is expensive at scale (!)
262-
getNodes().forEach(node => {
263-
if (nodeTypeNames.includes(node.internal.type)) {
252+
getDataStore()
253+
.iterateNodesByType(nodeTypeNames[0])
254+
.forEach(node => {
264255
if (!node.__gatsby_resolved) {
265256
const typeName = node.internal.type
266257
const resolvedNodes = resolvedNodesCache.get(typeName)
@@ -270,8 +261,25 @@ export function ensureEmptyFilterCache(
270261
}
271262
}
272263
orderedByCounter.push(node)
273-
}
274-
})
264+
})
265+
} else {
266+
// Here we must first filter for the node type
267+
// This loop is expensive at scale (!)
268+
getDataStore()
269+
.iterateNodes()
270+
.forEach(node => {
271+
if (nodeTypeNames.includes(node.internal.type)) {
272+
if (!node.__gatsby_resolved) {
273+
const typeName = node.internal.type
274+
const resolvedNodes = resolvedNodesCache.get(typeName)
275+
const resolved = resolvedNodes?.get(node.id)
276+
if (resolved !== undefined) {
277+
node.__gatsby_resolved = resolved
278+
}
279+
}
280+
orderedByCounter.push(node)
281+
}
282+
})
275283
}
276284

277285
// Since each node can only have one type, we shouldn't have to be concerned
@@ -363,30 +371,34 @@ export const ensureIndexByElemMatch = (
363371
filtersCache.set(filterCacheKey, filterCache)
364372

365373
if (nodeTypeNames.length === 1) {
366-
getNodesByType(nodeTypeNames[0]).forEach(node => {
367-
addNodeToBucketWithElemMatch(
368-
node,
369-
node,
370-
filter,
371-
filterCache,
372-
resolvedNodesCache
373-
)
374-
})
374+
getDataStore()
375+
.iterateNodesByType(nodeTypeNames[0])
376+
.forEach(node => {
377+
addNodeToBucketWithElemMatch(
378+
node,
379+
node,
380+
filter,
381+
filterCache,
382+
resolvedNodesCache
383+
)
384+
})
375385
} else {
376386
// Expensive at scale
377-
getNodes().forEach(node => {
378-
if (!nodeTypeNames.includes(node.internal.type)) {
379-
return
380-
}
387+
getDataStore()
388+
.iterateNodes()
389+
.forEach(node => {
390+
if (!nodeTypeNames.includes(node.internal.type)) {
391+
return
392+
}
381393

382-
addNodeToBucketWithElemMatch(
383-
node,
384-
node,
385-
filter,
386-
filterCache,
387-
resolvedNodesCache
388-
)
389-
})
394+
addNodeToBucketWithElemMatch(
395+
node,
396+
node,
397+
filter,
398+
filterCache,
399+
resolvedNodesCache
400+
)
401+
})
390402
}
391403

392404
postIndexingMetaSetup(filterCache, op)

packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
import { ArrayLikeIterable, RootDatabase, open } from "lmdb-store"
1+
import { RootDatabase, open } from "lmdb-store"
22
// import { performance } from "perf_hooks"
33
import { ActionsUnion, IGatsbyNode } from "../../redux/types"
44
import { updateNodes } from "./updates/nodes"
55
import { updateNodesByType } from "./updates/nodes-by-type"
6-
import { IDataStore, ILmdbDatabases } from "../types"
6+
import { IDataStore, IGatsbyIterable, ILmdbDatabases } from "../types"
77
import { emitter, replaceReducer } from "../../redux"
88

99
const rootDbFile =
@@ -73,7 +73,7 @@ function getNodesByType(type: string): Array<IGatsbyNode> {
7373
return result ?? []
7474
}
7575

76-
function iterateNodes(): ArrayLikeIterable<IGatsbyNode> {
76+
function iterateNodes(): IGatsbyIterable<IGatsbyNode> {
7777
// Additionally fetching items by id to leverage lmdb-store cache
7878
const nodesDb = getDatabases().nodes
7979
return nodesDb
@@ -82,7 +82,7 @@ function iterateNodes(): ArrayLikeIterable<IGatsbyNode> {
8282
.filter(Boolean)
8383
}
8484

85-
function iterateNodesByType(type: string): ArrayLikeIterable<IGatsbyNode> {
85+
function iterateNodesByType(type: string): IGatsbyIterable<IGatsbyNode> {
8686
const nodesByType = getDatabases().nodesByType
8787
return nodesByType
8888
.getValues(type)

packages/gatsby/src/datastore/types.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,22 @@ export interface ILmdbDatabases {
99
nodesByType: Database<NodeId, NodeType>
1010
}
1111

12+
// Note: this type is compatible with lmdb-store ArrayLikeIterable
13+
/**
 * Minimal chainable-sequence contract returned by the datastore iteration
 * methods.
 *
 * Callback return types are `unknown` rather than `any`: `filter` only tests
 * the result for truthiness and `forEach` ignores it, so no caller has to
 * opt out of type checking. Implementations that declare these callbacks as
 * returning `any` remain assignable to this interface.
 */
export interface IGatsbyIterable<T> extends Iterable<T> {
  [Symbol.iterator](): Iterator<T>
  /** Returns a sequence of `fn(entry)` for each entry. */
  map<U>(fn: (entry: T) => U): IGatsbyIterable<U>
  // concat<U>(other: Iterable<U>): Iterable<T | U>
  /** Returns a sequence of the entries for which `predicate` is truthy. */
  filter(predicate: (entry: T) => unknown): IGatsbyIterable<T>
  /** Invokes `callback` for each entry; its return value is ignored. */
  forEach(callback: (entry: T) => unknown): void
}
20+
1221
export interface IDataStore {
1322
getNode(id: string): IGatsbyNode | undefined
1423
getTypes(): Array<string>
1524
countNodes(typeName?: string): number
1625
ready(): Promise<void>
26+
iterateNodes(): IGatsbyIterable<IGatsbyNode>
27+
iterateNodesByType(type: string): IGatsbyIterable<IGatsbyNode>
1728

1829
/** @deprecated */
1930
getNodes(): Array<IGatsbyNode>

packages/gatsby/src/schema/index.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
const tracer = require(`opentracing`).globalTracer()
44
const { store } = require(`../redux`)
5-
const { getDataStore, getNodesByType, getTypes } = require(`../datastore`)
5+
const { getDataStore, getTypes } = require(`../datastore`)
66
const { createSchemaComposer } = require(`./schema-composer`)
77
const { buildSchema, rebuildSchemaWithSitePage } = require(`./schema`)
88
const { builtInFieldExtensions } = require(`./extensions`)
@@ -65,7 +65,7 @@ const buildInferenceMetadata = ({ types }) =>
6565
type: `BUILD_TYPE_METADATA`,
6666
payload: {
6767
typeName,
68-
nodes: getNodesByType(typeName),
68+
nodes: getDataStore().iterateNodesByType(typeName),
6969
},
7070
})
7171
if (typeNames.length > 0) {

packages/gatsby/src/schema/infer/add-inferred-fields.js

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ import { isFile } from "./is-file"
88
import { isDate } from "../types/date"
99
import { addDerivedType } from "../types/derived-types"
1010
import { is32BitInteger } from "../../utils/is-32-bit-integer"
11-
import { printDirectives } from "../print"
12-
const { getNode, getNodes } = require(`../../datastore`)
11+
const { getDataStore } = require(`../../datastore`)
1312

1413
const addInferredFields = ({
1514
schemaComposer,
@@ -212,17 +211,30 @@ const getFieldConfigFromFieldNameConvention = ({
212211
const path = key.split(`___NODE___`)[1]
213212
// Allow linking by nested fields, e.g. `author___NODE___contact___email`
214213
const foreignKey = path && path.replace(/___/g, `.`)
214+
const linkedTypesSet = new Set()
215+
216+
if (foreignKey) {
217+
// TODO: deprecate foreign keys like this (e.g. author___NODE___contact___email)
218+
// and recommend using schema customization instead
219+
const linkedValues = new Set(value.linkedNodes)
220+
getDataStore()
221+
.iterateNodes()
222+
.forEach(node => {
223+
const value = _.get(node, foreignKey)
224+
if (linkedValues.has(value)) {
225+
linkedTypesSet.add(node.internal.type)
226+
}
227+
})
228+
} else {
229+
value.linkedNodes.forEach(id => {
230+
const node = getDataStore().getNode(id)
231+
if (node) {
232+
linkedTypesSet.add(node.internal.type)
233+
}
234+
})
235+
}
215236

216-
const getNodeBy = value =>
217-
foreignKey
218-
? getNodes().find(node => _.get(node, foreignKey) === value)
219-
: getNode(value)
220-
221-
const linkedNodes = value.linkedNodes.map(getNodeBy)
222-
223-
const linkedTypes = _.uniq(
224-
linkedNodes.filter(Boolean).map(node => node.internal.type)
225-
)
237+
const linkedTypes = [...linkedTypesSet]
226238

227239
invariant(
228240
linkedTypes.length,

0 commit comments

Comments
 (0)