Skip to content

Commit ac5881e

Browse files
authored
perf(localSearch): add concurrency pooling, cleanup logic, improve performance (#3374)
1 parent d1ff294 commit ac5881e

File tree

3 files changed

+76
-76
lines changed

3 files changed

+76
-76
lines changed

Diff for: src/node/plugins/localSearchPlugin.ts

+42-72
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import _debug from 'debug'
22
import fs from 'fs-extra'
33
import MiniSearch from 'minisearch'
4+
import pMap from 'p-map'
45
import path from 'path'
56
import type { Plugin, ViteDevServer } from 'vite'
67
import type { SiteConfig } from '../config'
@@ -53,15 +54,18 @@ export async function localSearchPlugin(
5354

5455
const options = siteConfig.site.themeConfig.search.options || {}
5556

56-
function render(file: string) {
57+
async function render(file: string) {
58+
if (!fs.existsSync(file)) return ''
5759
const { srcDir, cleanUrls = false } = siteConfig
5860
const relativePath = slash(path.relative(srcDir, file))
5961
const env: MarkdownEnv = { path: file, relativePath, cleanUrls }
60-
let src = fs.readFileSync(file, 'utf-8')
61-
src = processIncludes(srcDir, src, file, [])
62-
if (options._render) return options._render(src, env, md)
63-
const html = md.render(src, env)
64-
return env.frontmatter?.search === false ? '' : html
62+
const md_raw = await fs.promises.readFile(file, 'utf-8')
63+
const md_src = processIncludes(srcDir, md_raw, file, [])
64+
if (options._render) return await options._render(md_src, env, md)
65+
else {
66+
const html = md.render(md_src, env)
67+
return env.frontmatter?.search === false ? '' : html
68+
}
6569
}
6670

6771
const indexByLocales = new Map<string, MiniSearch<IndexObject>>()
@@ -85,11 +89,6 @@ export async function localSearchPlugin(
8589
return siteData?.localeIndex ?? 'root'
8690
}
8791

88-
function getIndexForPath(file: string) {
89-
const locale = getLocaleForPath(file)
90-
return getIndexByLocale(locale)
91-
}
92-
9392
let server: ViteDevServer | undefined
9493

9594
function onIndexUpdated() {
@@ -123,43 +122,39 @@ export async function localSearchPlugin(
123122
return id
124123
}
125124

126-
async function indexAllFiles(files: string[]) {
127-
const documentsByLocale = new Map<string, IndexObject[]>()
128-
await Promise.all(
129-
files
130-
.filter((file) => fs.existsSync(file))
131-
.map(async (file) => {
132-
const fileId = getDocId(file)
133-
const sections = splitPageIntoSections(render(file))
134-
if (sections.length === 0) return
135-
const locale = getLocaleForPath(file)
136-
let documents = documentsByLocale.get(locale)
137-
if (!documents) {
138-
documents = []
139-
documentsByLocale.set(locale, documents)
140-
}
141-
documents.push(
142-
...sections.map((section) => ({
143-
id: `${fileId}#${section.anchor}`,
144-
text: section.text,
145-
title: section.titles.at(-1)!,
146-
titles: section.titles.slice(0, -1)
147-
}))
148-
)
149-
})
150-
)
151-
for (const [locale, documents] of documentsByLocale) {
152-
const index = getIndexByLocale(locale)
153-
index.removeAll()
154-
await index.addAllAsync(documents)
125+
async function indexFile(page: string) {
126+
const file = path.join(siteConfig.srcDir, page)
127+
// get file metadata
128+
const fileId = getDocId(file)
129+
const locale = getLocaleForPath(file)
130+
const index = getIndexByLocale(locale)
131+
// retrieve file and split into "sections"
132+
const html = await render(file)
133+
const sections =
134+
// user provided generator
135+
(await options.miniSearch?._splitIntoSections?.(file, html)) ??
136+
// default implementation
137+
splitPageIntoSections(html)
138+
// add sections to the locale index
139+
for await (const section of sections) {
140+
if (!section || !(section.text || section.titles)) break
141+
const { anchor, text, titles } = section
142+
const id = anchor ? [fileId, anchor].join('#') : fileId
143+
index.add({
144+
id,
145+
text,
146+
title: titles.at(-1)!,
147+
titles: titles.slice(0, -1)
148+
})
155149
}
156-
debug(`🔍️ Indexed ${files.length} files`)
157150
}
158151

159152
async function scanForBuild() {
160-
await indexAllFiles(
161-
siteConfig.pages.map((f) => path.join(siteConfig.srcDir, f))
162-
)
153+
debug('🔍️ Indexing files for search...')
154+
await pMap(siteConfig.pages, indexFile, {
155+
concurrency: siteConfig.buildConcurrency
156+
})
157+
debug('✅ Indexing finished...')
163158
}
164159

165160
return {
@@ -214,25 +209,8 @@ export async function localSearchPlugin(
214209

215210
async handleHotUpdate({ file }) {
216211
if (file.endsWith('.md')) {
217-
const fileId = getDocId(file)
218-
if (!fs.existsSync(file)) return
219-
const index = getIndexForPath(file)
220-
const sections = splitPageIntoSections(render(file))
221-
if (sections.length === 0) return
222-
for (const section of sections) {
223-
const id = `${fileId}#${section.anchor}`
224-
if (index.has(id)) {
225-
index.discard(id)
226-
}
227-
index.add({
228-
id,
229-
text: section.text,
230-
title: section.titles.at(-1)!,
231-
titles: section.titles.slice(0, -1)
232-
})
233-
}
212+
await indexFile(file)
234213
debug('🔍️ Updated', file)
235-
236214
onIndexUpdated()
237215
}
238216
}
@@ -242,20 +220,13 @@ export async function localSearchPlugin(
242220
const headingRegex = /<h(\d*).*?>(.*?<a.*? href="#.*?".*?>.*?<\/a>)<\/h\1>/gi
243221
const headingContentRegex = /(.*?)<a.*? href="#(.*?)".*?>.*?<\/a>/i
244222

245-
interface PageSection {
246-
anchor: string
247-
titles: string[]
248-
text: string
249-
}
250-
251223
/**
252224
* Splits HTML into sections based on headings
253225
*/
254-
function splitPageIntoSections(html: string) {
226+
function* splitPageIntoSections(html: string) {
255227
const result = html.split(headingRegex)
256228
result.shift()
257229
let parentTitles: string[] = []
258-
const sections: PageSection[] = []
259230
for (let i = 0; i < result.length; i += 3) {
260231
const level = parseInt(result[i]) - 1
261232
const heading = result[i + 1]
@@ -266,14 +237,13 @@ function splitPageIntoSections(html: string) {
266237
if (!title || !content) continue
267238
const titles = parentTitles.slice(0, level)
268239
titles[level] = title
269-
sections.push({ anchor, titles, text: getSearchableText(content) })
240+
yield { anchor, titles, text: getSearchableText(content) }
270241
if (level === 0) {
271242
parentTitles = [title]
272243
} else {
273244
parentTitles[level] = title
274245
}
275246
}
276-
return sections
277247
}
278248

279249
function getSearchableText(content: string) {

Diff for: types/default-theme.d.ts

+28-4
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@ import type MarkdownIt from 'markdown-it'
22
import type { Options as MiniSearchOptions } from 'minisearch'
33
import type { ComputedRef, Ref } from 'vue'
44
import type { DocSearchProps } from './docsearch.js'
5-
import type { LocalSearchTranslations } from './local-search.js'
6-
import type { MarkdownEnv, PageData } from './shared.js'
5+
import type {
6+
LocalSearchTranslations,
7+
PageSplitSection
8+
} from './local-search.js'
9+
import type { Awaitable, MarkdownEnv, PageData } from './shared.js'
710

811
export namespace DefaultTheme {
912
export interface Config {
@@ -422,13 +425,34 @@ export namespace DefaultTheme {
422425
* @see https://lucaong.github.io/minisearch/modules/_minisearch_.html#searchoptions-1
423426
*/
424427
searchOptions?: MiniSearchOptions['searchOptions']
425-
}
426428

429+
/**
430+
* Overrides the default regex-based page splitter.
431+
* Supports async generators, making it possible to run in true parallel
432+
* (when used along with `node:child_process` or `worker_threads`)
433+
* ---
434+
* This should be especially useful for scalability reasons.
435+
* ---
436+
* @param {string} path - absolute path to the markdown source file
437+
* @param {string} html - document page rendered as html
438+
*/
439+
_splitIntoSections?: (
440+
path: string,
441+
html: string
442+
) =>
443+
| AsyncGenerator<PageSplitSection>
444+
| Generator<PageSplitSection>
445+
| Awaitable<PageSplitSection[]>
446+
}
427447
/**
428448
* Allows transformation of content before indexing (node only)
429449
* Return empty string to skip indexing
430450
*/
431-
_render?: (src: string, env: MarkdownEnv, md: MarkdownIt) => string
451+
_render?: (
452+
src: string,
453+
env: MarkdownEnv,
454+
md: MarkdownIt
455+
) => Awaitable<string>
432456
}
433457

434458
// algolia -------------------------------------------------------------------

Diff for: types/local-search.d.ts

+6
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,9 @@ export interface FooterTranslations {
2525
closeText?: string
2626
closeKeyAriaLabel?: string
2727
}
28+
29+
export interface PageSplitSection {
30+
anchor?: string
31+
titles: string[]
32+
text: string
33+
}

0 commit comments

Comments
 (0)