Files
Verso/services/web/frontend/js/features/ide-react/references/reference-indexer.ts
T
Alf Eaton d8c33cc34c Allow multiple concurrent reference searches (#33739)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
GitOrigin-RevId: 403d4f5900a8c4ccdc64032d365adb285a191b71
2026-05-20 08:06:23 +00:00

146 lines
4.2 KiB
TypeScript

import { ProjectSnapshot } from '@/infrastructure/project-snapshot'
import { generateSHA1Hash } from '@/shared/utils/sha1'
import { AdvancedReferenceSearchResult, Changes } from './types'
import { debugConsole } from '@/utils/debugging'
import type { ReferenceWorkerResponse } from './references.worker'
import { v4 as uuid } from 'uuid'
// Number of bytes in one mebibyte.
const ONE_MB = 1024 ** 2
// Default cap on the total amount of .bib data considered in a single
// index update (used as the default `dataLimit` of `updateFromSnapshot`).
const MAX_BIB_DATA_SIZE = ONE_MB * 6
/**
 * Main-thread front end for the references worker.
 *
 * Tracks which `.bib` files (and which version of each, by hash) have been
 * sent to the worker, forwards incremental changes to it, and exposes
 * promise-based `updateFromSnapshot` and `search` calls that settle when the
 * worker posts the corresponding response message.
 */
export class ReferenceIndexer {
  /** Path -> hash of the content most recently sent to the worker. */
  private fileIndexHash: Map<string, string> = new Map()

  private worker: Worker

  /**
   * Resolvers for in-flight `update` requests, oldest first.
   *
   * A queue (rather than a single slot) keeps overlapping calls to
   * `updateFromSnapshot` from overwriting each other's resolver, which would
   * leave the earlier promise pending forever.
   * NOTE(review): assumes the worker replies to every `update` message with
   * exactly one `updateKeys` message, in request order — confirm against
   * references.worker.
   */
  private updateResolvers: Array<(result: Set<string>) => void> = []

  /** Resolvers for in-flight `search` requests, keyed by request id. */
  private searchResolvers = new Map<
    string,
    (result: AdvancedReferenceSearchResult) => void
  >()

  constructor() {
    this.worker = new Worker(
      /* webpackChunkName: "references-worker" */
      new URL('./references.worker.ts', import.meta.url),
      { type: 'module' }
    )
    this.worker.addEventListener('message', evt => this.handleMessage(evt))
  }

  /**
   * Routes a worker response to the promise that is waiting for it.
   *
   * - `searchResult`: resolves (and forgets) the resolver registered under the
   *   message id; a result with no matching resolver is dropped silently.
   * - `updateKeys`: resolves the oldest pending update request.
   * - anything else, including an `updateKeys` with no pending request, is
   *   logged as a warning.
   */
  private handleMessage(event: MessageEvent) {
    const data = event.data as ReferenceWorkerResponse
    if (data.type === 'searchResult') {
      const searchResolver = this.searchResolvers.get(data.id)
      if (searchResolver) {
        searchResolver(data.result)
        this.searchResolvers.delete(data.id)
      }
    } else if (data.type === 'updateKeys' && this.updateResolvers.length > 0) {
      // Responses are matched to requests first-in, first-out.
      const updateResolver = this.updateResolvers.shift()
      updateResolver?.(data.keys)
    } else {
      debugConsole.warn('Received unknown message from worker:', data.type)
    }
  }

  /**
   * Diffs the `.bib` files in `snapshot` against what was previously indexed
   * and sends the resulting updates and deletes to the worker.
   *
   * @param snapshot - Source of editable docs and binary files.
   * @param options.dataLimit - Budget shared by all files in this update.
   *   Doc contents are truncated to the remaining budget, binary files are
   *   requested with `maxSize` set to the remaining budget, and once the
   *   budget is spent any remaining new or changed files are skipped.
   * @param options.signal - Checked before each binary file is processed.
   *   If aborted, indexing stops, nothing is sent to the worker, the stored
   *   hashes are left untouched and an empty set is returned.
   * @returns The `keys` set carried by the worker's `updateKeys` response.
   */
  async updateFromSnapshot(
    snapshot: Pick<
      ProjectSnapshot,
      | 'getDocPaths'
      | 'getDocContents'
      | 'getBinaryFilePathsWithHash'
      | 'getBinaryFileContents'
    >,
    {
      dataLimit = MAX_BIB_DATA_SIZE,
      signal,
    }: { dataLimit?: number; signal: AbortSignal }
  ): Promise<Set<string>> {
    // Work on a copy so an aborted run leaves `fileIndexHash` unchanged.
    const nextFileHashIndex = new Map(this.fileIndexHash)
    // Paths still in this set after both loops no longer exist in the snapshot.
    const previousPaths = new Set(this.fileIndexHash.keys())
    let dataBudget = dataLimit

    const docs = snapshot
      .getDocPaths()
      .filter(path => path.toLowerCase().endsWith('.bib'))
    const changes: Changes = { updates: [], deletes: [] }

    for (const path of docs) {
      // Mark as still present even if skipped, so it is not reported deleted.
      previousPaths.delete(path)
      if (dataBudget <= 0) {
        continue
      }
      const content = snapshot.getDocContents(path)?.slice(0, dataBudget)
      if (content == null) {
        continue
      }
      dataBudget -= content.length
      // Docs carry no precomputed hash here, so hash the (possibly truncated)
      // content to detect changes.
      const hash = generateSHA1Hash(content)
      if (nextFileHashIndex.get(path) !== hash) {
        // New or changed file
        nextFileHashIndex.set(path, hash)
        changes.updates.push({ path, content })
      }
    }

    // Smallest files first, so the remaining budget covers as many files as
    // possible.
    const files = snapshot
      .getBinaryFilePathsWithHash()
      .filter(({ path }) => path.toLowerCase().endsWith('.bib'))
      .sort((a, b) => a.size - b.size)

    for (const { path, hash, size } of files) {
      if (signal.aborted) {
        debugConsole.warn('Aborted indexing references due to signal')
        return new Set()
      }
      previousPaths.delete(path)
      if (nextFileHashIndex.get(path) === hash) {
        // Already indexed: no need to fetch, but it still counts against the
        // budget.
        dataBudget -= size
        continue
      }
      if (dataBudget <= 0) {
        continue
      }
      const content = await snapshot.getBinaryFileContents(path, {
        maxSize: dataBudget,
      })
      dataBudget -= content.length
      nextFileHashIndex.set(path, hash)
      changes.updates.push({ path, content })
    }

    for (const path of previousPaths) {
      // Deleted file
      changes.deletes.push(path)
      nextFileHashIndex.delete(path)
    }

    if (dataBudget <= 0) {
      debugConsole.warn('Data budget exceeded while updating references index')
    }

    this.fileIndexHash = nextFileHashIndex
    this.worker.postMessage({
      type: 'update',
      changes,
    })

    // Queue the resolver; `handleMessage` settles it when the matching
    // `updateKeys` response arrives.
    return new Promise(resolve => {
      this.updateResolvers.push(resolve)
    })
  }

  /**
   * Sends a search query to the worker.
   *
   * @param query - Query string forwarded to the worker unchanged.
   * @param id - Correlation id for the request; defaults to a fresh UUID.
   *   Reusing an id that is still pending replaces the earlier resolver, so
   *   the earlier promise would never settle.
   * @returns The `result` of the worker's `searchResult` message with this id.
   */
  async search(
    query: string,
    id: string = uuid()
  ): Promise<AdvancedReferenceSearchResult> {
    this.worker.postMessage({ id, type: 'search', query })
    const { promise, resolve } =
      Promise.withResolvers<AdvancedReferenceSearchResult>()
    this.searchResolvers.set(id, resolve)
    return promise
  }
}