[clsi] migrate project-to-document conversion to compile from history (#33985)

* [clsi] add request flag for isCompileFromHistory

* [clsi] derive cacheKey for history snapshot from compile dir

* [clsi] migrate project-to-document conversion to compile from history

* [clsi] address review feedback

* [web] determine root doc at the time of converting the project

* [web] wait for flush before starting document conversion

* [saas-e2e] add tests for root doc override when converting project

GitOrigin-RevId: 71c578030949b89f3a74e7f7ab882dfa9c98c17a
This commit is contained in:
Jakob Ackermann
2026-05-28 08:40:49 +02:00
committed by Copybot
parent 666788be70
commit 7e4820f0b0
13 changed files with 215 additions and 68 deletions
+2 -2
View File
@@ -107,7 +107,7 @@ async function doCompile(request, stats, timings) {
let resourceList, baseHistoryVersion
try {
if (request.rawChangeOperations) {
if (request.isCompileFromHistory) {
;({ resourceList, baseHistoryVersion } =
await HistoryResourceWriter.syncResourcesToDisk(
projectId,
@@ -862,7 +862,7 @@ function _emitMetrics(request, status, stats, timings) {
if (timings.compileE2E != null) {
ClsiMetrics.e2eCompileDurationSeconds.observe(
{
compileFromHistory: !!request.rawChangeOperations,
compileFromHistory: request.isCompileFromHistory,
compile: request.metricsOpts.compile,
group: request.compileGroup,
},
+47 -6
View File
@@ -3,6 +3,9 @@ import logger from '@overleaf/logger'
import { expressify } from '@overleaf/promise-utils'
import fs from 'node:fs/promises'
import fsSync from 'node:fs'
import Metrics from '@overleaf/metrics'
import * as HistoryResourceWriter from './HistoryResourceWriter.js'
import Errors from './Errors.js'
import ConversionManager from './ConversionManager.js'
import ConversionOutputCleaner from './ConversionOutputCleaner.js'
import OutputCacheManager from './OutputCacheManager.js'
@@ -80,6 +83,7 @@ async function convertProjectToDocument(req, res) {
return res.sendStatus(404)
}
const { user_id: userId, project_id: projectId } = req.params
const type = req.query.type
if (!Object.hasOwn(CONVERSION_CONFIGS, type)) {
return res.sendStatus(400)
@@ -87,27 +91,58 @@ async function convertProjectToDocument(req, res) {
const config = CONVERSION_CONFIGS[type]
const request = await RequestParser.promises.parse(req.body)
request.project_id = req.params.project_id
request.user_id = req.params.user_id
request.project_id = projectId
request.user_id = userId
request.metricsOpts = {}
const responseFormat = req.query.responseFormat === 'json' ? 'json' : 'stream'
const conversionId = crypto.randomUUID()
const conversionDir = Path.join(Settings.path.compilesDir, conversionId)
const conversionCacheDir = Path.join(Settings.path.clsiCacheDir, conversionId)
const projectCacheDir = Path.join(Settings.path.clsiCacheDir, projectId)
const cleanupDirs = [conversionCacheDir, conversionDir]
logger.debug(
{
projectId: request.project_id,
userId: request.user_id,
projectId,
userId,
rootResourcePath: request.rootResourcePath,
type,
},
'syncing resources for project-to-document conversion'
)
Metrics.inc('convert_project_to_document', 1, {
compileFromHistory: request.isCompileFromHistory,
method: type,
})
try {
await ResourceWriter.promises.syncResourcesToDisk(request, conversionDir)
if (await fs.mkdir(projectCacheDir, { recursive: true })) {
// Newly created. Cleanup behind us.
cleanupDirs.push(projectCacheDir)
}
if (request.isCompileFromHistory) {
await fs.mkdir(conversionDir)
try {
await HistoryResourceWriter.syncResourcesToDisk(
projectId,
userId,
request,
conversionDir,
{}
)
} catch (err) {
if (err instanceof Errors.MissingUpdatesError) {
return res.status(409).json({
baseHistoryVersion: err.info.baseHistoryVersion,
})
}
throw err
}
} else {
await ResourceWriter.promises.syncResourcesToDisk(request, conversionDir)
}
const documentPath =
await ConversionManager.promises.convertLaTeXToDocumentInDirWithLock(
@@ -160,7 +195,13 @@ async function convertProjectToDocument(req, res) {
throw err
}
} finally {
await fs.rm(conversionDir, { recursive: true, force: true }).catch(() => {})
for (const dir of cleanupDirs) {
try {
await fs.rm(dir, { recursive: true, force: true })
} catch (err) {
logger.warn({ err, dir }, 'cleanup failed')
}
}
}
}
+43 -31
View File
@@ -34,31 +34,28 @@ export const clearCacheCb = callbackify(clearCache)
/**
* @param {string} projectId
* @param {string} userId
* @param {string} cacheKey
* @return {Promise<void>}
*/
export async function clearCache(projectId, userId) {
const { dir } = snapshotPath(projectId, userId)
export async function clearCache(projectId, userId, cacheKey) {
const { dir } = snapshotPath(cacheKey)
try {
await fs.promises.rm(dir, { recursive: true, force: true })
} catch (err) {
if (isENOENT(err)) return
logger.warn(
{ err, projectId, userId },
{ err, projectId, userId, cacheKey },
'compile from cache: failed to clear history cache'
)
}
}
/**
* @param {string} projectId
* @param {string} userId
* @param {string} cacheKey
* @return {{ dir: string, path: string, resyncPath: string }}
*/
function snapshotPath(projectId, userId) {
const dir = Path.join(
Settings.path.clsiCacheDir,
userId ? `${projectId}-${userId}` : projectId
)
function snapshotPath(cacheKey) {
const dir = Path.join(Settings.path.clsiCacheDir, cacheKey)
const path = Path.join(dir, 'history.json.gz')
const resyncPath = Path.join(dir, 'history-resync.json.gz')
@@ -76,6 +73,7 @@ function isENOENT(err) {
/**
* @param {string} projectId
* @param {string} userId
* @param {string} cacheKey
* @param {number} remoteBaseVersion
* @param {boolean} populateClsiCache
* @return {Promise<{rawSnapshot: import('overleaf-editor-core/lib/types.js').RawSnapshot, globalBlobs: string[], fullSync: boolean,localBaseVersion: number, dirty: string[]}>}
@@ -83,10 +81,11 @@ function isENOENT(err) {
async function loadSnapshot(
projectId,
userId,
cacheKey,
remoteBaseVersion,
populateClsiCache
) {
const { path, resyncPath } = snapshotPath(projectId, userId)
const { path, resyncPath } = snapshotPath(cacheKey)
let maxLocalBaseVersion = -1
for (const candidate of [path, resyncPath]) {
try {
@@ -100,7 +99,7 @@ async function loadSnapshot(
)
} else if (!isENOENT(err)) {
logger.warn(
{ err, projectId, userId },
{ err, projectId, userId, cacheKey },
'compile from cache: cannot read history from disk'
)
}
@@ -111,6 +110,7 @@ async function loadSnapshot(
return await loadSnapshotFromClsiCache(
projectId,
userId,
cacheKey,
remoteBaseVersion
)
} catch (err) {
@@ -121,7 +121,7 @@ async function loadSnapshot(
)
} else if (!isENOENT(err)) {
logger.warn(
{ err, projectId, userId },
{ err, projectId, userId, cacheKey },
'compile from cache: cannot download from clsi-cache'
)
}
@@ -135,11 +135,17 @@ async function loadSnapshot(
/**
* @param {string} projectId
* @param {string} userId
* @param {string} cacheKey
* @param {number} remoteBaseVersion
* @return {Promise<{rawSnapshot: import('overleaf-editor-core/lib/types.js').RawSnapshot, globalBlobs: string[], fullSync: boolean,localBaseVersion: number, dirty: string[]}>}
*/
async function loadSnapshotFromClsiCache(projectId, userId, remoteBaseVersion) {
const { dir, resyncPath } = snapshotPath(projectId, userId)
async function loadSnapshotFromClsiCache(
projectId,
userId,
cacheKey,
remoteBaseVersion
) {
const { dir, resyncPath } = snapshotPath(cacheKey)
await fs.promises.mkdir(dir, { recursive: true })
const ok = await CLSICacheHandler.downloadHistorySnapshot(
projectId,
@@ -182,8 +188,7 @@ async function loadSnapshotFromFile(path, remoteBaseVersion, fullSync) {
}
/**
* @param {string} projectId
* @param {string} userId
* @param {string} cacheKey
* @param {Snapshot} snapshot
* @param {number} localBaseVersion
* @param {string[]} globalBlobs
@@ -191,14 +196,13 @@ async function loadSnapshotFromFile(path, remoteBaseVersion, fullSync) {
* @return {Promise<void>}
*/
async function saveSnapshot(
projectId,
userId,
cacheKey,
snapshot,
localBaseVersion,
globalBlobs,
dirty
) {
const { dir, path } = snapshotPath(projectId, userId)
const { dir, path } = snapshotPath(cacheKey)
await fs.promises.mkdir(dir, { recursive: true })
const tmp = path + '~'
await fs.promises.writeFile(
@@ -221,16 +225,17 @@ async function saveSnapshot(
/**
* @param {string} projectId
* @param {string} userId
* @param {string} cacheKey
* @return {Promise<void>}
*/
async function deleteResyncSnapshot(projectId, userId) {
const { resyncPath } = snapshotPath(projectId, userId)
async function deleteResyncSnapshot(projectId, userId, cacheKey) {
const { resyncPath } = snapshotPath(cacheKey)
try {
await fs.promises.unlink(resyncPath)
} catch (err) {
if (!isENOENT(err)) {
logger.warn(
{ err, projectId, userId },
{ err, projectId, userId, cacheKey },
'compile from cache: failed to clear history-resync.json.gz'
)
}
@@ -367,6 +372,10 @@ export async function syncResourcesToDisk(
compileDir,
timings
) {
// - logged in user: <project-id>-<user-id>
// - anonymous user: <project-id>
// - conversion job: <uuid>
const cacheKey = Path.basename(compileDir)
const remoteBaseVersion = request.baseHistoryVersion
let rawSnapshot, globalBlobs, localBaseVersion, source, dirty, fullSync
try {
@@ -374,24 +383,25 @@ export async function syncResourcesToDisk(
await loadSnapshot(
projectId,
userId,
cacheKey,
remoteBaseVersion,
request.populateClsiCache
))
source = fullSync ? 'clsi-cache' : 'local'
logger.debug(
{ projectId, userId, localBaseVersion, remoteBaseVersion },
{ projectId, userId, cacheKey, localBaseVersion, remoteBaseVersion },
'compile from cache: using existing snapshot'
)
} catch (err) {
if (!request.rawSnapshot) throw err
if (!(err instanceof Errors.MissingUpdatesError)) {
logger.warn(
{ err, projectId, userId },
{ err, projectId, userId, cacheKey },
'compile from cache: bad local history state during full resync'
)
}
logger.debug(
{ projectId, userId },
{ projectId, userId, cacheKey },
'compile from cache: using incoming snapshot'
)
source = 'remote'
@@ -424,7 +434,10 @@ export async function syncResourcesToDisk(
const changedPaths = []
if (fullSync) {
changedPaths.push(...snapshot.getFilePathnames())
logger.debug({ projectId, userId }, 'compile from cache: full sync')
logger.debug(
{ projectId, userId, cacheKey },
'compile from cache: full sync'
)
} else {
const dedupe = new Set(dirty)
if (request.draft) {
@@ -448,7 +461,7 @@ export async function syncResourcesToDisk(
}
changedPaths.push(...dedupe)
logger.debug(
{ projectId, userId, changedPaths },
{ projectId, userId, cacheKey, changedPaths },
'compile from cache: incremental sync'
)
}
@@ -541,8 +554,7 @@ export async function syncResourcesToDisk(
const baseHistoryVersion = localBaseVersion + changes.length
if (fullSync || changes.length || wasDirty || dirty.length) {
await saveSnapshot(
projectId,
userId,
cacheKey,
snapshot,
baseHistoryVersion,
globalBlobs,
@@ -550,7 +562,7 @@ export async function syncResourcesToDisk(
)
}
if (fullSync) {
await deleteResyncSnapshot(projectId, userId)
await deleteResyncSnapshot(projectId, userId, cacheKey)
}
return {
baseHistoryVersion,
@@ -219,7 +219,8 @@ export default ProjectPersistenceManager = {
logger.debug({ projectId, userId }, 'clearing project for user')
return CompileManager.clearProject(projectId, userId, function (error) {
if (error) return callback(error)
HistoryResourceWriter.clearCacheCb(projectId, userId, error => {
const cacheKey = userId ? `${projectId}-${userId}` : projectId
HistoryResourceWriter.clearCacheCb(projectId, userId, cacheKey, error => {
if (error) return callback(error)
ProjectPersistenceManager.clearProjectFromCache(
projectId,
+1
View File
@@ -162,6 +162,7 @@ function parse(body, callback) {
// The snapshot and changes are validated when loading them in editor-core.
response.rawSnapshot = compile.rawSnapshot
response.rawChangeOperations = compile.rawChangeOperations
response.isCompileFromHistory = !!response.rawChangeOperations
// v1 conversions / submissions
if (compile.filestoreBlobPrefix) {
@@ -18,7 +18,11 @@ describe('ConversionController', function () {
ctx.documentStat = { size: 5678 }
ctx.Settings = {
enablePandocConversions: true,
path: { compilesDir: '/compiles', outputDir: '/output' },
path: {
compilesDir: '/compiles',
outputDir: '/output',
clsiCacheDir: '/cache',
},
}
ctx.OutputCacheManager = {
CACHE_SUBDIR: 'generated-files',
@@ -43,6 +47,13 @@ describe('ConversionController', function () {
syncResourcesToDisk: sinon.stub().resolves(),
},
}
ctx.HistoryResourceWriter = {
promises: {
syncResourcesToDisk: sinon.stub().resolves(),
},
}
ctx.RequestParser = {
promises: {
parse: sinon.stub().resolves(ctx.parsedRequest),
@@ -87,6 +98,11 @@ describe('ConversionController', function () {
default: ctx.ResourceWriter,
}))
vi.doMock(
'../../../app/js/HistoryResourceWriter',
() => ctx.HistoryResourceWriter
)
vi.doMock('../../../app/js/RequestParser', () => ({
default: ctx.RequestParser,
}))
@@ -1174,19 +1174,14 @@ function _finaliseRequest(projectId, options, project, docs, files) {
}
}
async function buildDocumentConversionRequest(projectId) {
const project = await ProjectGetter.promises.getProject(projectId, {
compiler: 1,
imageName: 1,
'overleaf.history.id': 1,
rootDoc_id: 1,
rootFolder: 1,
async function buildDocumentConversionRequest(projectId, userId, options) {
return await _buildRequest(projectId, userId, {
...options,
// Use the history snapshot as populated on clsi-cache.
populateClsiCache: true,
// Read from mongo directly, skip redis.
incrementalCompilesEnabled: false,
})
if (project == null) {
throw new Errors.NotFoundError(`project does not exist: ${projectId}`)
}
const projectStateHash = ClsiStateManager.computeHash(project, {})
return _buildRequestFromMongo(projectId, {}, project, projectStateHash)
}
async function wordCount(projectId, userId, file, limits, clsiserverid) {
@@ -27,6 +27,7 @@ const exportProjectConversionSchema = z.object({
}),
query: z.object({
responseFormat: z.enum(['json', 'stream']).optional().default('stream'),
rootResourcePath: zz.filepath().optional(),
}),
})
@@ -75,17 +76,25 @@ async function _streamConvertedDocumentToResponse(
async function exportProjectConversion(req, res) {
const { params, query } = parseReq(req, exportProjectConversionSchema)
const { Project_id: projectId, type } = params
const { responseFormat } = query
const { responseFormat, rootResourcePath } = query
const userId = SessionManager.getLoggedInUserId(req.session)
Metrics.inc('document-exports', 1, { type })
const compileFromHistory = await SplitTestHandler.promises.featureFlagEnabled(
req,
res,
'compile-from-history',
{ includeReferer: true }
)
let conversionResult
try {
conversionResult =
await DocumentConversionManager.promises.convertProjectToDocument(
projectId,
userId,
type
type,
{ compileFromHistory, rootResourcePath }
)
AnalyticsManager.recordEventForUserInBackground(userId, 'convert-format', {
sourceFormat: 'latex',
@@ -9,6 +9,7 @@ import Path from 'node:path'
import {
fetchJsonWithResponse,
fetchStreamWithResponse,
RequestFailedError,
} from '@overleaf/fetch-utils'
import { pipeline } from 'node:stream/promises'
import OError from '@overleaf/o-error'
@@ -81,10 +82,59 @@ async function convertDocumentToLaTeXZipArchive(path, userId, conversionType) {
return outputPath
}
async function convertProjectToDocument(projectId, userId, type) {
/**
* @param {string} projectId
* @param {string} userId
* @param {string} type
* @param {Object} options
* @param {boolean} options.compileFromHistory
* @param {string} options.rootResourcePath
* @return {Promise<{conversionId: string, buildId: string, clsiServerId: string|null, file: string}>}
*/
async function convertProjectToDocument(projectId, userId, type, options) {
const limits = await CompileManager.promises._getUserCompileLimits(userId)
const clsiRequest =
await ClsiManager.promises.buildDocumentConversionRequest(projectId)
try {
return await convertProjectToDocumentOnce(
projectId,
userId,
type,
limits,
options
)
} catch (err) {
if (
options.compileFromHistory &&
err instanceof RequestFailedError &&
err.response.status === 409
) {
let baseHistoryVersion = -1
try {
;({ baseHistoryVersion } = JSON.parse(err.body))
} catch {}
return await convertProjectToDocumentOnce(
projectId,
userId,
type,
limits,
{ ...options, baseHistoryVersion }
)
}
throw err
}
}
async function convertProjectToDocumentOnce(
projectId,
userId,
type,
limits,
options
) {
const clsiRequest = await ClsiManager.promises.buildDocumentConversionRequest(
projectId,
userId,
options
)
const clsiUrl = new URL(Settings.apis.clsi.url)
clsiUrl.pathname = `/project/${projectId}/user/${userId}/download/project-to-document`
@@ -4,6 +4,8 @@ import { FC } from 'react'
import useConvertProject from '../../hooks/use-convert-project'
import { useCommandProvider } from '../../hooks/use-command-provider'
import OLDropdownMenuItem from '@/shared/components/ol/ol-dropdown-menu-item'
import { useRootDoc } from '@/shared/hooks/use-root-doc'
import { useEditorManagerContext } from '@/features/ide-react/context/editor-manager-context'
type ExportProjectWithConversionProps = {
featureFlag?: string
@@ -20,7 +22,13 @@ export const ExportProjectWithConversionButton: FC<
const enablePandocConversions =
getMeta('ol-ExposedSettings')?.enablePandocConversions
const anonymous = getMeta('ol-anonymous')
const downloadConversion = useConvertProject(conversionType)
const getRootDocInfo = useRootDoc()
const { openDocs } = useEditorManagerContext()
const downloadConversion = useConvertProject(
conversionType,
openDocs,
getRootDocInfo
)
const showExportButton =
splitTestEnabledIfNeeded && enablePandocConversions && !anonymous
@@ -9,13 +9,19 @@ import {
showExportDocumentSuccess,
showPreparingExportToast,
} from '../components/toolbar/export-document-toasts'
import { RootDocInfo } from '@/shared/hooks/use-root-doc'
import { OpenDocuments } from '../editor/open-documents'
const SLOW_CONVERSION_THRESHOLD = 2000
export default function useConvertProject(type: 'docx' | 'markdown') {
export default function useConvertProject(
type: 'docx' | 'markdown',
openDocs: OpenDocuments,
getRootDocInfo: () => RootDocInfo
) {
const { projectId } = useProjectContext()
const location = useLocation()
const triggerConversion = useCallback(async () => {
return useCallback(async () => {
let handle: string | undefined
const toastTimer = setTimeout(() => {
handle = showPreparingExportToast()
@@ -24,10 +30,14 @@ export default function useConvertProject(type: 'docx' | 'markdown') {
clearTimeout(toastTimer)
if (handle) hidePreparingExportToast(handle)
}
const url = new URL(window.location.origin)
url.pathname = `/project/${projectId}/download/conversion/${type}`
url.searchParams.set('responseFormat', 'json')
const { rootResourcePath } = getRootDocInfo()
url.searchParams.set('rootResourcePath', rootResourcePath)
try {
const response = await getJSON(
`/project/${projectId}/download/conversion/${type}?responseFormat=json`
)
await openDocs.awaitBufferedOps(AbortSignal.timeout(10_000))
const response = await getJSON(url.href)
hidePreparingToast()
const { downloadUrl } = response
if (downloadUrl) {
@@ -42,7 +52,5 @@ export default function useConvertProject(type: 'docx' | 'markdown') {
showExportDocumentError()
debugConsole.error(error)
}
}, [projectId, type, location])
return triggerConversion
}, [projectId, type, getRootDocInfo, openDocs, location])
}
@@ -164,7 +164,10 @@ export default class DocumentCompiler {
// unset the error before it's set again later, so that components are recreated and events are tracked
this.setError(undefined)
data.options = options
data.options = {
...options,
rootResourcePath,
}
data.rootDocId = rootDocId
if (data.clsiServerId) {
this.clsiServerId = data.clsiServerId
@@ -78,6 +78,9 @@ describe('ProjectDownloadsController', function () {
() => ({
default: (ctx.SplitTestHandler = {
featureFlagEnabled: sinon.stub().yields(null, false),
promises: {
featureFlagEnabled: sinon.stub().resolves(false),
},
}),
})
)