c5883e5954
Build and Deploy Verso / deploy (push) Successful in 14m10s
thumbnailFromBuild() now tries output.pdf → output-slides.pdf → decktape on output.html (slide 1 only). The web service's ThumbnailManager already calls this endpoint fire-and-forget on every successful compile, so RevealJS project cards will show the first slide thumbnail automatically. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
373 lines
11 KiB
JavaScript
373 lines
11 KiB
JavaScript
import crypto from 'node:crypto'
|
|
import { execFile } from 'node:child_process'
|
|
import os from 'node:os'
|
|
import { promisify } from 'node:util'
|
|
import logger from '@overleaf/logger'
|
|
import { expressify } from '@overleaf/promise-utils'
|
|
import fs from 'node:fs/promises'
|
|
import fsSync from 'node:fs'
|
|
import Metrics from '@overleaf/metrics'
|
|
import * as HistoryResourceWriter from './HistoryResourceWriter.js'
|
|
import Errors from './Errors.js'
|
|
import ConversionManager from './ConversionManager.js'
|
|
import ConversionOutputCleaner from './ConversionOutputCleaner.js'
|
|
import OutputCacheManager from './OutputCacheManager.js'
|
|
import ResourceWriter from './ResourceWriter.js'
|
|
import RequestParser from './RequestParser.js'
|
|
import { pipeline } from 'node:stream/promises'
|
|
import Settings from '@overleaf/settings'
|
|
import Path from 'node:path'
|
|
import { z } from '@overleaf/validation-tools'
|
|
|
|
// Promise-returning variant of child_process.execFile, so external tools can
// be awaited.
const execFileAsync = promisify(execFile)

// Output formats accepted by the project-to-document conversion, keyed by the
// `type` query parameter. `extension` is the suffix given to the generated
// `output.*` file.
const CONVERSION_CONFIGS = {
  docx: { extension: 'docx' },
  markdown: { extension: 'zip' },
  html: { extension: 'zip' },
}
|
|
|
|
/**
 * Converts an input document (docx or markdown) to LaTeX and streams the
 * resulting zip back as an attachment named `conversion.zip`.
 *
 * Responses:
 *   404 - Settings.enablePandocConversions is falsy
 *   400 - `type` query parameter is missing or not 'docx' / 'markdown'
 *   422 - the conversion threw an Errors.ConversionError; the body carries
 *         `error` / `exitCode` only when the error is flagged user-facing
 *
 * Any other error is rethrown. The input file is always unlinked, and the
 * directory containing the produced zip is removed once streaming ends.
 *
 * @param req - reads `req.file.path` (input file on disk, presumably placed
 *   there by an upload middleware) and `req.query.type`
 * @param res - response the zip or the error status is written to
 */
async function convertDocumentToLaTeX(req, res) {
  const { path: uploadPath } = req.file
  const conversionType = req.query.type

  // Best-effort removal of the input file; failures are deliberately ignored.
  const discardUpload = () => fs.unlink(uploadPath).catch(() => {})

  if (!Settings.enablePandocConversions) {
    await discardUpload()
    return res.sendStatus(404)
  }

  const isSupportedType =
    Boolean(conversionType) && ['docx', 'markdown'].includes(conversionType)
  if (!isSupportedType) {
    await discardUpload()
    return res.sendStatus(400)
  }

  logger.debug(
    { path: uploadPath, conversionType },
    'received file for conversion'
  )

  const conversionId = crypto.randomUUID()
  let zipPath
  try {
    zipPath = await ConversionManager.promises.convertToLaTeXWithLock(
      conversionId,
      uploadPath,
      conversionType
    )
  } catch (err) {
    // Only conversion failures are mapped to a 422; everything else bubbles.
    if (!(err instanceof Errors.ConversionError)) {
      throw err
    }
    if (!err.isUserFacing) {
      logger.warn(
        { err, conversionType, stderr: err.stderr },
        'Conversion failed with non-user-facing error'
      )
      return res.status(422).json({})
    }
    return res.status(422).json({
      error: err.stderr,
      exitCode: err.exitCode,
    })
  } finally {
    await discardUpload()
  }

  try {
    const { size } = await fs.stat(zipPath)

    res.setHeader('Content-Length', size)
    res.attachment('conversion.zip')
    res.setHeader('X-Content-Type-Options', 'nosniff')

    await pipeline(fsSync.createReadStream(zipPath), res)
  } finally {
    // The zip sits in its own directory; drop the whole directory.
    await fs
      .rm(Path.dirname(zipPath), { recursive: true, force: true })
      .catch(() => {})
  }
}
|
|
|
|
// Query-string schema for the PDF-to-JPEG endpoint: `mode` is required and
// must be either 'preview' or 'thumbnail'.
const PDF_TO_JPEG_MODES = ['preview', 'thumbnail']
const PDFToJPEGQuerySchema = z.object({ mode: z.enum(PDF_TO_JPEG_MODES) })
|
|
|
|
/**
 * Converts an input PDF to a JPEG and streams it back as an attachment named
 * `output.jpg`.
 *
 * Responses:
 *   404 - Settings.enablePdfConversions is falsy
 *   400 - the query string does not satisfy PDFToJPEGQuerySchema
 *
 * Conversion errors are not handled here and propagate to the caller. The
 * input file is always unlinked, and the directory containing the produced
 * JPEG is removed once streaming ends.
 *
 * @param req - reads `req.file.path` (input PDF on disk, presumably placed
 *   there by an upload middleware) and `req.query`
 * @param res - response the JPEG or the error status is written to
 */
async function convertPDFToJPEG(req, res) {
  const { path: pdfPath } = req.file

  // Best-effort removal of the input file; failures are deliberately ignored.
  const discardUpload = () => fs.unlink(pdfPath).catch(() => {})

  if (!Settings.enablePdfConversions) {
    await discardUpload()
    return res.sendStatus(404)
  }

  const queryResult = PDFToJPEGQuerySchema.safeParse(req.query)
  if (!queryResult.success) {
    await discardUpload()
    return res.sendStatus(400)
  }
  const mode = queryResult.data.mode

  logger.debug({ path: pdfPath, mode }, 'received pdf for conversion to jpeg')

  const conversionId = crypto.randomUUID()
  let jpegPath
  try {
    jpegPath = await ConversionManager.promises.convertPDFToJPEGWithLock(
      conversionId,
      pdfPath,
      mode
    )
  } finally {
    await discardUpload()
  }

  try {
    const { size } = await fs.stat(jpegPath)

    res.setHeader('Content-Length', size)
    res.attachment('output.jpg')
    res.setHeader('X-Content-Type-Options', 'nosniff')

    await pipeline(fsSync.createReadStream(jpegPath), res)
  } finally {
    // The JPEG sits in its own directory; drop the whole directory.
    await fs
      .rm(Path.dirname(jpegPath), { recursive: true, force: true })
      .catch(() => {})
  }
}
|
|
|
|
/**
 * Syncs a project's resources to a fresh conversion directory, converts the
 * LaTeX project to the requested document type and returns the result.
 *
 * The result is delivered in one of two ways, selected by the
 * `responseFormat` query parameter:
 *   - 'json': the document is copied into an output build directory and the
 *     response is `{ conversionId, buildId, file }`; cleanup of that output
 *     is handed to ConversionOutputCleaner.
 *   - anything else: the document is streamed as an attachment.
 *
 * Responses:
 *   404 - Settings.enablePandocConversions is falsy
 *   400 - `type` is not a key of CONVERSION_CONFIGS
 *   409 - syncing from history threw Errors.MissingUpdatesError; the body
 *         carries `baseHistoryVersion`
 *   422 - an Errors.ConversionError was thrown; the body carries `error` /
 *         `exitCode` only when the error is flagged user-facing
 *
 * Any other error is rethrown. The conversion directories (and the project
 * cache directory, if this call created it) are removed before returning.
 *
 * @param req - reads `req.params.user_id`, `req.params.project_id`,
 *   `req.query.type`, `req.query.responseFormat` and `req.body`
 * @param res - response the document, JSON descriptor or error is written to
 */
async function convertProjectToDocument(req, res) {
  if (!Settings.enablePandocConversions) {
    return res.sendStatus(404)
  }

  const { user_id: userId, project_id: projectId } = req.params
  const type = req.query.type
  if (!Object.hasOwn(CONVERSION_CONFIGS, type)) {
    return res.sendStatus(400)
  }
  const { extension } = CONVERSION_CONFIGS[type]

  const request = await RequestParser.promises.parse(req.body)
  Object.assign(request, {
    project_id: projectId,
    user_id: userId,
    metricsOpts: {},
  })

  const wantsJson = req.query.responseFormat === 'json'

  const conversionId = crypto.randomUUID()
  const conversionDir = Path.join(Settings.path.compilesDir, conversionId)
  const conversionCacheDir = Path.join(Settings.path.clsiCacheDir, conversionId)
  const projectCacheDir = Path.join(Settings.path.clsiCacheDir, projectId)
  const dirsToRemove = [conversionCacheDir, conversionDir]

  logger.debug(
    {
      projectId,
      userId,
      rootResourcePath: request.rootResourcePath,
      type,
    },
    'syncing resources for project-to-document conversion'
  )
  Metrics.inc('convert_project_to_document', 1, {
    compileFromHistory: request.isCompileFromHistory,
    method: type,
  })

  try {
    // A recursive mkdir resolves to a path only when it had to create
    // something, so a truthy result means the cache dir is ours to remove.
    const createdCacheDir = await fs.mkdir(projectCacheDir, { recursive: true })
    if (createdCacheDir) {
      dirsToRemove.push(projectCacheDir)
    }

    if (!request.isCompileFromHistory) {
      await ResourceWriter.promises.syncResourcesToDisk(request, conversionDir)
    } else {
      await fs.mkdir(conversionDir)
      try {
        await HistoryResourceWriter.syncResourcesToDisk(
          projectId,
          userId,
          request,
          conversionDir,
          {}
        )
      } catch (err) {
        if (!(err instanceof Errors.MissingUpdatesError)) {
          throw err
        }
        return res.status(409).json({
          baseHistoryVersion: err.info.baseHistoryVersion,
        })
      }
    }

    const documentPath =
      await ConversionManager.promises.convertLaTeXToDocumentInDirWithLock(
        conversionId,
        conversionDir,
        request.rootResourcePath,
        type
      )

    const outputName = `output.${extension}`

    if (!wantsJson) {
      const { size } = await fs.stat(documentPath)
      res.setHeader('Content-Length', size)
      res.attachment(outputName)
      res.setHeader('X-Content-Type-Options', 'nosniff')
      await pipeline(fsSync.createReadStream(documentPath), res)
      return
    }

    // TODO: drop the streaming branch once web is migrated to the two-step flow
    const buildId = await OutputCacheManager.promises.generateBuildId()
    const buildDir = Path.join(
      Settings.path.outputDir,
      conversionId,
      OutputCacheManager.CACHE_SUBDIR,
      buildId
    )
    try {
      await fs.mkdir(buildDir, { recursive: true })
      await fs.copyFile(documentPath, Path.join(buildDir, outputName))
      res.json({ conversionId, buildId, file: outputName })
    } finally {
      // Scheduled whether or not the copy succeeded.
      ConversionOutputCleaner.scheduleCleanup(conversionId)
    }
  } catch (err) {
    // Only conversion failures are mapped to a 422; everything else bubbles.
    if (!(err instanceof Errors.ConversionError)) {
      throw err
    }
    if (!err.isUserFacing) {
      logger.warn(
        { err, type, stderr: err.stderr },
        'Conversion failed with non-user-facing error'
      )
      return res.status(422).json({})
    }
    return res.status(422).json({
      error: err.stderr,
      exitCode: err.exitCode,
    })
  } finally {
    // Remove directories one at a time; a failure is logged and does not
    // stop the remaining removals.
    for (const dir of dirsToRemove) {
      await fs.rm(dir, { recursive: true, force: true }).catch(err => {
        logger.warn({ err, dir }, 'cleanup failed')
      })
    }
  }
}
|
|
|
|
// Generates a JPEG thumbnail of page 1 of the compiled output using
// pdftocairo (poppler-utils). Tries output.pdf first (LaTeX / Quarto-PDF),
// then output-slides.pdf (Quarto RevealJS after a PDF-export compile), then
// falls back to rendering slide 1 of output.html via decktape (normal
// RevealJS preview compile).
//
// Reads project_id, user_id (optional) and build_id from req.params.
//
// Responses:
//   400 - build_id does not match OutputCacheManager.BUILD_REGEX
//   404 - no PDF was found and the decktape fallback failed (this includes
//         output.html being absent)
//   500 - producing the JPEG failed before any response headers were sent
//   otherwise an image/jpeg body, scaled to 380px wide, cacheable for a day
//
// Both temp dirs (decktape scratch and thumbnail scratch) are removed in the
// final `finally`. The thumbnail scratch dir is created inside the guarded
// region so that a failure to create it cannot leak the decktape dir.
async function thumbnailFromBuild(req, res) {
  const { project_id: projectId, user_id: userId, build_id: buildId } = req.params
  if (!buildId?.match(OutputCacheManager.BUILD_REGEX)) return res.sendStatus(400)

  const compileName = userId ? `${projectId}-${userId}` : projectId
  const buildDir = Path.join(
    Settings.path.outputDir,
    compileName,
    OutputCacheManager.CACHE_SUBDIR,
    buildId
  )

  let pdfPath = null
  let deckTapeDir = null
  let tmpDir = null

  // Prefer a PDF that already exists in the build directory.
  for (const name of ['output.pdf', 'output-slides.pdf']) {
    try {
      const candidate = Path.join(buildDir, name)
      await fs.access(candidate)
      pdfPath = candidate
      break
    } catch {
      // Not present / not accessible: try the next candidate.
    }
  }

  if (!pdfPath) {
    const htmlPath = Path.join(buildDir, 'output.html')
    try {
      await fs.access(htmlPath)
      deckTapeDir = await fs.mkdtemp(Path.join(os.tmpdir(), 'clsi-deck-'))
      // Point Chrome's HOME and XDG dirs at the scratch dir so that its
      // profile and cache files are removed together with it.
      const chromeHome = Path.join(deckTapeDir, 'chrome')
      await fs.mkdir(chromeHome, { recursive: true })
      const slidePdf = Path.join(deckTapeDir, 'slide1.pdf')
      await execFileAsync(
        'decktape',
        [
          '--slides', '1',
          '--chrome-arg=--no-sandbox',
          '--chrome-arg=--disable-dev-shm-usage',
          '--chrome-arg=--disable-gpu',
          `--chrome-arg=--user-data-dir=${chromeHome}/data`,
          htmlPath,
          slidePdf,
        ],
        {
          timeout: 60000,
          env: {
            ...process.env,
            HOME: chromeHome,
            XDG_CONFIG_HOME: chromeHome,
            XDG_CACHE_HOME: chromeHome,
          },
        }
      )
      pdfPath = slidePdf
    } catch (err) {
      logger.warn({ err, projectId, buildId }, 'decktape slide1 thumbnail failed')
      if (deckTapeDir) {
        await fs.rm(deckTapeDir, { recursive: true, force: true }).catch(() => {})
        deckTapeDir = null
      }
      return res.sendStatus(404)
    }
  }

  try {
    // Created inside the try: if this fails, the finally below still removes
    // the decktape scratch dir.
    tmpDir = await fs.mkdtemp(Path.join(os.tmpdir(), 'clsi-thumb-'))
    const outputBase = Path.join(tmpDir, 'thumb')
    // With -singlefile, pdftocairo writes `<outputBase>.jpg`.
    const jpegPath = `${outputBase}.jpg`

    await execFileAsync(
      'pdftocairo',
      [
        '-jpeg',
        '-jpegopt', 'quality=82',
        '-singlefile',
        '-scale-to-x', '380',
        '-scale-to-y', '-1',
        '-f', '1',
        '-l', '1',
        pdfPath,
        outputBase,
      ],
      { timeout: 30000 }
    )

    const jpegStat = await fs.stat(jpegPath)
    res.setHeader('Content-Type', 'image/jpeg')
    res.setHeader('Content-Length', jpegStat.size)
    res.setHeader('Cache-Control', 'public, max-age=86400')
    res.setHeader('X-Content-Type-Options', 'nosniff')
    const readStream = fsSync.createReadStream(jpegPath)
    await pipeline(readStream, res)
  } catch (err) {
    logger.warn({ err, projectId, buildId }, 'thumbnail generation failed')
    // Once streaming has started the status can no longer be changed.
    if (!res.headersSent) res.sendStatus(500)
  } finally {
    if (tmpDir) {
      await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {})
    }
    if (deckTapeDir) {
      await fs.rm(deckTapeDir, { recursive: true, force: true }).catch(() => {})
    }
  }
}
|
|
|
|
// Public handlers of this module. Each async handler is wrapped with
// expressify before being exposed.
const ConversionHandlers = {
  convertDocumentToLaTeX: expressify(convertDocumentToLaTeX),
  convertProjectToDocument: expressify(convertProjectToDocument),
  convertPDFToJPEG: expressify(convertPDFToJPEG),
  thumbnailFromBuild: expressify(thumbnailFromBuild),
}

export default ConversionHandlers
|