import crypto from 'node:crypto'
import { execFile } from 'node:child_process'
import os from 'node:os'
import { promisify } from 'node:util'
import logger from '@overleaf/logger'
import { expressify } from '@overleaf/promise-utils'
import fs from 'node:fs/promises'
import fsSync from 'node:fs'
import Metrics from '@overleaf/metrics'
import * as HistoryResourceWriter from './HistoryResourceWriter.js'
import Errors from './Errors.js'
import ConversionManager from './ConversionManager.js'
import ConversionOutputCleaner from './ConversionOutputCleaner.js'
import OutputCacheManager from './OutputCacheManager.js'
import ResourceWriter from './ResourceWriter.js'
import RequestParser from './RequestParser.js'
import { pipeline } from 'node:stream/promises'
import Settings from '@overleaf/settings'
import Path from 'node:path'
import { z } from '@overleaf/validation-tools'

const execFileAsync = promisify(execFile)

// Output formats accepted by convertProjectToDocument, mapped to the file
// extension used for the generated `output.<extension>` file.
const CONVERSION_CONFIGS = {
  docx: { extension: 'docx' },
  markdown: { extension: 'zip' },
  html: { extension: 'zip' },
  typst: { extension: 'typ' },
  latex: { extension: 'tex' },
}

// Source formats accepted by convertDocumentToLaTeX.
const IMPORTABLE_TYPES = new Set(['docx', 'markdown', 'typst'])

/**
 * Best-effort removal of a single file. Failures are deliberately ignored:
 * this is only used for cleanup and must never mask the real outcome of a
 * request.
 *
 * @param {string} filePath
 * @returns {Promise<void>}
 */
async function removeFileQuietly(filePath) {
  await fs.unlink(filePath).catch(() => {})
}

/**
 * Best-effort recursive removal of a directory. Failures are deliberately
 * ignored for the same reason as in removeFileQuietly.
 *
 * @param {string} dirPath
 * @returns {Promise<void>}
 */
async function removeDirQuietly(dirPath) {
  await fs.rm(dirPath, { recursive: true, force: true }).catch(() => {})
}

/**
 * Streams a file on disk to the client as a download.
 *
 * @param {object} res - Express response
 * @param {string} filePath - file to send
 * @param {string} downloadName - file name passed to `res.attachment`
 * @returns {Promise<void>} settles when the stream pipeline has finished
 */
async function streamAttachment(res, filePath, downloadName) {
  const { size } = await fs.stat(filePath)
  res.setHeader('Content-Length', size)
  res.attachment(downloadName)
  res.setHeader('X-Content-Type-Options', 'nosniff')
  await pipeline(fsSync.createReadStream(filePath), res)
}

/**
 * Replies with 422 for an `Errors.ConversionError`. User-facing errors expose
 * their stderr and exit code to the client; anything else is logged and
 * answered with an empty JSON body.
 *
 * @param {object} res - Express response
 * @param {object} err - an `Errors.ConversionError`
 * @param {object} logContext - extra fields for the warning log entry
 * @returns {object} the value returned by `res.json`
 */
function sendConversionError(res, err, logContext) {
  if (err.isUserFacing) {
    return res.status(422).json({
      error: err.stderr,
      exitCode: err.exitCode,
    })
  }
  logger.warn(
    { err, ...logContext, stderr: err.stderr },
    'Conversion failed with non-user-facing error'
  )
  // `return` and its expression stay on one line: a line break after
  // `return` would end the statement and no response would be sent.
  return res.status(422).json({})
}

/**
 * Converts an uploaded document (`req.file`) to LaTeX and streams the result
 * back as `conversion.zip`.
 *
 * Responses: 404 when pandoc conversions are disabled, 400 when the upload or
 * a supported `type` query parameter is missing, 422 when the conversion
 * itself fails. The uploaded file and the conversion output directory are
 * removed before the handler returns.
 *
 * @param {object} req - Express request; `req.file.path` is presumably set by
 *   an upload middleware mounted in front of this handler
 * @param {object} res - Express response
 */
async function convertDocumentToLaTeX(req, res) {
  const path = req.file?.path
  const conversionType = req.query.type

  if (!Settings.enablePandocConversions) {
    if (path) await removeFileQuietly(path)
    return res.sendStatus(404)
  }

  // A request without an upload is a client error, not a server crash.
  if (!path) {
    return res.sendStatus(400)
  }

  if (!IMPORTABLE_TYPES.has(conversionType)) {
    await removeFileQuietly(path)
    return res.sendStatus(400)
  }

  logger.debug({ path, conversionType }, 'received file for conversion')

  const conversionId = crypto.randomUUID()

  let zipPath
  try {
    zipPath = await ConversionManager.promises.convertToLaTeXWithLock(
      conversionId,
      path,
      conversionType
    )
  } catch (err) {
    if (err instanceof Errors.ConversionError) {
      return sendConversionError(res, err, { conversionType })
    }
    throw err
  } finally {
    // The upload is no longer needed whether or not the conversion worked.
    await removeFileQuietly(path)
  }

  try {
    await streamAttachment(res, zipPath, 'conversion.zip')
  } finally {
    await removeDirQuietly(Path.dirname(zipPath))
  }
}

const PDFToJPEGQuerySchema = z.object({
  mode: z.enum(['preview', 'thumbnail']),
})

/**
 * Converts an uploaded PDF (`req.file`) to a JPEG and streams it back as
 * `output.jpg`.
 *
 * Responses: 404 when PDF conversions are disabled, 400 when the upload is
 * missing or the query does not match PDFToJPEGQuerySchema.
 *
 * NOTE(review): unlike the pandoc handlers, conversion failures are not
 * mapped to 422 here; they propagate to the Express error handling. Confirm
 * this is intended.
 *
 * @param {object} req - Express request
 * @param {object} res - Express response
 */
async function convertPDFToJPEG(req, res) {
  const path = req.file?.path

  if (!Settings.enablePdfConversions) {
    if (path) await removeFileQuietly(path)
    return res.sendStatus(404)
  }

  // A request without an upload is a client error, not a server crash.
  if (!path) {
    return res.sendStatus(400)
  }

  const parsed = PDFToJPEGQuerySchema.safeParse(req.query)
  if (!parsed.success) {
    await removeFileQuietly(path)
    return res.sendStatus(400)
  }
  const { mode } = parsed.data

  logger.debug({ path, mode }, 'received pdf for conversion to jpeg')

  const conversionId = crypto.randomUUID()

  let jpegPath
  try {
    jpegPath = await ConversionManager.promises.convertPDFToJPEGWithLock(
      conversionId,
      path,
      mode
    )
  } finally {
    await removeFileQuietly(path)
  }

  try {
    await streamAttachment(res, jpegPath, 'output.jpg')
  } finally {
    await removeDirQuietly(Path.dirname(jpegPath))
  }
}

/**
 * Syncs a project's resources to a fresh conversion directory, converts the
 * root resource to the requested document type and returns the result.
 *
 * With `?responseFormat=json` the output is copied into a build directory
 * under the output dir and `{ conversionId, buildId, file }` is returned;
 * otherwise the document is streamed as an attachment.
 *
 * Responses: 404 when pandoc conversions are disabled, 400 for an unknown
 * `type`, 409 when syncing from history reports missing updates, 422 when the
 * conversion fails.
 *
 * @param {object} req - Express request
 * @param {object} res - Express response
 */
async function convertProjectToDocument(req, res) {
  if (!Settings.enablePandocConversions) {
    return res.sendStatus(404)
  }

  const { user_id: userId, project_id: projectId } = req.params
  const type = req.query.type

  // hasOwn keeps inherited keys such as `constructor` from passing as a type.
  if (!Object.hasOwn(CONVERSION_CONFIGS, type)) {
    return res.sendStatus(400)
  }
  const config = CONVERSION_CONFIGS[type]

  const request = await RequestParser.promises.parse(req.body)
  request.project_id = projectId
  request.user_id = userId
  request.metricsOpts = {}

  const responseFormat = req.query.responseFormat === 'json' ? 'json' : 'stream'

  const conversionId = crypto.randomUUID()
  const conversionDir = Path.join(Settings.path.compilesDir, conversionId)
  const conversionCacheDir = Path.join(Settings.path.clsiCacheDir, conversionId)
  const projectCacheDir = Path.join(Settings.path.clsiCacheDir, projectId)
  const cleanupDirs = [conversionCacheDir, conversionDir]

  logger.debug(
    {
      projectId,
      userId,
      rootResourcePath: request.rootResourcePath,
      type,
    },
    'syncing resources for project-to-document conversion'
  )

  Metrics.inc('convert_project_to_document', 1, {
    compileFromHistory: request.isCompileFromHistory,
    method: type,
  })

  try {
    // A recursive mkdir resolves to a path only when it had to create one.
    if (await fs.mkdir(projectCacheDir, { recursive: true })) {
      // Newly created. Cleanup behind us.
      cleanupDirs.push(projectCacheDir)
    }

    if (request.isCompileFromHistory) {
      await fs.mkdir(conversionDir)
      try {
        await HistoryResourceWriter.syncResourcesToDisk(
          projectId,
          userId,
          request,
          conversionDir,
          {}
        )
      } catch (err) {
        if (err instanceof Errors.MissingUpdatesError) {
          return res.status(409).json({
            baseHistoryVersion: err.info.baseHistoryVersion,
          })
        }
        throw err
      }
    } else {
      await ResourceWriter.promises.syncResourcesToDisk(request, conversionDir)
    }

    const documentPath =
      await ConversionManager.promises.convertLaTeXToDocumentInDirWithLock(
        conversionId,
        conversionDir,
        request.rootResourcePath,
        type
      )

    const outputName = `output.${config.extension}`

    if (responseFormat === 'json') {
      // TODO: drop the streaming branch once web is migrated to the two-step flow
      const buildId = await OutputCacheManager.promises.generateBuildId()
      const buildDir = Path.join(
        Settings.path.outputDir,
        conversionId,
        OutputCacheManager.CACHE_SUBDIR,
        buildId
      )
      try {
        await fs.mkdir(buildDir, { recursive: true })
        await fs.copyFile(documentPath, Path.join(buildDir, outputName))
        res.json({ conversionId, buildId, file: outputName })
      } finally {
        // Scheduled even when the copy fails, so a partial build dir is
        // still handed to the cleaner.
        ConversionOutputCleaner.scheduleCleanup(conversionId)
      }
    } else {
      await streamAttachment(res, documentPath, outputName)
    }
  } catch (err) {
    if (err instanceof Errors.ConversionError) {
      return sendConversionError(res, err, { type })
    }
    throw err
  } finally {
    for (const dir of cleanupDirs) {
      try {
        await fs.rm(dir, { recursive: true, force: true })
      } catch (err) {
        logger.warn({ err, dir }, 'cleanup failed')
      }
    }
  }
}

// Generates a JPEG thumbnail of page 1 of the compiled output using
// pdftocairo (poppler-utils). Tries output.pdf first (LaTeX / Quarto-PDF),
// then output-slides.pdf (Quarto RevealJS after a PDF-export compile), then
// falls back to rendering slide 1 of output.html via decktape (normal
// RevealJS preview compile). All temp dirs are cleaned up in finally.
//
// Responses: 400 for a build id that does not match BUILD_REGEX, 404 when no
// PDF exists and the decktape fallback fails, 500 when thumbnail generation
// fails before any headers were sent.
async function thumbnailFromBuild(req, res) {
  const {
    project_id: projectId,
    user_id: userId,
    build_id: buildId,
  } = req.params
  if (!buildId?.match(OutputCacheManager.BUILD_REGEX)) {
    return res.sendStatus(400)
  }

  const compileName = userId ? `${projectId}-${userId}` : projectId
  const buildDir = Path.join(
    Settings.path.outputDir,
    compileName,
    OutputCacheManager.CACHE_SUBDIR,
    buildId
  )

  let pdfPath = null
  let deckTapeDir = null

  for (const name of ['output.pdf', 'output-slides.pdf']) {
    const candidate = Path.join(buildDir, name)
    try {
      await fs.access(candidate)
      pdfPath = candidate
      break
    } catch {
      // Not accessible: fall through to the next candidate.
    }
  }

  if (!pdfPath) {
    const htmlPath = Path.join(buildDir, 'output.html')
    try {
      await fs.access(htmlPath)
      deckTapeDir = await fs.mkdtemp(Path.join(os.tmpdir(), 'clsi-deck-'))
      // Chrome gets a private, disposable home so nothing is written outside
      // the temp dir.
      const chromeHome = Path.join(deckTapeDir, 'chrome')
      await fs.mkdir(chromeHome, { recursive: true })
      const slidePdf = Path.join(deckTapeDir, 'slide1.pdf')
      // NOTE(review): Chrome runs with --no-sandbox on project-generated
      // HTML; confirm the surrounding environment provides the isolation.
      await execFileAsync(
        'decktape',
        [
          '--slides',
          '1',
          '--chrome-arg=--no-sandbox',
          '--chrome-arg=--disable-dev-shm-usage',
          '--chrome-arg=--disable-gpu',
          `--chrome-arg=--user-data-dir=${chromeHome}/data`,
          htmlPath,
          slidePdf,
        ],
        {
          timeout: 60000,
          env: {
            ...process.env,
            HOME: chromeHome,
            XDG_CONFIG_HOME: chromeHome,
            XDG_CACHE_HOME: chromeHome,
          },
        }
      )
      pdfPath = slidePdf
    } catch (err) {
      logger.warn(
        { err, projectId, buildId },
        'decktape slide1 thumbnail failed'
      )
      if (deckTapeDir) {
        await removeDirQuietly(deckTapeDir)
        deckTapeDir = null
      }
      return res.sendStatus(404)
    }
  }

  // Created inside the try below so that a failing mkdtemp still reaches the
  // finally block and cannot leak the decktape temp dir.
  let tmpDir = null
  try {
    tmpDir = await fs.mkdtemp(Path.join(os.tmpdir(), 'clsi-thumb-'))
    const outputBase = Path.join(tmpDir, 'thumb')
    // With -singlefile, pdftocairo writes exactly `<outputBase>.jpg`.
    const jpegPath = outputBase + '.jpg'

    await execFileAsync(
      'pdftocairo',
      [
        '-jpeg',
        '-jpegopt',
        'quality=90',
        '-singlefile',
        '-scale-to-x',
        '794',
        '-scale-to-y',
        '-1',
        '-f',
        '1',
        '-l',
        '1',
        pdfPath,
        outputBase,
      ],
      { timeout: 30000 }
    )

    const jpegStat = await fs.stat(jpegPath)
    res.setHeader('Content-Type', 'image/jpeg')
    res.setHeader('Content-Length', jpegStat.size)
    res.setHeader('Cache-Control', 'public, max-age=86400')
    res.setHeader('X-Content-Type-Options', 'nosniff')
    const readStream = fsSync.createReadStream(jpegPath)
    await pipeline(readStream, res)
  } catch (err) {
    logger.warn({ err, projectId, buildId }, 'thumbnail generation failed')
    // Once streaming has started the status can no longer be changed.
    if (!res.headersSent) res.sendStatus(500)
  } finally {
    if (tmpDir) {
      await removeDirQuietly(tmpDir)
    }
    if (deckTapeDir) {
      await removeDirQuietly(deckTapeDir)
    }
  }
}

export default {
  convertDocumentToLaTeX: expressify(convertDocumentToLaTeX),
  convertProjectToDocument: expressify(convertProjectToDocument),
  convertPDFToJPEG: expressify(convertPDFToJPEG),
  thumbnailFromBuild: expressify(thumbnailFromBuild),
}