import Path from 'node:path'
import { promisify } from 'node:util'
import logger from '@overleaf/logger'
import CommandRunner from './CommandRunner.js'
import fs from 'node:fs'

// Maps currently-running Quarto jobs: compileName → PID (or docker container id)
const ProcessTable = {}

// Render a project's root document with Quarto via CommandRunner, then persist
// the captured output to output.log and append missing-resource warnings.
//
//   compileName - key under which the running job is tracked in ProcessTable
//   options     - { directory, mainFile, image, environment, compileGroup,
//                   timeout (ms, defaults to 60000), exportMode,
//                   allowPythonInstall }
//   callback    - called with (error) only when the process was terminated or
//                 timed out; otherwise with (null, output), where output is the
//                 runner's output object (or { stdout } recovered from the
//                 error, or null).
function runQuarto(compileName, options, callback) {
  const { directory, mainFile, image, environment, compileGroup } = options
  const timeout = options.timeout || 60000

  logger.debug(
    { directory, timeout, mainFile, compileGroup },
    'starting quarto compile'
  )

  // For the standalone-HTML export we must render a deck whose frontmatter
  // carries embed-resources (it cannot be set from the CLI: Quarto only honours
  // embed-resources when it is nested under the format, and a document's own
  // format block fully overrides project/CLI metadata). So write a temporary
  // copy of the root .qmd with the options injected and render that instead.
  let renderTarget = mainFile
  if (options.exportMode === 'html-standalone') {
    renderTarget = _writeStandaloneVariant(directory, mainFile)
  }
  // _writeStandaloneVariant returns mainFile itself when it could not write a
  // copy. Only a genuine temporary copy may be deleted after the render;
  // otherwise the cleanup would remove the project's real root document.
  const renderedTemporaryCopy = renderTarget !== mainFile

  // Where cached per-project venvs live (shared across projects, keyed by the
  // requirements.vrf hash). Must be on a persistent volume in production.
  const venvBaseDir =
    process.env.PYTHON_VENVS_DIR || '/var/lib/overleaf/data/python-venvs'

  const command = _buildQuartoCommand(
    renderTarget,
    options.exportMode,
    Boolean(options.allowPythonInstall),
    venvBaseDir,
    renderedTemporaryCopy
  )

  ProcessTable[compileName] = CommandRunner.run(
    compileName,
    command,
    directory,
    image,
    timeout,
    environment || {},
    compileGroup,
    null,
    function (error, output) {
      delete ProcessTable[compileName]

      // Propagate real process-level errors (killed, timed out) but NOT
      // ordinary non-zero exit codes from Quarto itself. A Quarto compile
      // failure (exit code 1) is not a server error — the absence of
      // output.pdf is sufficient for CompileController to return 'failure'.
      if (error && (error.terminated || error.timedout)) {
        return callback(error)
      }

      // On exit-code-1 errors LocalCommandRunner attaches stdout to the
      // error object; merge it so _writeLogOutput can persist it.
      const combined =
        output || (error ? { stdout: error.stdout || '' } : null)

      _writeLogOutput(compileName, directory, combined, () =>
        _appendMissingResourceWarnings(directory, () =>
          callback(null, combined)
        )
      )
    }
  )
}

// Quote a value as a single POSIX-shell word. Inside single quotes nothing is
// special to the shell, so the only character needing treatment is the single
// quote itself (closed, escaped, reopened: '\'').
function _shellQuote(value) {
  return `'${String(value).replace(/'/g, `'\\''`)}'`
}

// Build the argv (['/bin/sh', '-c', <script>]) that renders renderTarget.
//
//   renderTarget       - file to render, relative to the compile directory
//   exportMode         - 'pdf-slides' adds a decktape step; 'html-standalone'
//                        enables the temp-copy cleanup (see removeRenderTarget)
//   allowPythonInstall - when true, prepend the requirements.vrf venv setup
//   venvBaseDir        - directory holding the cached venvs
//   removeRenderTarget - true only when renderTarget is a temporary copy that
//                        should be deleted after the render
function _buildQuartoCommand(
  renderTarget,
  exportMode,
  allowPythonInstall,
  venvBaseDir,
  removeRenderTarget = false
) {
  // Run through a POSIX shell so stderr is merged into stdout (2>&1).
  // LocalCommandRunner replaces $COMPILE_DIR before the shell sees it.
  //
  // We do NOT pass --to or --output: let the YAML frontmatter decide the
  // output format (typst → output.pdf, revealjs → output.html, etc.).
  //
  // For a normal preview compile we do NOT embed resources. A self-contained
  // single-file HTML breaks reveal.js plugins that load/store resources at
  // runtime (e.g. chalkboard, multiplex) and is slow to transfer. Instead
  // Quarto emits the HTML plus a sibling "<baseName>_files/" asset directory;
  // the HTML references it with relative paths. Both the html and the asset
  // dir are served from the same .../output/ path, so the relative links
  // resolve. For the 'html-standalone' export, runQuarto instead renders a
  // temporary copy of the deck (renderTarget) whose frontmatter enables
  // embed-resources, producing a single portable file.
  //
  // After render we rename the produced top-level file to output.pdf or
  // output.html. The asset directory keeps its "<baseName>_files" name; the
  // renamed output.html still points at it via the unchanged relative refs.
  //
  // The extension merge (cp -rn, no-clobber so user extensions win) and the
  // trailing semicolon (so a missing /opt/quarto-extensions doesn't abort)
  // are kept. mv uses relative paths because LocalCommandRunner.replace()
  // only substitutes the FIRST $COMPILE_DIR and the shell CWD is the dir.
  //
  // File names come from the user's project, so every occurrence is passed
  // through _shellQuote, and mv/rm get "--" so a leading "-" is never read as
  // an option.
  const inputPath = `$COMPILE_DIR/${_shellQuote(renderTarget)}`
  const baseName = renderTarget.replace(/\.[^/.]+$/, '') // strip extension
  const producedPdf = _shellQuote(`${baseName}.pdf`)
  const producedHtml = _shellQuote(`${baseName}.html`)

  let tail =
    `(mv -- ${producedPdf} output.pdf 2>/dev/null || ` +
    `mv -- ${producedHtml} output.html 2>/dev/null)`

  if (exportMode === 'pdf-slides') {
    // After producing output.html, print it to output-slides.pdf with decktape
    // (headless Chromium via Puppeteer). The CLSI runtime user has no writable
    // HOME, so Chromium's crashpad can't create its database and the browser
    // dies on launch ("chrome_crashpad_handler: --database is required").
    // Point HOME / XDG dirs / the Chromium user-data-dir at a fresh writable
    // temp dir to give it somewhere to write.
    //   --no-sandbox: Chromium can't sandbox as a non-root container user
    //   --disable-dev-shm-usage: a tiny container /dev/shm crashes Chromium
    //   --disable-gpu: there is no GPU in the container
    tail +=
      ` && CHROME_HOME="$(mktemp -d)" && ` +
      `HOME="$CHROME_HOME" XDG_CONFIG_HOME="$CHROME_HOME" ` +
      `XDG_CACHE_HOME="$CHROME_HOME" decktape ` +
      `--chrome-arg=--no-sandbox ` +
      `--chrome-arg=--disable-dev-shm-usage ` +
      `--chrome-arg=--disable-gpu ` +
      `--chrome-arg=--user-data-dir="$CHROME_HOME/data" ` +
      `"$(pwd)/output.html" output-slides.pdf 2>&1`
  }

  // For the standalone export, remove the temporary render copy afterwards so
  // it can't be mistaken for a project file or picked up by a later preview
  // compile. Runs regardless of render success (";"). Skipped when the render
  // target is the project's own file (no temporary copy was written).
  const cleanup =
    exportMode === 'html-standalone' && removeRenderTarget
      ? `; rm -rf -- ${_shellQuote(`${baseName}.qmd`)} ` +
        `${_shellQuote(`${baseName}_files`)}`
      : ''

  const venvPrep = allowPythonInstall ? _pythonVenvPrep(venvBaseDir) : ''

  const cmd =
    `mkdir -p _extensions && ` +
    `cp -rn /opt/quarto-extensions/_extensions/. _extensions/ 2>/dev/null; ` +
    venvPrep +
    `quarto render ${inputPath} 2>&1 && ` +
    tail +
    cleanup

  return ['/bin/sh', '-c', cmd]
}

// Shell snippet (run before `quarto render`, in the compile dir) that installs
// a project's requirements.vrf into a venv cached by the file's sha256 and
// points Quarto at it via QUARTO_PYTHON. Notes:
//  - The venv is shared across projects/compiles (keyed by content hash), so
//    identical dependency sets are built once.
//  - --system-site-packages keeps the bundled scientific stack + ipykernel
//    visible, so only the *extra* packages are installed.
//  - A per-hash flock serialises concurrent compiles building the same venv.
//  - Everything is merged to stdout so pip output/errors land in output.log;
//    on failure QUARTO_PYTHON is left unset and the render falls back to the
//    base interpreter (the missing-package error then surfaces normally).
//  - Only $-shell vars / $(...) are used (no ${...}) to avoid clashing with
//    JS template interpolation; only ${venvBaseDir} is substituted by JS.
function _pythonVenvPrep(venvBaseDir) {
  return (
    `if [ -f requirements.vrf ]; then ` +
    `VBASE="${venvBaseDir}"; ` +
    `RHASH=$(sha256sum requirements.vrf 2>/dev/null | cut -d" " -f1); ` +
    `if [ -n "$RHASH" ]; then ` +
    `VDIR="$VBASE/$RHASH"; mkdir -p "$VBASE" 2>/dev/null; ` +
    `( flock 9 || exit 0; ` +
    `if [ ! -f "$VDIR/.verso-ready" ]; then ` +
    `echo "Installing Python packages from requirements.vrf..."; rm -rf "$VDIR"; ` +
    `python3 -m venv --system-site-packages "$VDIR" ` +
    `&& "$VDIR/bin/pip" install --no-input --disable-pip-version-check -r requirements.vrf ` +
    // Register a python3 kernelspec INSIDE the venv (argv -> the venv's python)
    // so Quarto runs the kernel in the venv, not the base /usr/bin/python3 from
    // the global kernelspec. ipykernel is visible via --system-site-packages.
    `&& "$VDIR/bin/python3" -m ipykernel install --sys-prefix --name python3 --display-name "Python 3" ` +
    `&& touch "$VDIR/.verso-ready" ` +
    `|| echo "ERROR: Failed to install Python packages from requirements.vrf"; ` +
    `fi ` +
    `) 9>"$VBASE/.$RHASH.lock" 2>&1; ` +
    `if [ -f "$VDIR/.verso-ready" ]; then export QUARTO_PYTHON="$VDIR/bin/python3"; fi; ` +
    `fi; ` +
    `fi; `
  )
}

// Write a temporary copy of the root .qmd with embed-resources enabled in its
// frontmatter, returning the temp filename to render. On any problem (no
// frontmatter, not a nested revealjs deck, read/write error) it falls back to
// the original mainFile — the export then just isn't self-contained, which is
// no worse than before. The temp file lives in the same directory so relative
// resources (images, _extensions) still resolve.
function _writeStandaloneVariant(directory, mainFile) {
  try {
    const content = fs.readFileSync(Path.join(directory, mainFile), 'utf8')
    const transformed = _injectRevealjsStandaloneOptions(content)
    if (!transformed) return mainFile
    const base = mainFile.replace(/\.[^/.]+$/, '')
    const tempName = `${base}.verso-standalone.qmd`
    fs.writeFileSync(Path.join(directory, tempName), transformed)
    return tempName
  } catch (err) {
    logger.warn({ err, directory, mainFile }, 'could not prepare standalone qmd')
    return mainFile
  }
}

// Inject the self-contained options into the `revealjs:` block of a deck's
// YAML frontmatter. embed-resources/self-contained-math inline all CSS/JS/
// images/MathJax into one portable file; chalkboard must be off (it is
// incompatible with embed-resources and would error the render). Keys already
// present in the block are overwritten in place (so we never create duplicate
// YAML keys, e.g. an existing `chalkboard: true`); missing keys are inserted.
// Returns the new document text, or null if it isn't a nested-revealjs deck we
// can safely edit.
// Rewrite a document's YAML frontmatter so its nested `revealjs:` block sets
// embed-resources: true, self-contained-math: true and chalkboard: false.
//
//   content - full text of the .qmd document
//   returns - the rewritten document text, or null when there is no leading
//             `---` frontmatter or no line consisting solely of `revealjs:`
//
// Only DIRECT children of `revealjs:` are rewritten. When an overwritten key
// had a nested (more-indented) value — e.g. a `chalkboard:` mapping with its
// own options — those nested lines are removed with it, because leaving them
// under a scalar value would make the YAML invalid. The frontmatter's line
// ending (LF or CRLF, taken from the opening fence) is used for every
// frontmatter line that is written back.
function _injectRevealjsStandaloneOptions(content) {
  const fmMatch = content.match(/^(---\r?\n)([\s\S]*?)(\r?\n---\r?\n?)/)
  if (!fmMatch) return null
  const [, open, body, close] = fmMatch

  const eol = open.endsWith('\r\n') ? '\r\n' : '\n'
  const lines = body.split(/\r?\n/)
  const leadingSpace = line => line.match(/^(\s*)/)[1]

  const revealIdx = lines.findIndex(l => /^\s*revealjs:\s*$/.test(l))
  if (revealIdx === -1) return null // not a `format:\n  revealjs:` deck
  const revealIndent = leadingSpace(lines[revealIdx])

  // Determine the block's child indent (from the first more-indented line) and
  // where the block ends (the first later line indented at/under revealjs:).
  let childIndent = revealIndent + '  '
  let blockEnd = lines.length
  let seenChild = false
  for (let i = revealIdx + 1; i < lines.length; i++) {
    if (lines[i].trim() === '') continue
    const indent = leadingSpace(lines[i])
    if (indent.length <= revealIndent.length) {
      blockEnd = i
      break
    }
    if (!seenChild) {
      childIndent = indent
      seenChild = true
    }
  }

  const desired = {
    'embed-resources': 'true',
    'self-contained-math': 'true',
    chalkboard: 'false',
  }

  const present = new Set()
  const blockLines = []
  let i = revealIdx + 1
  while (i < blockEnd) {
    const km = lines[i].match(/^(\s*)([A-Za-z0-9_-]+):/)
    const isTargetKey =
      km !== null &&
      km[1] === childIndent &&
      Object.prototype.hasOwnProperty.call(desired, km[2])
    if (!isTargetKey) {
      blockLines.push(lines[i])
      i++
      continue
    }

    const key = km[2]
    blockLines.push(`${childIndent}${key}: ${desired[key]}`)
    present.add(key)

    // Drop the old value's continuation: every following line indented deeper
    // than the key. Blank lines are only dropped when a deeper line follows
    // them, so spacing before the next sibling key is preserved.
    let lastNested = i
    for (let j = i + 1; j < blockEnd; j++) {
      if (lines[j].trim() === '') continue
      if (leadingSpace(lines[j]).length <= childIndent.length) break
      lastNested = j
    }
    i = lastNested + 1
  }

  // Keys not already in the block go right after the `revealjs:` line.
  const additions = Object.keys(desired)
    .filter(k => !present.has(k))
    .map(k => `${childIndent}${k}: ${desired[k]}`)

  const rebuilt = [
    ...lines.slice(0, revealIdx + 1),
    ...additions,
    ...blockLines,
    ...lines.slice(blockEnd),
  ]

  return open + rebuilt.join(eol) + close + content.slice(fmMatch[0].length)
}

// Persist the compile's captured stdout to <directory>/output.log.
//
//   compileName - only used as context when logging a write failure
//   directory   - compile directory in which output.log is written
//   output      - object with a `stdout` string, or null/undefined
//   callback    - always called with no arguments; a write failure is logged,
//                 not reported to the caller
//
// Nothing is written (and callback runs synchronously) when there is no
// stdout content. Otherwise any existing output.log is unlinked first and the
// file is re-created with the exclusive 'wx' flag.
function _writeLogOutput(compileName, directory, output, callback) {
  const content = (output && output.stdout) || ''
  if (!content) return callback()
  // Write to output.log so the PDF-preview log panel picks it up
  const logFile = Path.join(directory, 'output.log')
  // The unlink result is deliberately ignored: the file may simply not exist.
  fs.unlink(logFile, () => {
    fs.writeFile(logFile, content, { flag: 'wx' }, err => {
      if (err) {
        logger.error({ err, compileName, logFile }, 'error writing quarto log')
      }
      callback()
    })
  })
}
// Quarto's HTML/RevealJS output is NOT self-contained (we deliberately dropped
// --embed-resources so reveal plugins like chalkboard work). A side effect is
// that pandoc no longer tries to fetch referenced media, so a missing image or
// video produces no compile-time warning — it just renders broken in the
// browser. To restore that feedback, scan the produced output.html for local
// media references and emit a [WARNING] for any that don't exist on disk. The
// [WARNING] prefix is understood by the Quarto/Typst log parser on the web
// side, so these surface in the Warnings tab like any other.
//
// Only HTML output is scanned: PDF output (Typst) already hard-errors on a
// missing image, so it needs no extra check.
//
//   directory - compile directory containing output.html / output.log
//   callback  - always called with no arguments. This step is best-effort: an
//               unreadable output.html means "nothing to check", and a failed
//               append is logged but never fails the compile.
function _appendMissingResourceWarnings(directory, callback) {
  const htmlFile = Path.join(directory, 'output.html')
  fs.readFile(htmlFile, 'utf8', (err, html) => {
    if (err) return callback() // no HTML output (e.g. a PDF compile)

    const missing = _extractLocalMediaRefs(html).filter(ref => {
      try {
        return !fs.existsSync(Path.join(directory, decodeURIComponent(ref)))
      } catch {
        // decodeURIComponent throws on malformed %-escapes; such a ref can't
        // be checked, so don't report it.
        return false
      }
    })
    if (missing.length === 0) return callback()

    const warnings =
      missing
        .map(
          ref =>
            `[WARNING] Missing resource: ${ref} (referenced in the document ` +
            `but not found in the project — it will appear broken)`
        )
        .join('\n') + '\n'

    const logFile = Path.join(directory, 'output.log')
    fs.appendFile(logFile, '\n' + warnings, appendErr => {
      if (appendErr) {
        logger.warn(
          { err: appendErr, logFile },
          'could not append missing-resource warnings to quarto log'
        )
      }
      callback()
    })
  })
}

// Pull local media references (img/video/audio/iframe src, poster, RevealJS
// data-background-*) out of the rendered HTML. External URLs, data URIs and
// in-page anchors are ignored; Quarto's own generated assets (under
// <baseName>_files/) exist on disk, so they never get flagged.
// Named character references decoded inside attribute values. Numeric
// references (&#39; / &#x27;) are handled separately.
const _HTML_NAMED_ENTITIES = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
}

// Decode the HTML character references that can appear in an attribute value.
// Unknown named references and out-of-range code points are left untouched.
function _decodeHtmlEntities(text) {
  return text.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-z]+));/g,
    (entity, dec, hex, name) => {
      if (name !== undefined) {
        return Object.prototype.hasOwnProperty.call(_HTML_NAMED_ENTITIES, name)
          ? _HTML_NAMED_ENTITIES[name]
          : entity
      }
      const codePoint =
        dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16)
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity
    }
  )
}

// Collect the distinct local media references found in rendered HTML.
//
//   html    - the HTML text to scan
//   returns - array of unique reference strings, in order of first appearance,
//             with any query string / fragment removed. Values are still
//             percent-encoded as written in the document.
//
// Scanned attributes: src, poster, data-background-image and
// data-background-video (the pattern is not anchored, so `data-src` matches
// too). A value is closed only by the same quote character that opened it,
// and HTML character references are decoded before the value is inspected, so
// names containing `'`, `"` or `&` come out intact.
function _extractLocalMediaRefs(html) {
  const refs = new Set()
  const attrRegex =
    /(?:src|poster|data-background-image|data-background-video)\s*=\s*(?:"([^"]*)"|'([^']*)')/gi

  for (const match of html.matchAll(attrRegex)) {
    const rawValue = match[1] !== undefined ? match[1] : match[2]
    const url = _decodeHtmlEntities(rawValue).trim()
    if (!url) continue
    // Skip absolute URLs (scheme:, which covers data:/blob:), anything
    // starting with "/" (absolute and protocol-relative paths) and anchors.
    if (/^(?:[a-z]+:|\/|#)/i.test(url)) continue
    const clean = url.split(/[?#]/)[0] // drop query string / fragment
    if (clean) refs.add(clean)
  }
  return [...refs]
}

// True while a Quarto job is registered for compileName in ProcessTable.
function isRunning(compileName) {
  return ProcessTable[compileName] != null
}

// Ask CommandRunner to kill the job registered for compileName.
//
//   callback - called with (null) straight away when no such job is running;
//              otherwise it is handed to CommandRunner.kill.
function killQuarto(compileName, callback) {
  logger.debug({ compileName }, 'killing running quarto compile')
  if (!isRunning(compileName)) {
    logger.warn({ compileName }, 'no such compile to kill')
    return callback(null)
  }
  CommandRunner.kill(ProcessTable[compileName], callback)
}

export default {
  isRunning,
  runQuarto,
  killQuarto,
  promises: {
    runQuarto: promisify(runQuarto),
    killQuarto: promisify(killQuarto),
  },
}