Files
Verso/services/clsi/app/js/QuartoRunner.js
T
claude c9727a26e4
Build and Deploy Verso / deploy (push) Successful in 9m46s
Python deps: smart missing-package hint + switch to .vrf requirements file
Option A: when a {python} cell fails with ModuleNotFoundError/ImportError, the
log now suggests the exact PyPI package to add (with a module->package map, e.g.
cv2 -> opencv-python, sklearn -> scikit-learn), names the Verso requirements
file, and notes it could instead be a local module — so the langmuirthermalstudy
case isn't mistaken for a PyPI package.

Switch the per-project requirements file from requirements.txt to a Verso-
specific requirements.vrf (so it won't be confused with arbitrary .txt files);
QuartoRunner now looks for requirements.vrf, and 'vrf' is registered as an
editable text extension. The dedicated in-UI editor (and hiding it from the
file tree) follows in a separate change.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-02 14:19:01 +00:00

366 lines
15 KiB
JavaScript

import Path from 'node:path'
import { promisify } from 'node:util'
import logger from '@overleaf/logger'
import CommandRunner from './CommandRunner.js'
import fs from 'node:fs'
// Registry of currently-running Quarto jobs, keyed by compileName. runQuarto
// stores whatever CommandRunner.run returns (a PID, or a docker container id)
// and deletes the entry when the command's callback fires; killQuarto hands
// the stored value to CommandRunner.kill.
const ProcessTable = {}
// Start a Quarto render for `compileName` and report back through `callback`.
//
// options: { directory, mainFile, image, environment, compileGroup, timeout,
//            exportMode, allowPythonInstall }
// callback(error) is used only for process-level failures (terminated or
// timed out); otherwise callback(null, output) is called once the log has
// been written, even when Quarto itself exited non-zero.
function runQuarto(compileName, options, callback) {
  const { directory, mainFile, image, environment, compileGroup } = options
  const timeout = options.timeout || 60000
  logger.debug(
    { directory, timeout, mainFile, compileGroup },
    'starting quarto compile'
  )

  // The standalone-HTML export needs embed-resources in the document's own
  // frontmatter (Quarto only honours it nested under the format, and a
  // document's format block overrides project/CLI metadata), so that export
  // renders a temporary copy of the root .qmd with the options injected.
  const wantsStandalone = options.exportMode === 'html-standalone'
  const renderTarget = wantsStandalone
    ? _writeStandaloneVariant(directory, mainFile)
    : mainFile

  // Cached venvs are shared across projects and keyed by the requirements.vrf
  // hash; in production this directory must sit on a persistent volume.
  const venvBaseDir =
    process.env.PYTHON_VENVS_DIR || '/var/lib/overleaf/data/python-venvs'

  const command = _buildQuartoCommand(
    renderTarget,
    options.exportMode,
    Boolean(options.allowPythonInstall),
    venvBaseDir
  )

  function onCommandDone(error, output) {
    delete ProcessTable[compileName]

    // Only killed / timed-out runs are surfaced as errors. An ordinary
    // non-zero Quarto exit is a compile failure, not a server error: the
    // missing output file is enough for the caller to report 'failure'.
    const processLevelFailure =
      Boolean(error) && (error.terminated || error.timedout)
    if (processLevelFailure) {
      return callback(error)
    }

    // On a plain non-zero exit the runner attaches stdout to the error
    // object; fold it into an output-shaped value so it can be persisted.
    let combined = output
    if (!combined) {
      combined = error ? { stdout: error.stdout || '' } : null
    }

    _writeLogOutput(compileName, directory, combined, () => {
      _appendMissingResourceWarnings(directory, () => {
        callback(null, combined)
      })
    })
  }

  ProcessTable[compileName] = CommandRunner.run(
    compileName,
    command,
    directory,
    image,
    timeout,
    environment || {},
    compileGroup,
    null,
    onCommandDone
  )
}
// Build the `/bin/sh -c` argv that renders `renderTarget` with Quarto.
//
// renderTarget       path of the .qmd to render, relative to the compile dir
// exportMode         undefined for a preview; 'pdf-slides' additionally prints
//                    output.html to output-slides.pdf with decktape;
//                    'html-standalone' renders the temporary self-contained
//                    copy and removes it afterwards
// allowPythonInstall when true, the requirements.vrf venv prep snippet is run
//                    before the render
// venvBaseDir        base directory for the cached venvs
//
// Returns ['/bin/sh', '-c', <command string>].
function _buildQuartoCommand(
  renderTarget,
  exportMode,
  allowPythonInstall,
  venvBaseDir
) {
  // Run through a POSIX shell so stderr is merged into stdout (2>&1).
  // LocalCommandRunner replaces $COMPILE_DIR before the shell sees it.
  //
  // We do NOT pass --to or --output: the YAML frontmatter decides the output
  // format (typst → output.pdf, revealjs → output.html, etc.).
  //
  // A normal preview compile does NOT embed resources: a self-contained
  // single-file HTML breaks reveal.js plugins that load/store resources at
  // runtime (e.g. chalkboard, multiplex) and is slow to transfer. Quarto emits
  // the HTML plus a sibling "<basename>_files/" asset directory referenced by
  // relative paths; both are served from the same output path.
  //
  // After render the produced top-level file is renamed to output.pdf or
  // output.html. The asset directory keeps its "<basename>_files" name.
  //
  // mv uses relative paths because LocalCommandRunner.replace() only
  // substitutes the FIRST $COMPILE_DIR and the shell CWD is the compile dir.

  // File names come from the project and may contain spaces or shell
  // metacharacters, so every file operand is single-quoted for the shell
  // (an embedded ' becomes '\''), and `--` stops a leading "-" from being
  // parsed as an option.
  const shellQuote = value => "'" + String(value).replace(/'/g, "'\\''") + "'"

  const inputPath = `"$COMPILE_DIR"/${shellQuote(renderTarget)}`
  const baseName = renderTarget.replace(/\.[^/.]+$/, '') // strip extension
  const producedPdf = shellQuote(`${baseName}.pdf`)
  const producedHtml = shellQuote(`${baseName}.html`)

  let tail =
    `(mv -- ${producedPdf} output.pdf 2>/dev/null || ` +
    `mv -- ${producedHtml} output.html 2>/dev/null)`

  if (exportMode === 'pdf-slides') {
    // Print output.html to output-slides.pdf with decktape (headless Chromium
    // via Puppeteer). The CLSI runtime user has no writable HOME, so
    // Chromium's crashpad can't create its database and the browser dies on
    // launch. Point HOME / XDG dirs / the user-data-dir at a fresh temp dir.
    //   --no-sandbox: Chromium can't sandbox as a non-root container user
    //   --disable-dev-shm-usage: a tiny container /dev/shm crashes Chromium
    //   --disable-gpu: there is no GPU in the container
    tail +=
      ` && CHROME_HOME="$(mktemp -d)" && ` +
      `HOME="$CHROME_HOME" XDG_CONFIG_HOME="$CHROME_HOME" ` +
      `XDG_CACHE_HOME="$CHROME_HOME" decktape ` +
      `--chrome-arg=--no-sandbox ` +
      `--chrome-arg=--disable-dev-shm-usage ` +
      `--chrome-arg=--disable-gpu ` +
      `--chrome-arg=--user-data-dir="$CHROME_HOME/data" ` +
      `"$(pwd)/output.html" output-slides.pdf 2>&1`
  }

  // For the standalone export, remove the temporary render copy afterwards so
  // it can't be mistaken for a project file or picked up by a later compile.
  // Runs regardless of render success (";"). The suffix check matters: when
  // the temporary copy could not be prepared the caller renders the original
  // main file instead, and that file (and its asset dir) must NOT be deleted.
  // The suffix must match the one used when the temporary copy is written.
  const isTemporaryCopy =
    exportMode === 'html-standalone' &&
    renderTarget.endsWith('.verso-standalone.qmd')
  const cleanup = isTemporaryCopy
    ? `; rm -rf -- ${shellQuote(renderTarget)} ` +
      shellQuote(`${baseName}_files`)
    : ''

  const venvPrep = allowPythonInstall ? _pythonVenvPrep(venvBaseDir) : ''

  // The extension merge uses cp -rn (no-clobber, so user extensions win) and a
  // trailing ";" so a missing /opt/quarto-extensions doesn't abort the render.
  const cmd =
    `mkdir -p _extensions && ` +
    `cp -rn /opt/quarto-extensions/_extensions/. _extensions/ 2>/dev/null; ` +
    venvPrep +
    `quarto render ${inputPath} 2>&1 && ` +
    tail +
    cleanup
  return ['/bin/sh', '-c', cmd]
}
// Returns the shell snippet that is run in the compile dir before
// `quarto render`. It installs the project's requirements.vrf into a venv
// cached under `venvBaseDir` by the file's sha256 and, when the venv is ready,
// exports QUARTO_PYTHON pointing at it.
//  - The venv directory is named after the content hash, so identical
//    dependency sets share one venv.
//  - --system-site-packages keeps the base interpreter's packages (including
//    ipykernel) visible, so only the extra packages get installed.
//  - A per-hash flock serialises concurrent builds of the same venv.
//  - All output is merged to stdout so pip messages reach the log; if the
//    install fails QUARTO_PYTHON stays unset and the render uses the base
//    interpreter.
//  - The shell parts use only $VAR / $(...) forms; venvBaseDir is the single
//    value substituted by JS.
function _pythonVenvPrep(venvBaseDir) {
  const installSteps = [
    'python3 -m venv --system-site-packages "$VDIR" ',
    '&& "$VDIR/bin/pip" install --no-input --disable-pip-version-check -r requirements.vrf ',
    // Register a python3 kernelspec inside the venv (argv -> the venv's
    // python) so the kernel runs in the venv rather than the base python3
    // named by the global kernelspec.
    '&& "$VDIR/bin/python3" -m ipykernel install --sys-prefix --name python3 --display-name "Python 3" ',
    '&& touch "$VDIR/.verso-ready" ',
    '|| echo "ERROR: Failed to install Python packages from requirements.vrf"; ',
  ]

  const fragments = [
    'if [ -f requirements.vrf ]; then ',
    'VBASE="' + venvBaseDir + '"; ',
    'RHASH=$(sha256sum requirements.vrf 2>/dev/null | cut -d" " -f1); ',
    'if [ -n "$RHASH" ]; then ',
    'VDIR="$VBASE/$RHASH"; mkdir -p "$VBASE" 2>/dev/null; ',
    '( flock 9 || exit 0; ',
    'if [ ! -f "$VDIR/.verso-ready" ]; then ',
    'echo "Installing Python packages from requirements.vrf..."; rm -rf "$VDIR"; ',
    ...installSteps,
    'fi ',
    ') 9>"$VBASE/.$RHASH.lock" 2>&1; ',
    'if [ -f "$VDIR/.verso-ready" ]; then export QUARTO_PYTHON="$VDIR/bin/python3"; fi; ',
    'fi; ',
    'fi; ',
  ]
  return fragments.join('')
}
// Prepare the file to render for the standalone-HTML export. Reads the root
// .qmd, injects the self-contained options into its frontmatter and writes the
// result next to the original as "<base>.verso-standalone.qmd" (same directory,
// so relative resources such as images and _extensions still resolve).
// Returns the temporary file name, or the original mainFile when the document
// could not be transformed or a read/write error occurred — the export then
// simply isn't self-contained.
function _writeStandaloneVariant(directory, mainFile) {
  try {
    const sourcePath = Path.join(directory, mainFile)
    const standaloneText = _injectRevealjsStandaloneOptions(
      fs.readFileSync(sourcePath, 'utf8')
    )
    if (standaloneText) {
      const stem = mainFile.replace(/\.[^/.]+$/, '')
      const tempName = `${stem}.verso-standalone.qmd`
      fs.writeFileSync(Path.join(directory, tempName), standaloneText)
      return tempName
    }
  } catch (err) {
    logger.warn({ err, directory, mainFile }, 'could not prepare standalone qmd')
  }
  return mainFile
}
// Inject the self-contained options into the `revealjs:` block of a deck's
// YAML frontmatter. embed-resources/self-contained-math inline all CSS/JS/
// images/MathJax into one portable file; chalkboard must be off (it is
// incompatible with embed-resources and would error the render).
//
// Only DIRECT children of `revealjs:` are considered. A wanted key that is
// already present is overwritten in place (so no duplicate YAML keys are
// created), and any nested value lines beneath it — e.g. a `chalkboard:`
// mapping with `theme:` / `buttons:` children — are removed, because leaving
// them under the new scalar value would produce invalid YAML. Same-named keys
// nested deeper under some other option are left untouched. Missing keys are
// inserted directly after the `revealjs:` line. The frontmatter is re-joined
// with the line ending of its opening `---` line, so CRLF documents stay CRLF.
//
// content: full text of the .qmd document.
// Returns the new document text, or null if there is no frontmatter or it does
// not contain a nested `revealjs:` block that can be edited safely.
function _injectRevealjsStandaloneOptions(content) {
  const fmMatch = content.match(/^(---\r?\n)([\s\S]*?)(\r?\n---\r?\n?)/)
  if (!fmMatch) return null
  const [frontmatter, open, body, close] = fmMatch
  const eol = open.endsWith('\r\n') ? '\r\n' : '\n'
  const lines = body.split(/\r?\n/)
  const indentOf = line => line.match(/^\s*/)[0]

  const revealIdx = lines.findIndex(l => /^\s*revealjs:\s*$/.test(l))
  if (revealIdx === -1) return null // no `revealjs:` line heading its own block
  const revealIndent = indentOf(lines[revealIdx])

  // Determine the block's child indent (from the first more-indented line) and
  // where the block ends (the first later line indented at/under revealjs:).
  let childIndent = revealIndent + ' '
  let blockEnd = lines.length
  let seenChild = false
  for (let i = revealIdx + 1; i < lines.length; i++) {
    if (lines[i].trim() === '') continue
    const indent = indentOf(lines[i])
    if (indent.length <= revealIndent.length) {
      blockEnd = i
      break
    }
    if (!seenChild) {
      childIndent = indent
      seenChild = true
    }
  }

  const desired = new Map([
    ['embed-resources', 'true'],
    ['self-contained-math', 'true'],
    ['chalkboard', 'false'],
  ])
  const present = new Set()
  const blockLines = []
  let i = revealIdx + 1
  while (i < blockEnd) {
    const line = lines[i]
    const km = line.match(/^(\s*)([A-Za-z0-9_-]+):/)
    const isWantedChild =
      km !== null &&
      km[1].length === childIndent.length &&
      desired.has(km[2])
    if (!isWantedChild) {
      blockLines.push(line)
      i++
      continue
    }
    const key = km[2]
    // A repeated key is emitted once only.
    if (!present.has(key)) {
      blockLines.push(`${childIndent}${key}: ${desired.get(key)}`)
      present.add(key)
    }
    // Skip the old value's nested lines: everything up to the last following
    // line that is indented deeper than the key (blank lines in between are
    // dropped with it; blank lines after it are kept).
    let next = i + 1
    for (let j = i + 1; j < blockEnd; j++) {
      if (lines[j].trim() === '') continue
      if (indentOf(lines[j]).length <= childIndent.length) break
      next = j + 1
    }
    i = next
  }

  const additions = [...desired.keys()]
    .filter(k => !present.has(k))
    .map(k => `${childIndent}${k}: ${desired.get(k)}`)
  const updated = [
    ...lines.slice(0, revealIdx + 1),
    ...additions,
    ...blockLines,
    ...lines.slice(blockEnd),
  ]
  return open + updated.join(eol) + close + content.slice(frontmatter.length)
}
// Persist the command's stdout to <directory>/output.log so the PDF-preview
// log panel picks it up. Does nothing (beyond calling back) when there is no
// stdout text. Any existing output.log is unlinked first and the new file is
// created with the exclusive 'wx' flag; a write error is logged, never
// propagated — callback() is always called without arguments.
function _writeLogOutput(compileName, directory, output, callback) {
  const logText = output?.stdout || ''
  if (!logText) return callback()

  const logFile = Path.join(directory, 'output.log')
  const onWritten = err => {
    if (err) {
      logger.error({ err, compileName, logFile }, 'error writing quarto log')
    }
    callback()
  }
  // The unlink result is deliberately ignored (the file may not exist).
  fs.unlink(logFile, () => {
    fs.writeFile(logFile, logText, { flag: 'wx' }, onWritten)
  })
}
// Preview HTML/RevealJS output is not self-contained (resources are not
// embedded so that reveal plugins such as chalkboard keep working). As a
// consequence a missing image or video yields no compile-time warning — it
// just renders broken in the browser. To restore that feedback, scan the
// produced output.html for local media references and append one [WARNING]
// line to output.log for each reference that does not exist on disk. The
// [WARNING] prefix is the form the web-side log parser recognises.
//
// Only output.html is scanned; when it cannot be read (e.g. a PDF compile)
// nothing is appended. callback() is always invoked without arguments.
function _appendMissingResourceWarnings(directory, callback) {
  const htmlFile = Path.join(directory, 'output.html')

  // A reference counts as missing when its decoded path is absent on disk.
  // An undecodable reference is treated as "not missing" rather than failing.
  const isMissing = ref => {
    try {
      const target = Path.join(directory, decodeURIComponent(ref))
      return !fs.existsSync(target)
    } catch {
      return false
    }
  }

  fs.readFile(htmlFile, 'utf8', (err, html) => {
    if (err) return callback()

    const missing = _extractLocalMediaRefs(html).filter(isMissing)
    if (missing.length === 0) return callback()

    const warningLines = missing.map(
      ref =>
        `[WARNING] Missing resource: ${ref} (referenced in the document but not found in the project — it will appear broken)`
    )
    const logFile = Path.join(directory, 'output.log')
    fs.appendFile(logFile, `\n${warningLines.join('\n')}\n`, () => {
      callback()
    })
  })
}
// Collect the local media references found in rendered HTML: the values of
// src, poster, data-background-image and data-background-video attributes.
// Absolute URLs, scheme-prefixed values (http:, data:, blob:, ...),
// protocol-relative or root-relative paths and in-page anchors are skipped.
// Query strings and fragments are stripped. Returns the distinct references
// in order of first appearance.
function _extractLocalMediaRefs(html) {
  const mediaAttribute =
    /(?:src|poster|data-background-image|data-background-video)\s*=\s*["']([^"']+)["']/gi
  const nonLocalPrefix = /^(?:[a-z]+:|\/\/|\/|#|data:|blob:)/i

  const found = new Set()
  for (const [, rawValue] of html.matchAll(mediaAttribute)) {
    const candidate = rawValue.trim()
    if (candidate === '' || nonLocalPrefix.test(candidate)) continue
    const [pathOnly] = candidate.split(/[?#]/) // drop query string / fragment
    if (pathOnly) found.add(pathOnly)
  }
  return Array.from(found)
}
// True when ProcessTable holds an entry (neither null nor undefined) for
// compileName, i.e. a Quarto compile was started under that name and its
// completion callback has not yet removed it.
function isRunning(compileName) {
  const handle = ProcessTable[compileName]
  return handle !== null && handle !== undefined
}
// Kill the running Quarto compile registered under compileName by passing its
// ProcessTable entry to CommandRunner.kill, which receives `callback`. When no
// such compile is registered a warning is logged and callback(null) is called.
function killQuarto(compileName, callback) {
  logger.debug({ compileName }, 'killing running quarto compile')
  if (isRunning(compileName)) {
    CommandRunner.kill(ProcessTable[compileName], callback)
    return
  }
  logger.warn({ compileName }, 'no such compile to kill')
  return callback(null)
}
// Module API. isRunning, runQuarto and killQuarto are exported in their
// callback form; `promises` exposes runQuarto and killQuarto wrapped with
// util.promisify for promise-based callers.
export default {
isRunning,
runQuarto,
killQuarto,
promises: {
runQuarto: promisify(runQuarto),
killQuarto: promisify(killQuarto),
},
}