8530c5ebe0
Build and Deploy Verso / deploy (push) Successful in 9m33s
The global python3 kernelspec hardcodes /usr/bin/python3, so even with QUARTO_PYTHON pointing at the project venv, Quarto launched the kernel in the base interpreter — packages installed into the venv (e.g. openpyxl) were not importable. Register a python3 kernelspec inside the venv via 'ipykernel install --sys-prefix' (kernel.json argv -> the venv's python); since Quarto runs kernel discovery through QUARTO_PYTHON, the venv's kernelspec is found ahead of the global one and the kernel runs in the venv. Bump the completion marker (.verso-complete -> .verso-ready) so venvs built before this change are rebuilt with the kernelspec. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
366 lines
15 KiB
JavaScript
366 lines
15 KiB
JavaScript
import Path from 'node:path'
|
|
import { promisify } from 'node:util'
|
|
import logger from '@overleaf/logger'
|
|
import CommandRunner from './CommandRunner.js'
|
|
import fs from 'node:fs'
|
|
|
|
// Maps currently-running Quarto jobs: compileName → the handle returned by
// CommandRunner.run (a PID, or a docker container id).
//
// The table is created without a prototype: with a plain `{}` a lookup such as
// ProcessTable['constructor'] resolves to an inherited Object.prototype member,
// which would make such a name look like a running compile.
const ProcessTable = Object.create(null)
|
|
|
|
// Render a project's main document with Quarto.
//
//   compileName - key under which the running job is tracked in ProcessTable
//   options     - { directory, mainFile, image, environment, compileGroup,
//                   timeout (defaults to 60000), exportMode, allowPythonInstall }
//   callback    - called as callback(error) when the process was terminated or
//                 timed out, otherwise as callback(null, output)
function runQuarto(compileName, options, callback) {
  const { directory, mainFile, image, environment, compileGroup } = options
  const timeout = options.timeout || 60000

  logger.debug(
    { directory, timeout, mainFile, compileGroup },
    'starting quarto compile'
  )

  // The standalone-HTML export needs embed-resources in the deck's own
  // frontmatter (it cannot be set from the CLI: Quarto only honours it when
  // nested under the format, and a document's own format block overrides
  // project/CLI metadata). So that export renders a temporary copy of the
  // root .qmd with the options injected; every other mode renders mainFile.
  const renderTarget =
    options.exportMode === 'html-standalone'
      ? _writeStandaloneVariant(directory, mainFile)
      : mainFile

  // Cached per-project venvs (shared across projects, keyed by the
  // requirements.txt hash). Must be on a persistent volume in production.
  const venvBaseDir =
    process.env.PYTHON_VENVS_DIR || '/var/lib/overleaf/data/python-venvs'

  const command = _buildQuartoCommand(
    renderTarget,
    options.exportMode,
    Boolean(options.allowPythonInstall),
    venvBaseDir
  )

  const onFinished = (error, output) => {
    delete ProcessTable[compileName]

    // Only real process-level failures (killed, timed out) are propagated.
    // An ordinary non-zero Quarto exit is not a server error: the missing
    // output file is enough for the caller to report a failed compile.
    if (error && (error.terminated || error.timedout)) {
      return callback(error)
    }

    // On a non-zero exit the runner attaches stdout to the error object;
    // fold it into an output-shaped value so the log can still be written.
    let combined = output
    if (!combined) {
      combined = error ? { stdout: error.stdout || '' } : null
    }

    _writeLogOutput(compileName, directory, combined, () => {
      _appendMissingResourceWarnings(directory, () => {
        callback(null, combined)
      })
    })
  }

  ProcessTable[compileName] = CommandRunner.run(
    compileName,
    command,
    directory,
    image,
    timeout,
    environment || {},
    compileGroup,
    null,
    onFinished
  )
}
|
|
|
|
// Build the `/bin/sh -c` argv that renders `renderTarget` with Quarto.
//
//   renderTarget       - file to render, relative to the compile directory
//   exportMode         - 'pdf-slides', 'html-standalone', or anything else for
//                        a normal preview compile
//   allowPythonInstall - when true, prepend the requirements.txt venv setup
//   venvBaseDir        - directory holding the cached venvs
//
// Returns ['/bin/sh', '-c', <command string>].
function _buildQuartoCommand(
  renderTarget,
  exportMode,
  allowPythonInstall,
  venvBaseDir
) {
  // Run through a POSIX shell so stderr is merged into stdout (2>&1).
  // LocalCommandRunner replaces $COMPILE_DIR before the shell sees it.
  //
  // We do NOT pass --to or --output: let the YAML frontmatter decide the
  // output format (typst → output.pdf, revealjs → output.html, etc.).
  //
  // For a normal preview compile we do NOT embed resources. A self-contained
  // single-file HTML breaks reveal.js plugins that load/store resources at
  // runtime (e.g. chalkboard, multiplex) and is slow to transfer. Instead
  // Quarto emits the HTML plus a sibling "<basename>_files/" asset directory;
  // the HTML references it with relative paths. For the 'html-standalone'
  // export, the caller instead passes a temporary copy of the deck whose
  // frontmatter enables embed-resources, producing a single portable file.
  //
  // After render we rename the produced top-level file to output.pdf or
  // output.html. The asset directory keeps its "<basename>_files" name; the
  // renamed output.html still points at it via the unchanged relative refs.
  //
  // The extension merge (cp -rn, no-clobber so user extensions win) and the
  // trailing semicolon (so a missing /opt/quarto-extensions doesn't abort)
  // are kept. mv uses relative paths because LocalCommandRunner.replace()
  // only substitutes the FIRST $COMPILE_DIR and the shell CWD is the dir.

  // The file name comes from the project, so it must never reach the shell
  // unquoted: wrap it in single quotes (closing/reopening around any embedded
  // single quote) so spaces and metacharacters are taken literally.
  const shellQuote = text => `'${String(text).replace(/'/g, "'\\''")}'`

  const baseName = renderTarget.replace(/\.[^/.]+$/, '') // strip extension
  const inputPath = `$COMPILE_DIR/${shellQuote(renderTarget)}`

  // `--` stops option parsing so a name starting with "-" is still a path.
  let tail =
    `(mv -- ${shellQuote(`${baseName}.pdf`)} output.pdf 2>/dev/null || ` +
    `mv -- ${shellQuote(`${baseName}.html`)} output.html 2>/dev/null)`

  if (exportMode === 'pdf-slides') {
    // After producing output.html, print it to output-slides.pdf with decktape
    // (headless Chromium via Puppeteer). The CLSI runtime user has no writable
    // HOME, so Chromium's crashpad can't create its database and the browser
    // dies on launch ("chrome_crashpad_handler: --database is required").
    // Point HOME / XDG dirs / the Chromium user-data-dir at a fresh writable
    // temp dir to give it somewhere to write.
    //   --no-sandbox: Chromium can't sandbox as a non-root container user
    //   --disable-dev-shm-usage: a tiny container /dev/shm crashes Chromium
    //   --disable-gpu: there is no GPU in the container
    tail +=
      ` && CHROME_HOME="$(mktemp -d)" && ` +
      `HOME="$CHROME_HOME" XDG_CONFIG_HOME="$CHROME_HOME" ` +
      `XDG_CACHE_HOME="$CHROME_HOME" decktape ` +
      `--chrome-arg=--no-sandbox ` +
      `--chrome-arg=--disable-dev-shm-usage ` +
      `--chrome-arg=--disable-gpu ` +
      `--chrome-arg=--user-data-dir="$CHROME_HOME/data" ` +
      `"$(pwd)/output.html" output-slides.pdf 2>&1`
  }

  // For the standalone export, remove the temporary render copy afterwards so
  // it can't be mistaken for a project file or picked up by a later preview
  // compile. Runs regardless of render success (";").
  //
  // Only do this when the target really is the temporary copy. If preparing
  // the copy failed, the target is the project's own main file, and deleting
  // "<base>.qmd" / "<base>_files" would remove the user's document and assets.
  const isTemporaryCopy =
    exportMode === 'html-standalone' &&
    renderTarget.endsWith('.verso-standalone.qmd')
  const cleanup = isTemporaryCopy
    ? `; rm -rf -- ${shellQuote(renderTarget)} ${shellQuote(`${baseName}_files`)}`
    : ''

  const venvPrep = allowPythonInstall ? _pythonVenvPrep(venvBaseDir) : ''

  const cmd =
    `mkdir -p _extensions && ` +
    `cp -rn /opt/quarto-extensions/_extensions/. _extensions/ 2>/dev/null; ` +
    venvPrep +
    `quarto render ${inputPath} 2>&1 && ` +
    tail +
    cleanup
  return ['/bin/sh', '-c', cmd]
}
|
|
|
|
// Shell snippet (run before `quarto render`, in the compile dir) that installs
// a project's requirements.txt into a venv cached by the file's sha256 and
// points Quarto at it via QUARTO_PYTHON.
//
//   venvBaseDir - directory under which the per-hash venvs are created
//
// Returns the snippet as a string ending in "; " so a command can follow it.
//
// Notes:
// - The venv is keyed by content hash, so identical dependency sets are built
//   once and reused.
// - --system-site-packages keeps the base interpreter's packages (including
//   ipykernel) visible, so only the *extra* packages are installed.
// - A per-hash flock serialises concurrent compiles building the same venv.
// - Everything is merged to stdout so pip output/errors land in the log;
//   on failure QUARTO_PYTHON is left unset and the render falls back to the
//   base interpreter (the missing-package error then surfaces normally).
// - venvBaseDir is the only value substituted by JS. It is emitted inside
//   single quotes (with embedded single quotes escaped) so that characters
//   such as `"`, `$` or backticks in the path are not interpreted by the shell.
function _pythonVenvPrep(venvBaseDir) {
  const quotedBaseDir = `'${String(venvBaseDir).replace(/'/g, "'\\''")}'`
  return (
    `if [ -f requirements.txt ]; then ` +
    `VBASE=${quotedBaseDir}; ` +
    `RHASH=$(sha256sum requirements.txt 2>/dev/null | cut -d" " -f1); ` +
    `if [ -n "$RHASH" ]; then ` +
    `VDIR="$VBASE/$RHASH"; mkdir -p "$VBASE" 2>/dev/null; ` +
    `( flock 9 || exit 0; ` +
    `if [ ! -f "$VDIR/.verso-ready" ]; then ` +
    `echo "Installing Python packages from requirements.txt..."; rm -rf "$VDIR"; ` +
    `python3 -m venv --system-site-packages "$VDIR" ` +
    `&& "$VDIR/bin/pip" install --no-input --disable-pip-version-check -r requirements.txt ` +
    // Register a python3 kernelspec INSIDE the venv (argv -> the venv's python)
    // so Quarto runs the kernel in the venv, not the base /usr/bin/python3 from
    // the global kernelspec. ipykernel is visible via --system-site-packages.
    `&& "$VDIR/bin/python3" -m ipykernel install --sys-prefix --name python3 --display-name "Python 3" ` +
    // The marker is only written after every step succeeded, so a partially
    // built venv is wiped and rebuilt on the next compile.
    `&& touch "$VDIR/.verso-ready" ` +
    `|| echo "ERROR: Failed to install Python packages from requirements.txt"; ` +
    `fi ` +
    `) 9>"$VBASE/.$RHASH.lock" 2>&1; ` +
    `if [ -f "$VDIR/.verso-ready" ]; then export QUARTO_PYTHON="$VDIR/bin/python3"; fi; ` +
    `fi; ` +
    `fi; `
  )
}
|
|
|
|
// Create "<base>.verso-standalone.qmd" next to the root document, containing
// the document text with the standalone options injected into its frontmatter,
// and return that temp file name so it can be rendered instead.
//
// Falls back to returning `mainFile` unchanged when the document cannot be
// transformed (the injector returns nothing) or when reading/writing throws;
// the latter is logged as a warning. The temp file is written into the same
// directory so relative resources (images, _extensions) still resolve.
function _writeStandaloneVariant(directory, mainFile) {
  try {
    const source = fs.readFileSync(Path.join(directory, mainFile), 'utf8')
    const standalone = _injectRevealjsStandaloneOptions(source)
    if (standalone) {
      const stem = mainFile.replace(/\.[^/.]+$/, '')
      const tempName = `${stem}.verso-standalone.qmd`
      fs.writeFileSync(Path.join(directory, tempName), standalone)
      return tempName
    }
  } catch (err) {
    logger.warn({ err, directory, mainFile }, 'could not prepare standalone qmd')
  }
  return mainFile
}
|
|
|
|
// Inject the self-contained options into the `revealjs:` block of a deck's
// YAML frontmatter. embed-resources/self-contained-math inline all CSS/JS/
// images/MathJax into one portable file; chalkboard must be off (it is
// incompatible with embed-resources and would error the render).
//
//   content - full text of the .qmd document
//
// Returns the new document text, or null if the document has no frontmatter
// or no `revealjs:` line with an empty value (i.e. it isn't a nested-revealjs
// deck we can safely edit).
//
// Only DIRECT children of `revealjs:` are considered: a key of the same name
// nested deeper (e.g. `menu:\n  chalkboard: true`) belongs to another option
// and is left alone. When a direct child is overwritten, any more-indented
// lines that formed its old value (e.g. a `chalkboard:` mapping with its own
// settings) are removed with it, since leaving them under a scalar value would
// make the YAML invalid. Missing keys are inserted right after `revealjs:`.
function _injectRevealjsStandaloneOptions(content) {
  const fmMatch = content.match(/^(---\r?\n)([\s\S]*?)(\r?\n---\r?\n?)/)
  if (!fmMatch) return null
  const [, open, body, close] = fmMatch

  // Split on either line ending and re-join with the document's own, so a
  // CRLF file doesn't end up with a mix of CRLF and bare LF lines.
  const eol = open.endsWith('\r\n') ? '\r\n' : '\n'
  const lines = body.split(/\r?\n/)

  const indentOf = line => line.match(/^(\s*)/)[1]
  // Blank lines and comments carry no structure; their indentation must not
  // decide where the block ends or how deep its children are.
  const isIgnorable = line => {
    const text = line.trim()
    return text === '' || text.startsWith('#')
  }

  const revealIdx = lines.findIndex(l => /^\s*revealjs:\s*$/.test(l))
  if (revealIdx === -1) return null // not a `format:\n  revealjs:` deck

  const revealIndent = indentOf(lines[revealIdx])

  // Determine the block's child indent (from the first more-indented line) and
  // where the block ends (the first later line indented at/under revealjs:).
  let childIndent = revealIndent + '  '
  let blockEnd = lines.length
  let seenChild = false
  for (let i = revealIdx + 1; i < lines.length; i++) {
    if (isIgnorable(lines[i])) continue
    const indent = indentOf(lines[i])
    if (indent.length <= revealIndent.length) {
      blockEnd = i
      break
    }
    if (!seenChild) {
      childIndent = indent
      seenChild = true
    }
  }

  const desired = {
    'embed-resources': 'true',
    'self-contained-math': 'true',
    chalkboard: 'false',
  }

  // Rebuild the block, overwriting direct-child keys we care about.
  const present = new Set()
  const blockLines = []
  let i = revealIdx + 1
  while (i < blockEnd) {
    const line = lines[i]
    const km = line.match(/^(\s*)([A-Za-z0-9_-]+):/)
    const key = km && km[1] === childIndent ? km[2] : null
    if (key === null || !Object.prototype.hasOwnProperty.call(desired, key)) {
      blockLines.push(line)
      i++
      continue
    }

    // Emit the key once, even if the document repeated it.
    if (!present.has(key)) {
      blockLines.push(`${childIndent}${key}: ${desired[key]}`)
      present.add(key)
    }

    // Drop the old value's continuation lines: everything up to the last
    // non-blank line that is indented deeper than the key itself.
    let lastValueLine = i
    for (let j = i + 1; j < blockEnd; j++) {
      if (lines[j].trim() === '') continue
      if (indentOf(lines[j]).length <= childIndent.length) break
      lastValueLine = j
    }
    i = lastValueLine + 1
  }

  const additions = Object.keys(desired)
    .filter(k => !present.has(k))
    .map(k => `${childIndent}${k}: ${desired[k]}`)

  const result = [
    ...lines.slice(0, revealIdx + 1),
    ...additions,
    ...blockLines,
    ...lines.slice(blockEnd),
  ]

  return open + result.join(eol) + close + content.slice(fmMatch[0].length)
}
|
|
|
|
// Persist a compile's stdout as "output.log" inside `directory`.
//
//   compileName - only used as context when logging a write failure
//   directory   - compile directory that receives output.log
//   output      - object whose `stdout` string is the log text (may be null)
//   callback    - invoked with no arguments once done, whether or not the
//                 write succeeded
//
// When there is no stdout text nothing is touched and the callback runs
// immediately. Otherwise any existing output.log is unlinked first and the new
// file is created with the exclusive 'wx' flag.
function _writeLogOutput(compileName, directory, output, callback) {
  const logText = output && output.stdout ? output.stdout : ''
  if (!logText) {
    return callback()
  }

  const logFile = Path.join(directory, 'output.log')

  const onWritten = err => {
    if (err) {
      logger.error({ err, compileName, logFile }, 'error writing quarto log')
    }
    callback()
  }

  // The unlink result is deliberately ignored: the file may simply not exist.
  fs.unlink(logFile, () => {
    fs.writeFile(logFile, logText, { flag: 'wx' }, onWritten)
  })
}
|
|
|
|
// Append a "[WARNING] Missing resource: ..." line to output.log for every
// local media file that the rendered output.html references but that does not
// exist under `directory`.
//
// Background: the HTML/RevealJS output is not self-contained, so a missing
// image or video produces no compile-time warning — it just renders broken in
// the browser. Scanning the produced HTML restores that feedback. The
// [WARNING] prefix is what the log parser on the web side looks for.
//
//   directory - compile directory containing output.html / output.log
//   callback  - invoked with no arguments when finished
//
// If output.html cannot be read (e.g. the compile produced a PDF) nothing is
// done. A failure to append to the log is ignored.
function _appendMissingResourceWarnings(directory, callback) {
  // True when the (URL-decoded) reference does not exist on disk. A reference
  // that cannot be decoded is treated as present, i.e. never reported.
  const isMissing = ref => {
    try {
      const onDisk = Path.join(directory, decodeURIComponent(ref))
      return !fs.existsSync(onDisk)
    } catch {
      return false
    }
  }

  const describe = ref =>
    `[WARNING] Missing resource: ${ref} (referenced in the document ` +
    `but not found in the project — it will appear broken)`

  fs.readFile(Path.join(directory, 'output.html'), 'utf8', (err, html) => {
    if (err) return callback() // no HTML output (e.g. a PDF compile)

    const missing = _extractLocalMediaRefs(html).filter(isMissing)
    if (missing.length === 0) return callback()

    const warnings = missing.map(describe).join('\n') + '\n'
    fs.appendFile(Path.join(directory, 'output.log'), '\n' + warnings, () =>
      callback()
    )
  })
}
|
|
|
|
// Pull local media references out of rendered HTML: the values of `src`,
// `poster`, `data-background-image` and `data-background-video` attributes.
//
//   html - the rendered document text
//
// Returns an array of unique references, in order of first appearance, with
// any query string / fragment removed. Values starting with a URL scheme
// (including data: and blob:), `//`, `/` or `#` are skipped, as are empty ones.
//
// The attribute value is read up to the SAME quote character that opened it,
// so a double-quoted value may contain an apostrophe (src="it's.png") and a
// single-quoted value may contain a double quote without being cut short.
function _extractLocalMediaRefs(html) {
  const refs = new Set()
  const attrRegex =
    /(?:src|poster|data-background-image|data-background-video)\s*=\s*(?:"([^"]*)"|'([^']*)')/gi
  for (const match of html.matchAll(attrRegex)) {
    // Group 1 holds a double-quoted value, group 2 a single-quoted one.
    const rawValue = match[1] !== undefined ? match[1] : match[2]
    const url = rawValue.trim()
    if (!url) continue
    // Skip absolute URLs, protocol-relative, data/blob URIs and anchors.
    if (/^(?:[a-z]+:|\/\/|\/|#|data:|blob:)/i.test(url)) continue
    const clean = url.split(/[?#]/)[0] // drop query string / fragment
    if (clean) refs.add(clean)
  }
  return [...refs]
}
|
|
|
|
// Report whether a compile with this name currently has an entry in
// ProcessTable (an entry holding null or undefined counts as not running).
function isRunning(compileName) {
  const handle = ProcessTable[compileName]
  return handle !== null && handle !== undefined
}
|
|
|
|
// Stop the Quarto compile registered under `compileName`.
//
// If such a compile is tracked, its ProcessTable handle is passed to
// CommandRunner.kill together with `callback`. If not, a warning is logged
// and `callback(null)` is invoked — an unknown compile is not an error.
function killQuarto(compileName, callback) {
  logger.debug({ compileName }, 'killing running quarto compile')

  if (isRunning(compileName)) {
    CommandRunner.kill(ProcessTable[compileName], callback)
    return
  }

  logger.warn({ compileName }, 'no such compile to kill')
  return callback(null)
}
|
|
|
|
// Promise-returning wrappers around the callback-style entry points.
const promises = {
  runQuarto: promisify(runQuarto),
  killQuarto: promisify(killQuarto),
}

// Public API of this module: the callback-style functions plus `promises`.
export default {
  isRunning,
  runQuarto,
  killQuarto,
  promises,
}
|