From c9727a26e4bb379ca74f61bc85a95819c93f26fe Mon Sep 17 00:00:00 2001 From: claude Date: Tue, 2 Jun 2026 14:19:01 +0000 Subject: [PATCH] Python deps: smart missing-package hint + switch to .vrf requirements file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Option A: when a {python} cell fails with ModuleNotFoundError/ImportError, the log now suggests the exact PyPI package to add (with a module->package map, e.g. cv2 -> opencv-python, sklearn -> scikit-learn), names the Verso requirements file, and notes it could instead be a local module — so the langmuirthermalstudy case isn't mistaken for a PyPI package. Switch the per-project requirements file from requirements.txt to a Verso- specific requirements.vrf (so it won't be confused with arbitrary .txt files); QuartoRunner now looks for requirements.vrf, and 'vrf' is registered as an editable text extension. The dedicated in-UI editor (and hiding it from the file tree) follows in a separate change. Co-Authored-By: Claude Opus 4.8 --- docs/python-dependencies-design.md | 12 +++--- services/clsi/app/js/QuartoRunner.js | 14 +++---- services/web/config/settings.defaults.js | 1 + .../js/ide/log-parser/quarto-log-parser.ts | 37 ++++++++++++++++--- 4 files changed, 45 insertions(+), 19 deletions(-) diff --git a/docs/python-dependencies-design.md b/docs/python-dependencies-design.md index 28695af792..2a17534e5f 100644 --- a/docs/python-dependencies-design.md +++ b/docs/python-dependencies-design.md @@ -7,7 +7,7 @@ beyond the curated base set. ## What ships in Phase 1 -- A project root `requirements.txt` is installed into a venv cached by its +- A project root `requirements.vrf` is installed into a venv cached by its sha256, created with `python3 -m venv --system-site-packages`; `QuartoRunner` points Quarto at it via `QUARTO_PYTHON`. 
A per-hash `flock` serialises concurrent builds; pip output is merged into `output.log`; on failure the @@ -21,7 +21,7 @@ beyond the curated base set. ### Known Phase-1 limitations -- The first build of a heavy `requirements.txt` runs within the compile +- The first build of a heavy `requirements.vrf` runs within the compile timeout; a very large install can be killed and retried next compile (the venv is only marked complete on success). - No egress restriction yet (Phase 2) — installs reach PyPI directly. @@ -47,15 +47,15 @@ security decision, not just a convenience. ## Mechanism -1. **Declaration.** A standard `requirements.txt` at the project root opts the +1. **Declaration.** A pip-format `requirements.vrf` at the project root opts the project in (familiar, Quarto-agnostic, supports version pinning). -2. **Keying.** CLSI hashes `sha256(requirements.txt + python version)`. The hash +2. **Keying.** CLSI hashes `sha256(requirements.vrf + python version)`. The hash names a venv directory on a **persistent volume**, e.g. `…/data/python-venvs/<hash>/`. Identical dependency sets share one venv across projects and compiles. 3. **Build-if-missing.** `python3 -m venv --system-site-packages <venv>` (so the bundled stack stays visible and only the *extra* deps are installed — smaller - and faster), then `<venv>/bin/pip install -r requirements.txt`. Guard with a + and faster), then `<venv>/bin/pip install -r requirements.vrf`. Guard with a per-hash `flock` so concurrent compiles don't build the same venv twice. 4. **Point Quarto at it.** Set `QUARTO_PYTHON=<venv>/bin/python3` in the render environment (threaded web → CLSI exactly like `exportMode`). With @@ -93,7 +93,7 @@ security decision, not just a convenience. ## Open decisions -- `requirements.txt` vs a frontmatter field vs both? +- `requirements.vrf` vs a frontmatter field vs both? - Shared global venv volume vs per-user namespacing (sharing is cheaper; per-user is stricter isolation)? 
- Allow native/compiled wheels (broader support) vs wheels-only/no-build diff --git a/services/clsi/app/js/QuartoRunner.js b/services/clsi/app/js/QuartoRunner.js index 4f2cc9cc57..e34ac94b9c 100644 --- a/services/clsi/app/js/QuartoRunner.js +++ b/services/clsi/app/js/QuartoRunner.js @@ -27,7 +27,7 @@ function runQuarto(compileName, options, callback) { } // Where cached per-project venvs live (shared across projects, keyed by the - // requirements.txt hash). Must be on a persistent volume in production. + // requirements.vrf hash). Must be on a persistent volume in production. const venvBaseDir = process.env.PYTHON_VENVS_DIR || '/var/lib/overleaf/data/python-venvs' const command = _buildQuartoCommand( @@ -148,7 +148,7 @@ function _buildQuartoCommand( } // Shell snippet (run before `quarto render`, in the compile dir) that installs -// a project's requirements.txt into a venv cached by the file's sha256 and +// a project's requirements.vrf into a venv cached by the file's sha256 and // points Quarto at it via QUARTO_PYTHON. Notes: // - The venv is shared across projects/compiles (keyed by content hash), so // identical dependency sets are built once. @@ -162,22 +162,22 @@ function _buildQuartoCommand( // JS template interpolation; only ${venvBaseDir} is substituted by JS. function _pythonVenvPrep(venvBaseDir) { return ( - `if [ -f requirements.txt ]; then ` + + `if [ -f requirements.vrf ]; then ` + `VBASE="${venvBaseDir}"; ` + - `RHASH=$(sha256sum requirements.txt 2>/dev/null | cut -d" " -f1); ` + + `RHASH=$(sha256sum requirements.vrf 2>/dev/null | cut -d" " -f1); ` + `if [ -n "$RHASH" ]; then ` + `VDIR="$VBASE/$RHASH"; mkdir -p "$VBASE" 2>/dev/null; ` + `( flock 9 || exit 0; ` + `if [ ! 
-f "$VDIR/.verso-ready" ]; then ` + - `echo "Installing Python packages from requirements.txt..."; rm -rf "$VDIR"; ` + + `echo "Installing Python packages from requirements.vrf..."; rm -rf "$VDIR"; ` + `python3 -m venv --system-site-packages "$VDIR" ` + - `&& "$VDIR/bin/pip" install --no-input --disable-pip-version-check -r requirements.txt ` + + `&& "$VDIR/bin/pip" install --no-input --disable-pip-version-check -r requirements.vrf ` + // Register a python3 kernelspec INSIDE the venv (argv -> the venv's python) // so Quarto runs the kernel in the venv, not the base /usr/bin/python3 from // the global kernelspec. ipykernel is visible via --system-site-packages. `&& "$VDIR/bin/python3" -m ipykernel install --sys-prefix --name python3 --display-name "Python 3" ` + `&& touch "$VDIR/.verso-ready" ` + - `|| echo "ERROR: Failed to install Python packages from requirements.txt"; ` + + `|| echo "ERROR: Failed to install Python packages from requirements.vrf"; ` + `fi ` + `) 9>"$VBASE/.$RHASH.lock" 2>&1; ` + `if [ -f "$VDIR/.verso-ready" ]; then export QUARTO_PYTHON="$VDIR/bin/python3"; fi; ` + diff --git a/services/web/config/settings.defaults.js b/services/web/config/settings.defaults.js index 317eaca13f..57234cb710 100644 --- a/services/web/config/settings.defaults.js +++ b/services/web/config/settings.defaults.js @@ -56,6 +56,7 @@ const defaultTextExtensions = [ 'rmd', 'qmd', 'typ', + 'vrf', // Verso requirements file (Python deps for Quarto venvs) 'lua', 'py', 'gv', diff --git a/services/web/frontend/js/ide/log-parser/quarto-log-parser.ts b/services/web/frontend/js/ide/log-parser/quarto-log-parser.ts index a6bac19ad9..a1dbdfada0 100644 --- a/services/web/frontend/js/ide/log-parser/quarto-log-parser.ts +++ b/services/web/frontend/js/ide/log-parser/quarto-log-parser.ts @@ -37,6 +37,25 @@ const R_QUITTING_REGEX = /^Quitting from lines? 
(\d+)(?:-\d+)?\s*(?:\(([^)]+)\)) // ImportError: No module named scipy const PY_MODULE_REGEX = /^(?:ModuleNotFoundError|ImportError): No module named ['"]?([\w.]+)['"]?/ +// Import (module) name -> PyPI package name, for the common cases where they +// differ. Anything not listed defaults to the module name itself. +const PY_MODULE_TO_PACKAGE: Record<string, string> = { + cv2: 'opencv-python', + sklearn: 'scikit-learn', + skimage: 'scikit-image', + PIL: 'Pillow', + yaml: 'PyYAML', + bs4: 'beautifulsoup4', + Crypto: 'pycryptodome', + OpenSSL: 'pyOpenSSL', + dateutil: 'python-dateutil', + dotenv: 'python-dotenv', + serial: 'pyserial', + usb: 'pyusb', + cairo: 'pycairo', + gi: 'PyGObject', + win32com: 'pywin32', +} // A typst diagnostic location line: ` ┌─ main.typ:5:10` / ` --> main.typ:5:10` const TYPST_LOCATION_REGEX = /(?:[┌╭]─|-->)\s*(.+?):(\d+):(\d+)/ @@ -77,17 +96,23 @@ export default function parseQuartoLog(rawLog: string): ParseResult { // through as an opaque error (or not be surfaced at all). const pyModule = trimmed.match(PY_MODULE_REGEX) if (pyModule) { - const pkg = pyModule[1] + const moduleName = pyModule[1] + // Suggest the PyPI package for the top-level module (cv2 -> opencv-python). + const topLevel = moduleName.split('.')[0] + const suggestion = PY_MODULE_TO_PACKAGE[topLevel] || topLevel data.push({ line: pendingLocation.line ?? null, file: pendingLocation.file, level: 'error', - message: `Python package "${pkg}" is not installed on the server`, + message: `Python module "${moduleName}" is not available`, content: - `${clean}\n\nThe Python package "${pkg}" is not available in the ` + - `compile environment. 
Common scientific packages (numpy, pandas, ` + - `scipy, matplotlib, seaborn, scikit-learn, sympy, plotly) are ` + - `pre-installed; others must be added to the server image.`, + `${clean}\n\n` + + `If "${topLevel}" is a PyPI package, add \`${suggestion}\` to your ` + + `Verso requirements file (requirements.vrf) and recompile as the ` + + `project owner or a collaborator. If it is your own module, add its ` + + `.py file(s) to the project instead.\n` + + `Pre-installed: numpy, pandas, scipy, matplotlib, seaborn, ` + + `scikit-learn, sympy, plotly, tabulate, opencv-python (cv2), tqdm.`, raw: clean, }) pendingLocation = {}