Add cv2/tqdm to base; implement per-project Python venvs (Design B, Phase 1)
Build and Deploy Verso / deploy (push) Successful in 17m0s

Base image: add opencv-python-headless (cv2) and tqdm to the bundled
scientific stack, and python3-venv (needed to build per-project venvs).

Per-project dependencies: a project's requirements.txt is now installed into a
venv cached by its sha256 (python3 -m venv --system-site-packages, so the
bundled stack stays visible and only extra packages are installed); QuartoRunner
points Quarto at it via QUARTO_PYTHON. A per-hash flock serialises concurrent
builds; pip output is merged into output.log; on failure the render falls back
to the base interpreter. Venvs live under PYTHON_VENVS_DIR
(default /var/lib/overleaf/data/python-venvs).

Gating: PythonVenvGate.userCanInstallPython restricts installs to the project
owner + invited collaborators (ignorePublicAccess excludes anonymous/link
users), threaded to CLSI as allowPythonInstall on the editor compile,
presentation export, and publish paths. Behind OVERLEAF_ENABLE_PROJECT_PYTHON_VENV
(enabled in the deployment). Design doc updated; Phase 2 (egress policy) and
Phase 3 (venv eviction) remain.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude
2026-06-02 13:14:47 +00:00
parent 8b9fe4e760
commit 83b6b323c3
12 changed files with 152 additions and 8 deletions
+6
View File
@@ -290,6 +290,12 @@ jobs:
# need OVERLEAF_ALLOW_PUBLIC_ACCESS above.
- name: OVERLEAF_ALLOW_ANONYMOUS_READ_AND_WRITE_SHARING
value: "true"
# Let Quarto Python cells use a project's requirements.txt:
# the compiler installs it into a cached venv. Gated to the
# project owner + invited collaborators (never anonymous /
# link-sharing users).
- name: OVERLEAF_ENABLE_PROJECT_PYTHON_VENV
value: "true"
---
apiVersion: v1
kind: Service
+26 -2
View File
@@ -1,7 +1,31 @@
# Design: per-project Python dependencies (cached virtualenv)
Status: **proposal** (not yet implemented). Captures the agreed plan for letting
Quarto `{python}` cells use libraries beyond the curated base set.
Status: **Phase 1 implemented** (gated behind `OVERLEAF_ENABLE_PROJECT_PYTHON_VENV`,
on in the deployment). Network egress policy and venv eviction (Phases 2–3)
remain. Captures the plan for letting Quarto `{python}` cells use libraries
beyond the curated base set.
## What ships in Phase 1
- A project root `requirements.txt` is installed into a venv cached by its
sha256, created with `python3 -m venv --system-site-packages`; `QuartoRunner`
points Quarto at it via `QUARTO_PYTHON`. A per-hash `flock` serialises
concurrent builds; pip output is merged into `output.log`; on failure the
render falls back to the base interpreter (and the missing-package message
surfaces). Venvs live under `PYTHON_VENVS_DIR`
(default `/var/lib/overleaf/data/python-venvs`).
- Gated by `userCanInstallPython` (`PythonVenvGate.mjs`) to the project owner +
invited collaborators (any role) — never anonymous / link-sharing users —
threaded to CLSI as `allowPythonInstall` on the editor compile, presentation
export, and publish paths.
### Known Phase-1 limitations
- The first build of a heavy `requirements.txt` runs within the compile
timeout; a very large install can be killed and retried next compile (the
venv is only marked complete on success).
- No egress restriction yet (Phase 2) — installs reach PyPI directly.
- No eviction yet (Phase 3) — venvs accumulate under `PYTHON_VENVS_DIR`.
## Background
+8 -4
View File
@@ -81,17 +81,21 @@ RUN mkdir -p /opt/quarto-extensions \
# managed (PEP 668), hence --break-system-packages in this controlled image.
# The runtime user (www-data) writes Jupyter's runtime/connection files under
# its HOME (/var/www/.local), which is made writable in the Quarto step above.
# python3-venv is needed so a project's requirements.txt can be installed into
# a per-project venv (see QuartoRunner / PythonVenvGate).
RUN apt-get update \
&& apt-get install -y python3-pip \
&& apt-get install -y python3-pip python3-venv \
&& pip3 install --no-cache-dir --break-system-packages \
jupyter-core jupyter-client nbclient nbformat ipykernel pyyaml \
&& python3 -m ipykernel install --prefix /usr/local --name python3 --display-name "Python 3" \
# Bundle the common scientific-Python stack so most decks "just work" without
# any per-project install. matplotlib renders headless (Agg) automatically.
# To add more later, append to this list (it is the cheapest way to cover a
# library many projects need).
# any per-project install. matplotlib renders headless (Agg) automatically;
# opencv-python-headless is the GUI-less OpenCV build (provides cv2) suited to
# a server. To add more later, append to this list (the cheapest way to cover
# a library many projects need).
&& pip3 install --no-cache-dir --break-system-packages \
numpy pandas scipy matplotlib seaborn scikit-learn sympy plotly tabulate \
opencv-python-headless tqdm \
&& rm -rf /var/lib/apt/lists/* /root/.cache
# Install decktape + headless Chromium (for exporting RevealJS decks to PDF)
+1
View File
@@ -248,6 +248,7 @@ async function doCompile(request, stats, timings) {
compileGroup: request.compileGroup,
stopOnFirstError: request.stopOnFirstError,
exportMode: request.exportMode,
allowPythonInstall: request.allowPythonInstall,
stats,
timings,
})
+54 -2
View File
@@ -26,7 +26,16 @@ function runQuarto(compileName, options, callback) {
renderTarget = _writeStandaloneVariant(directory, mainFile)
}
const command = _buildQuartoCommand(renderTarget, options.exportMode)
// Where cached per-project venvs live (shared across projects, keyed by the
// requirements.txt hash). Must be on a persistent volume in production.
const venvBaseDir =
process.env.PYTHON_VENVS_DIR || '/var/lib/overleaf/data/python-venvs'
const command = _buildQuartoCommand(
renderTarget,
options.exportMode,
Boolean(options.allowPythonInstall),
venvBaseDir
)
ProcessTable[compileName] = CommandRunner.run(
compileName,
@@ -60,7 +69,12 @@ function runQuarto(compileName, options, callback) {
)
}
function _buildQuartoCommand(renderTarget, exportMode) {
function _buildQuartoCommand(
renderTarget,
exportMode,
allowPythonInstall,
venvBaseDir
) {
// Run through a POSIX shell so stderr is merged into stdout (2>&1).
// LocalCommandRunner replaces $COMPILE_DIR before the shell sees it.
//
@@ -121,15 +135,53 @@ function _buildQuartoCommand(renderTarget, exportMode) {
? `; rm -rf ${baseName}.qmd ${baseName}_files`
: ''
const venvPrep = allowPythonInstall ? _pythonVenvPrep(venvBaseDir) : ''
const cmd =
`mkdir -p _extensions && ` +
`cp -rn /opt/quarto-extensions/_extensions/. _extensions/ 2>/dev/null; ` +
venvPrep +
`quarto render ${inputPath} 2>&1 && ` +
tail +
cleanup
return ['/bin/sh', '-c', cmd]
}
// Shell snippet (run before `quarto render`, in the compile dir) that installs
// a project's requirements.txt into a venv cached by the file's sha256 and
// points Quarto at it via QUARTO_PYTHON.
//
// @param {string} venvBaseDir - directory holding the cached venvs; each venv
//   lives at <venvBaseDir>/<sha256 of requirements.txt>.
// @returns {string} a `;`-terminated POSIX shell fragment, meant to be
//   prepended to the render command. It is a no-op when the compile dir has
//   no requirements.txt.
//
// Notes:
//  - The venv is keyed by content hash only, so identical requirements.txt
//    files resolve to the same venv directory and are built once.
//    NOTE(review): the key covers the bytes of requirements.txt alone; files
//    it pulls in (e.g. `-r other.txt`) and the base interpreter version are
//    not part of the key — confirm that is acceptable for a shared cache.
//  - --system-site-packages keeps the system site-packages visible inside the
//    venv, so only the *extra* packages are installed.
//  - A per-hash flock serialises concurrent compiles building the same venv;
//    if the lock cannot be taken the build step is skipped (exit 0).
//  - The build step's stderr is merged to stdout so pip output/errors land in
//    the compile log; on failure the `.verso-complete` marker is never
//    written, QUARTO_PYTHON is left unset and the render falls back to the
//    base interpreter (the missing-package error then surfaces normally).
//  - Only $-shell vars / $(...) are used (no ${...}) to avoid clashing with
//    JS template interpolation; the single JS substitution is the base dir,
//    which is POSIX single-quoted so that `$`, backticks, `"` or `\` in the
//    configured path are taken literally by the shell.
function _pythonVenvPrep(venvBaseDir) {
  // Inside single quotes the shell treats every character literally; the only
  // one needing care is `'` itself, written as: close quote, \', reopen quote.
  const quotedBaseDir = `'${String(venvBaseDir).replace(/'/g, "'\\''")}'`
  return (
    `if [ -f requirements.txt ]; then ` +
    `VBASE=${quotedBaseDir}; ` +
    `RHASH=$(sha256sum requirements.txt 2>/dev/null | cut -d" " -f1); ` +
    `if [ -n "$RHASH" ]; then ` +
    `VDIR="$VBASE/$RHASH"; mkdir -p "$VBASE" 2>/dev/null; ` +
    // Subshell holds the lock on fd 9 for the duration of the build.
    `( flock 9 || exit 0; ` +
    // Re-check under the lock: another compile may have just finished it.
    `if [ ! -f "$VDIR/.verso-complete" ]; then ` +
    `echo "Installing Python packages from requirements.txt..."; rm -rf "$VDIR"; ` +
    `python3 -m venv --system-site-packages "$VDIR" ` +
    `&& "$VDIR/bin/pip" install --no-input --disable-pip-version-check -r requirements.txt ` +
    `&& touch "$VDIR/.verso-complete" ` +
    `|| echo "ERROR: Failed to install Python packages from requirements.txt"; ` +
    `fi ` +
    `) 9>"$VBASE/.$RHASH.lock" 2>&1; ` +
    // Only a fully built venv is handed to Quarto.
    `if [ -f "$VDIR/.verso-complete" ]; then export QUARTO_PYTHON="$VDIR/bin/python3"; fi; ` +
    `fi; ` +
    `fi; `
  )
}
// Write a temporary copy of the root .qmd with embed-resources enabled in its
// frontmatter, returning the temp filename to render. On any problem (no
// frontmatter, not a nested revealjs deck, read/write error) it falls back to
+7
View File
@@ -109,6 +109,13 @@ function parse(body, callback) {
compile.options.exportMode,
{ default: '', type: 'string' }
)
// Verso: whether QuartoRunner may install the project's requirements.txt
// into a cached venv (gated by privilege on the web side).
response.allowPythonInstall = _parseAttribute(
'allowPythonInstall',
compile.options.allowPythonInstall,
{ default: false, type: 'boolean' }
)
response.flags = _parseAttribute('flags', compile.options.flags, {
default: [],
type: 'object',
@@ -1149,6 +1149,7 @@ function _finaliseRequest(projectId, options, project, docs, files) {
draft: Boolean(options.draft),
stopOnFirstError: Boolean(options.stopOnFirstError),
exportMode: options.exportMode,
allowPythonInstall: Boolean(options.allowPythonInstall),
check: options.check,
syncType: options.syncType,
syncState: options.syncState,
@@ -7,6 +7,7 @@ import logger from '@overleaf/logger'
import Settings from '@overleaf/settings'
import Errors from '../Errors/Errors.js'
import SessionManager from '../Authentication/SessionManager.mjs'
import { userCanInstallPython } from './PythonVenvGate.mjs'
import { RateLimiter } from '../../infrastructure/RateLimiter.mjs'
import Validation from '../../infrastructure/Validation.mjs'
import Path from 'node:path'
@@ -201,6 +202,11 @@ const _CompileController = {
options.incrementalCompilesEnabled = true
}
// Allow building a per-project Python venv from requirements.txt only for
// the project owner and invited collaborators — never anonymous or
// link-sharing users.
options.allowPythonInstall = await userCanInstallPython(userId, projectId)
let {
enablePdfCaching,
pdfCachingMinChunkSize,
@@ -6,6 +6,7 @@ import SessionManager from '../Authentication/SessionManager.mjs'
import CompileManager from './CompileManager.mjs'
import ClsiManager from './ClsiManager.mjs'
import ProjectGetter from '../Project/ProjectGetter.mjs'
import { userCanInstallPython } from './PythonVenvGate.mjs'
// On-demand export of a RevealJS deck from the editor's download menu.
// - html → a single self-contained .html (embed-resources)
@@ -61,6 +62,7 @@ async function exportPresentation(req, res) {
await CompileManager.promises.compile(projectId, userId, {
exportMode: format.exportMode,
bypassRecentCompileCheck: true,
allowPythonInstall: await userCanInstallPython(userId, projectId),
})
if (!buildId || !outputFiles?.some(f => f.path === format.file)) {
@@ -0,0 +1,33 @@
import Settings from '@overleaf/settings'
import logger from '@overleaf/logger'
import AuthorizationManager from '../Authorization/AuthorizationManager.mjs'
// Decides whether the compiler may install a project's requirements.txt into
// a cached venv on behalf of this user (so Quarto's Python cells can use
// libraries beyond the bundled base set).
//
// The answer is true only when the feature flag
// (Settings.enableProjectPythonVenv) is on AND the privilege lookup — done
// with ignorePublicAccess and no token — yields a truthy level for the user
// on the project. In every other case, including a failed lookup (which is
// logged at warn level), the answer is false and the render uses the base
// Python interpreter.
// NOTE(review): the intent is owner + invited collaborators only; confirm
// against AuthorizationManager that ignorePublicAccess also excludes
// logged-in users who joined through a sharing link.
export async function userCanInstallPython(userId, projectId) {
  if (Settings.enableProjectPythonVenv) {
    try {
      const level =
        await AuthorizationManager.promises.getPrivilegeLevelForProject(
          userId,
          projectId,
          null,
          { ignorePublicAccess: true }
        )
      return !!level
    } catch (err) {
      // Fail closed: an unknown privilege must never unlock installs.
      logger.warn(
        { err, projectId, userId },
        'could not determine python install privilege; defaulting to false'
      )
    }
  }
  return false
}
export default { userCanInstallPython }
@@ -8,6 +8,7 @@ import { fetchStream } from '@overleaf/fetch-utils'
import { callbackify } from 'node:util'
import CompileManager from '../Compile/CompileManager.mjs'
import { getOutputFileURL } from '../Compile/ClsiURLHelpers.mjs'
import { userCanInstallPython } from '../Compile/PythonVenvGate.mjs'
import { PublishedPresentation } from '../../models/PublishedPresentation.mjs'
import Errors from '../Errors/Errors.js'
@@ -56,6 +57,7 @@ async function publish(projectId, userId) {
const { status, outputFiles, clsiServerId, buildId } =
await CompileManager.promises.compile(projectId, userId, {
bypassRecentCompileCheck: true,
allowPythonInstall: await userCanInstallPython(userId, projectId),
})
if (!outputFiles?.some(f => f.path === 'output.html')) {
+6
View File
@@ -478,6 +478,12 @@ module.exports = {
: 'quarto',
enableSubscriptions: false,
restrictedCountries: [],
// When true, a project's requirements.txt is installed into a cached venv so
// Quarto's Python cells can use libraries beyond the bundled base set. Gated
// by userCanInstallPython (PythonVenvGate.mjs) — applied on the editor compile,
// presentation export and publish paths — to the project owner + invited
// collaborators only.
enableProjectPythonVenv:
process.env.OVERLEAF_ENABLE_PROJECT_PYTHON_VENV === 'true',
enableOnboardingEmails: process.env.ENABLE_ONBOARDING_EMAILS === 'true',
enabledLinkedFileTypes: (process.env.ENABLED_LINKED_FILE_TYPES || '').split(