Add cv2/tqdm to base; implement per-project Python venvs (Design B, Phase 1)
Build and Deploy Verso / deploy (push) Successful in 17m0s
Build and Deploy Verso / deploy (push) Successful in 17m0s
Base image: add opencv-python-headless (cv2) and tqdm to the bundled scientific stack, and python3-venv (needed to build per-project venvs). Per-project dependencies: a project's requirements.txt is now installed into a venv cached by its sha256 (python3 -m venv --system-site-packages, so the bundled stack stays visible and only extra packages are installed); QuartoRunner points Quarto at it via QUARTO_PYTHON. A per-hash flock serialises concurrent builds; pip output is merged into output.log; on failure the render falls back to the base interpreter. Venvs live under PYTHON_VENVS_DIR (default /var/lib/overleaf/data/python-venvs). Gating: PythonVenvGate.userCanInstallPython restricts installs to the project owner + invited collaborators (ignorePublicAccess excludes anonymous/link users), threaded to CLSI as allowPythonInstall on the editor compile, presentation export, and publish paths. Behind OVERLEAF_ENABLE_PROJECT_PYTHON_VENV (enabled in the deployment). Design doc updated; Phase 2 (egress policy) and Phase 3 (venv eviction) remain. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -290,6 +290,12 @@ jobs:
|
||||
# need OVERLEAF_ALLOW_PUBLIC_ACCESS above.
|
||||
- name: OVERLEAF_ALLOW_ANONYMOUS_READ_AND_WRITE_SHARING
|
||||
value: "true"
|
||||
# Let Quarto Python cells use a project's requirements.txt:
|
||||
# the compiler installs it into a cached venv. Gated to the
|
||||
# project owner + invited collaborators (never anonymous /
|
||||
# link-sharing users).
|
||||
- name: OVERLEAF_ENABLE_PROJECT_PYTHON_VENV
|
||||
value: "true"
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
|
||||
@@ -1,7 +1,31 @@
|
||||
# Design: per-project Python dependencies (cached virtualenv)
|
||||
|
||||
Status: **proposal** (not yet implemented). Captures the agreed plan for letting
|
||||
Quarto `{python}` cells use libraries beyond the curated base set.
|
||||
Status: **Phase 1 implemented** (gated behind `OVERLEAF_ENABLE_PROJECT_PYTHON_VENV`,
|
||||
on in the deployment). Network egress policy and venv eviction (Phases 2–3)
|
||||
remain. Captures the plan for letting Quarto `{python}` cells use libraries
|
||||
beyond the curated base set.
|
||||
|
||||
## What ships in Phase 1
|
||||
|
||||
- A project root `requirements.txt` is installed into a venv cached by its
|
||||
sha256, created with `python3 -m venv --system-site-packages`; `QuartoRunner`
|
||||
points Quarto at it via `QUARTO_PYTHON`. A per-hash `flock` serialises
|
||||
concurrent builds; pip output is merged into `output.log`; on failure the
|
||||
render falls back to the base interpreter (and the missing-package message
|
||||
surfaces). Venvs live under `PYTHON_VENVS_DIR`
|
||||
(default `/var/lib/overleaf/data/python-venvs`).
|
||||
- Gated by `userCanInstallPython` (`PythonVenvGate.mjs`) to the project owner +
|
||||
invited collaborators (any role) — never anonymous / link-sharing users —
|
||||
threaded to CLSI as `allowPythonInstall` on the editor compile, presentation
|
||||
export, and publish paths.
|
||||
|
||||
### Known Phase-1 limitations
|
||||
|
||||
- The first build of a heavy `requirements.txt` runs within the compile
|
||||
timeout; a very large install can be killed and retried next compile (the
|
||||
venv is only marked complete on success).
|
||||
- No egress restriction yet (Phase 2) — installs reach PyPI directly.
|
||||
- No eviction yet (Phase 3) — venvs accumulate under `PYTHON_VENVS_DIR`.
|
||||
|
||||
## Background
|
||||
|
||||
|
||||
@@ -81,17 +81,21 @@ RUN mkdir -p /opt/quarto-extensions \
|
||||
# managed (PEP 668), hence --break-system-packages in this controlled image.
|
||||
# The runtime user (www-data) writes Jupyter's runtime/connection files under
|
||||
# its HOME (/var/www/.local), which is made writable in the Quarto step above.
|
||||
# python3-venv is needed so a project's requirements.txt can be installed into
|
||||
# a per-project venv (see QuartoRunner / PythonVenvGate).
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y python3-pip \
|
||||
&& apt-get install -y python3-pip python3-venv \
|
||||
&& pip3 install --no-cache-dir --break-system-packages \
|
||||
jupyter-core jupyter-client nbclient nbformat ipykernel pyyaml \
|
||||
&& python3 -m ipykernel install --prefix /usr/local --name python3 --display-name "Python 3" \
|
||||
# Bundle the common scientific-Python stack so most decks "just work" without
|
||||
# any per-project install. matplotlib renders headless (Agg) automatically.
|
||||
# To add more later, append to this list (it is the cheapest way to cover a
|
||||
# library many projects need).
|
||||
# any per-project install. matplotlib renders headless (Agg) automatically;
|
||||
# opencv-python-headless is the GUI-less OpenCV build (provides cv2) suited to
|
||||
# a server. To add more later, append to this list (the cheapest way to cover
|
||||
# a library many projects need).
|
||||
&& pip3 install --no-cache-dir --break-system-packages \
|
||||
numpy pandas scipy matplotlib seaborn scikit-learn sympy plotly tabulate \
|
||||
opencv-python-headless tqdm \
|
||||
&& rm -rf /var/lib/apt/lists/* /root/.cache
|
||||
|
||||
# Install decktape + headless Chromium (for exporting RevealJS decks to PDF)
|
||||
|
||||
@@ -248,6 +248,7 @@ async function doCompile(request, stats, timings) {
|
||||
compileGroup: request.compileGroup,
|
||||
stopOnFirstError: request.stopOnFirstError,
|
||||
exportMode: request.exportMode,
|
||||
allowPythonInstall: request.allowPythonInstall,
|
||||
stats,
|
||||
timings,
|
||||
})
|
||||
|
||||
@@ -26,7 +26,16 @@ function runQuarto(compileName, options, callback) {
|
||||
renderTarget = _writeStandaloneVariant(directory, mainFile)
|
||||
}
|
||||
|
||||
const command = _buildQuartoCommand(renderTarget, options.exportMode)
|
||||
// Where cached per-project venvs live (shared across projects, keyed by the
|
||||
// requirements.txt hash). Must be on a persistent volume in production.
|
||||
const venvBaseDir =
|
||||
process.env.PYTHON_VENVS_DIR || '/var/lib/overleaf/data/python-venvs'
|
||||
const command = _buildQuartoCommand(
|
||||
renderTarget,
|
||||
options.exportMode,
|
||||
Boolean(options.allowPythonInstall),
|
||||
venvBaseDir
|
||||
)
|
||||
|
||||
ProcessTable[compileName] = CommandRunner.run(
|
||||
compileName,
|
||||
@@ -60,7 +69,12 @@ function runQuarto(compileName, options, callback) {
|
||||
)
|
||||
}
|
||||
|
||||
function _buildQuartoCommand(renderTarget, exportMode) {
|
||||
function _buildQuartoCommand(
|
||||
renderTarget,
|
||||
exportMode,
|
||||
allowPythonInstall,
|
||||
venvBaseDir
|
||||
) {
|
||||
// Run through a POSIX shell so stderr is merged into stdout (2>&1).
|
||||
// LocalCommandRunner replaces $COMPILE_DIR before the shell sees it.
|
||||
//
|
||||
@@ -121,15 +135,53 @@ function _buildQuartoCommand(renderTarget, exportMode) {
|
||||
? `; rm -rf ${baseName}.qmd ${baseName}_files`
|
||||
: ''
|
||||
|
||||
const venvPrep = allowPythonInstall ? _pythonVenvPrep(venvBaseDir) : ''
|
||||
|
||||
const cmd =
|
||||
`mkdir -p _extensions && ` +
|
||||
`cp -rn /opt/quarto-extensions/_extensions/. _extensions/ 2>/dev/null; ` +
|
||||
venvPrep +
|
||||
`quarto render ${inputPath} 2>&1 && ` +
|
||||
tail +
|
||||
cleanup
|
||||
return ['/bin/sh', '-c', cmd]
|
||||
}
|
||||
|
||||
// Build the shell snippet that runs before `quarto render` (in the compile
// dir) to install a project's requirements.txt into a cached venv and point
// Quarto at it via QUARTO_PYTHON.
//
// @param {string} venvBaseDir - directory under which cached venvs are kept.
// @returns {string} a `; `-terminated shell fragment, safe to prepend to the
//   render command.
//
// Notes:
//  - The venv directory is "<venvBaseDir>/<sha256 of requirements.txt>", so
//    identical requirements.txt contents share one venv across compiles.
//    NOTE(review): the key covers only the bytes of requirements.txt itself;
//    anything it pulls in indirectly (`-r other.txt`, local paths) and the
//    base Python version are not part of the key — confirm this is acceptable.
//  - --system-site-packages keeps the base interpreter's packages visible, so
//    only the *extra* packages are installed.
//  - A per-hash flock (fd 9 on "<venvBaseDir>/.<hash>.lock") serialises
//    concurrent compiles building the same venv; if flock itself fails the
//    subshell exits quietly and no install is attempted.
//  - The subshell's stderr is merged into stdout so pip output/errors end up
//    wherever the command's stdout goes. ".verso-complete" is only touched
//    after a successful install; without it QUARTO_PYTHON stays unset and the
//    render uses the base interpreter.
//  - venvBaseDir is the only value substituted by JS. It is emitted as a
//    single-quoted shell word (embedded single quotes escaped as '\'') so
//    characters such as `"`, `$`, backticks or spaces in the configured path
//    can neither break the command nor be expanded by the shell. Everything
//    else uses plain $VAR / $(...) shell syntax (never the brace form), so it
//    cannot collide with JS template interpolation.
function _pythonVenvPrep(venvBaseDir) {
  // POSIX single-quote escaping: close the quote, emit an escaped quote,
  // reopen. Inside single quotes the shell performs no expansion at all.
  const quotedBaseDir = `'${String(venvBaseDir).replace(/'/g, "'\\''")}'`

  return (
    `if [ -f requirements.txt ]; then ` +
    `VBASE=${quotedBaseDir}; ` +
    `RHASH=$(sha256sum requirements.txt 2>/dev/null | cut -d" " -f1); ` +
    `if [ -n "$RHASH" ]; then ` +
    `VDIR="$VBASE/$RHASH"; mkdir -p "$VBASE" 2>/dev/null; ` +
    `( flock 9 || exit 0; ` +
    `if [ ! -f "$VDIR/.verso-complete" ]; then ` +
    // A directory without the marker is a failed/partial build: start clean.
    `echo "Installing Python packages from requirements.txt..."; rm -rf "$VDIR"; ` +
    `python3 -m venv --system-site-packages "$VDIR" ` +
    `&& "$VDIR/bin/pip" install --no-input --disable-pip-version-check -r requirements.txt ` +
    `&& touch "$VDIR/.verso-complete" ` +
    `|| echo "ERROR: Failed to install Python packages from requirements.txt"; ` +
    `fi ` +
    `) 9>"$VBASE/.$RHASH.lock" 2>&1; ` +
    `if [ -f "$VDIR/.verso-complete" ]; then export QUARTO_PYTHON="$VDIR/bin/python3"; fi; ` +
    `fi; ` +
    `fi; `
  )
}
|
||||
|
||||
// Write a temporary copy of the root .qmd with embed-resources enabled in its
|
||||
// frontmatter, returning the temp filename to render. On any problem (no
|
||||
// frontmatter, not a nested revealjs deck, read/write error) it falls back to
|
||||
|
||||
@@ -109,6 +109,13 @@ function parse(body, callback) {
|
||||
compile.options.exportMode,
|
||||
{ default: '', type: 'string' }
|
||||
)
|
||||
// Verso: whether QuartoRunner may install the project's requirements.txt
|
||||
// into a cached venv (gated by privilege on the web side).
|
||||
response.allowPythonInstall = _parseAttribute(
|
||||
'allowPythonInstall',
|
||||
compile.options.allowPythonInstall,
|
||||
{ default: false, type: 'boolean' }
|
||||
)
|
||||
response.flags = _parseAttribute('flags', compile.options.flags, {
|
||||
default: [],
|
||||
type: 'object',
|
||||
|
||||
@@ -1149,6 +1149,7 @@ function _finaliseRequest(projectId, options, project, docs, files) {
|
||||
draft: Boolean(options.draft),
|
||||
stopOnFirstError: Boolean(options.stopOnFirstError),
|
||||
exportMode: options.exportMode,
|
||||
allowPythonInstall: Boolean(options.allowPythonInstall),
|
||||
check: options.check,
|
||||
syncType: options.syncType,
|
||||
syncState: options.syncState,
|
||||
|
||||
@@ -7,6 +7,7 @@ import logger from '@overleaf/logger'
|
||||
import Settings from '@overleaf/settings'
|
||||
import Errors from '../Errors/Errors.js'
|
||||
import SessionManager from '../Authentication/SessionManager.mjs'
|
||||
import { userCanInstallPython } from './PythonVenvGate.mjs'
|
||||
import { RateLimiter } from '../../infrastructure/RateLimiter.mjs'
|
||||
import Validation from '../../infrastructure/Validation.mjs'
|
||||
import Path from 'node:path'
|
||||
@@ -201,6 +202,11 @@ const _CompileController = {
|
||||
options.incrementalCompilesEnabled = true
|
||||
}
|
||||
|
||||
// Allow building a per-project Python venv from requirements.txt only for
|
||||
// the project owner and invited collaborators — never anonymous or
|
||||
// link-sharing users.
|
||||
options.allowPythonInstall = await userCanInstallPython(userId, projectId)
|
||||
|
||||
let {
|
||||
enablePdfCaching,
|
||||
pdfCachingMinChunkSize,
|
||||
|
||||
@@ -6,6 +6,7 @@ import SessionManager from '../Authentication/SessionManager.mjs'
|
||||
import CompileManager from './CompileManager.mjs'
|
||||
import ClsiManager from './ClsiManager.mjs'
|
||||
import ProjectGetter from '../Project/ProjectGetter.mjs'
|
||||
import { userCanInstallPython } from './PythonVenvGate.mjs'
|
||||
|
||||
// On-demand export of a RevealJS deck from the editor's download menu.
|
||||
// - html → a single self-contained .html (embed-resources)
|
||||
@@ -61,6 +62,7 @@ async function exportPresentation(req, res) {
|
||||
await CompileManager.promises.compile(projectId, userId, {
|
||||
exportMode: format.exportMode,
|
||||
bypassRecentCompileCheck: true,
|
||||
allowPythonInstall: await userCanInstallPython(userId, projectId),
|
||||
})
|
||||
|
||||
if (!buildId || !outputFiles?.some(f => f.path === format.file)) {
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
import Settings from '@overleaf/settings'
|
||||
import logger from '@overleaf/logger'
|
||||
import AuthorizationManager from '../Authorization/AuthorizationManager.mjs'
|
||||
|
||||
/**
 * Decide whether the compiler may install a project's requirements.txt into a
 * cached venv on behalf of this user (so Quarto's Python cells can use
 * libraries beyond the bundled base set).
 *
 * The answer is `true` only when the feature flag
 * `Settings.enableProjectPythonVenv` is on AND the user has a truthy privilege
 * level on the project when public access is ignored — i.e. the project owner
 * and invited collaborators (any role). Everyone else falls back to the base
 * Python interpreter.
 * NOTE(review): the exclusion of anonymous / link-sharing users relies on how
 * AuthorizationManager treats `ignorePublicAccess` — confirm it also covers
 * logged-in users who joined through a sharing link.
 *
 * Never throws: a failing privilege lookup is logged and treated as "no".
 *
 * @param {string} userId
 * @param {string} projectId
 * @returns {Promise<boolean>}
 */
export async function userCanInstallPython(userId, projectId) {
  const featureEnabled = Boolean(Settings.enableProjectPythonVenv)
  if (featureEnabled) {
    // No access token is supplied; public/link access is deliberately ignored.
    const lookupOptions = { ignorePublicAccess: true }
    try {
      const level =
        await AuthorizationManager.promises.getPrivilegeLevelForProject(
          userId,
          projectId,
          null,
          lookupOptions
        )
      return level ? true : false
    } catch (err) {
      // Fail closed: without a definite privilege level nothing is installed.
      logger.warn(
        { err, projectId, userId },
        'could not determine python install privilege; defaulting to false'
      )
    }
  }
  return false
}
|
||||
|
||||
export default { userCanInstallPython }
|
||||
@@ -8,6 +8,7 @@ import { fetchStream } from '@overleaf/fetch-utils'
|
||||
import { callbackify } from 'node:util'
|
||||
import CompileManager from '../Compile/CompileManager.mjs'
|
||||
import { getOutputFileURL } from '../Compile/ClsiURLHelpers.mjs'
|
||||
import { userCanInstallPython } from '../Compile/PythonVenvGate.mjs'
|
||||
import { PublishedPresentation } from '../../models/PublishedPresentation.mjs'
|
||||
import Errors from '../Errors/Errors.js'
|
||||
|
||||
@@ -56,6 +57,7 @@ async function publish(projectId, userId) {
|
||||
const { status, outputFiles, clsiServerId, buildId } =
|
||||
await CompileManager.promises.compile(projectId, userId, {
|
||||
bypassRecentCompileCheck: true,
|
||||
allowPythonInstall: await userCanInstallPython(userId, projectId),
|
||||
})
|
||||
|
||||
if (!outputFiles?.some(f => f.path === 'output.html')) {
|
||||
|
||||
@@ -478,6 +478,12 @@ module.exports = {
|
||||
: 'quarto',
|
||||
enableSubscriptions: false,
|
||||
restrictedCountries: [],
|
||||
|
||||
// When true, a project's requirements.txt is installed into a cached venv so
|
||||
// Quarto's Python cells can use libraries beyond the bundled base set. Gated
|
||||
// by PythonVenvGate.userCanInstallPython (editor compile, presentation export and publish paths) to the project owner + invited collaborators only.
|
||||
enableProjectPythonVenv:
|
||||
process.env.OVERLEAF_ENABLE_PROJECT_PYTHON_VENV === 'true',
|
||||
enableOnboardingEmails: process.env.ENABLE_ONBOARDING_EMAILS === 'true',
|
||||
|
||||
enabledLinkedFileTypes: (process.env.ENABLED_LINKED_FILE_TYPES || '').split(
|
||||
|
||||
Reference in New Issue
Block a user