[clsi] initial version of /convert/pdf-to-jpeg (#33752)

* [monorepo] consolidate clsi-lb host/ip env-vars

Target env-var is CLSI_LB_HOST. Keep CLSI_LB_IP populated for a week.

* [clsi] initial version of /convert/pdf-to-jpeg

* [rails] use fake-secrets in CI and Codespaces

* [rails] adapt tests for using clsi to convert PDFs to image

* [rails] add rake task for comparing clsi conversion with transloadit

* [clsi] double check that output.jpg is a regular file

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* [clsi] fix composing basename

* [monorepo] fix clsi-lb host env-var post merge

* [monorepo] sort dev-environment.env hosts

* [rails] use local pdf file rather than downloading it again

Download from the old renderer code path still. It's dead code.

* [terraform] clsi: enable pdf to jpg conversion

---------

Co-authored-by: Brian Gough <brian.gough@overleaf.com>
GitOrigin-RevId: 5ecaa8559d299486340bb3961f06b29f7c4dfcca
This commit is contained in:
Jakob Ackermann
2026-06-04 12:24:10 +02:00
committed by Copybot
parent 9ec0ff065d
commit df61bfc788
14 changed files with 242 additions and 5 deletions
+2
View File
@@ -32,6 +32,7 @@ COPY libraries/overleaf-editor-core/package.json /overleaf/libraries/overleaf-ed
COPY libraries/promise-utils/package.json /overleaf/libraries/promise-utils/package.json
COPY libraries/settings/package.json /overleaf/libraries/settings/package.json
COPY libraries/stream-utils/package.json /overleaf/libraries/stream-utils/package.json
COPY libraries/validation-tools/package.json /overleaf/libraries/validation-tools/package.json
COPY services/clsi/package.json /overleaf/services/clsi/package.json
COPY .yarn/patches/ /overleaf/.yarn/patches/
@@ -45,6 +46,7 @@ COPY libraries/overleaf-editor-core/ /overleaf/libraries/overleaf-editor-core/
COPY libraries/promise-utils/ /overleaf/libraries/promise-utils/
COPY libraries/settings/ /overleaf/libraries/settings/
COPY libraries/stream-utils/ /overleaf/libraries/stream-utils/
COPY libraries/validation-tools/ /overleaf/libraries/validation-tools/
COPY services/clsi/ /overleaf/services/clsi/
FROM app AS with-texlive
+4 -2
View File
@@ -25,6 +25,7 @@ IMAGE_CACHE ?= $(IMAGE_REPO):cache-$(shell cat \
$(MONOREPO)/libraries/promise-utils/package.json \
$(MONOREPO)/libraries/settings/package.json \
$(MONOREPO)/libraries/stream-utils/package.json \
$(MONOREPO)/libraries/validation-tools/package.json \
$(MONOREPO)/services/clsi/package.json \
$(MONOREPO)/.yarn/patches/* \
| sha256sum | cut -d '-' -f1)
@@ -169,8 +170,9 @@ test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
docker pull us-east1-docker.pkg.dev/overleaf-ops/ol-docker/pandoc:3.9
docker pull us-east1-docker.pkg.dev/overleaf-ops/ol-docker/pandoc-staging:3.9
-docker pull us-east1-docker.pkg.dev/overleaf-ops/ol-docker/pandoc:3.9
-docker pull us-east1-docker.pkg.dev/overleaf-ops/ol-docker/pandoc-staging:3.9
-cd ../../ && docker build -t us-east1-docker.pkg.dev/overleaf-ops/ol-docker/pdftocairo:24.02 dockerfiles/pdftocairo
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run $(DC_RUN_FLAGS) test_acceptance test/acceptance/js/scripts/pre-run
endif
+5
View File
@@ -145,6 +145,11 @@ app.post(
bodyParser.json({ limit: Settings.compileSizeLimit }),
ConversionController.convertProjectToDocument
)
// POST /convert/pdf-to-jpeg: FileUploadMiddleware.multerMiddleware runs first,
// then the request is handed to ConversionController.convertPDFToJPEG.
app.post(
'/convert/pdf-to-jpeg',
FileUploadMiddleware.multerMiddleware,
ConversionController.convertPDFToJPEG
)
if (process.env.NODE_ENV === 'development' && global.__coverage__) {
app.get('/coverage', (req, res) => {
@@ -14,6 +14,7 @@ import RequestParser from './RequestParser.js'
import { pipeline } from 'node:stream/promises'
import Settings from '@overleaf/settings'
import Path from 'node:path'
import { z } from '@overleaf/validation-tools'
const CONVERSION_CONFIGS = {
docx: { extension: 'docx' },
@@ -77,6 +78,51 @@ async function convertDocumentToLaTeX(req, res) {
}
}
// Schema for the query string of a pdf-to-jpeg request: `mode` must be
// exactly 'preview' or 'thumbnail'.
const PDFToJPEGQuerySchema = z.object({
mode: z.enum(['preview', 'thumbnail']),
})
/**
 * Request handler that converts an uploaded PDF to a JPEG and streams the
 * image back as the attachment `output.jpg`.
 *
 * Responses:
 * - 404 when `Settings.enablePdfConversions` is not enabled
 * - 400 when the request carries no uploaded file, or when `req.query` does
 *   not satisfy PDFToJPEGQuerySchema
 * - otherwise the JPEG body, with `Content-Length` taken from the file size
 *
 * Errors raised by the conversion or while streaming are not handled here and
 * propagate to the caller. The uploaded file is removed on every path, and the
 * directory holding the generated JPEG is removed once streaming has finished
 * or failed.
 *
 * @param {object} req - request; reads `req.file.path` and `req.query`
 * @param {object} res - response used to send the status or the JPEG stream
 */
async function convertPDFToJPEG(req, res) {
  // NOTE(review): req.file is presumably set by the upload middleware only
  // when a file was actually sent - confirm. Guarding here turns a file-less
  // request into a 400 instead of a TypeError on destructuring.
  const path = req.file?.path
  const removeUpload = async () => {
    if (path) {
      // Best-effort cleanup: a failed unlink must not mask the response.
      await fs.unlink(path).catch(() => {})
    }
  }

  if (!Settings.enablePdfConversions) {
    await removeUpload()
    return res.sendStatus(404)
  }

  if (!path) {
    return res.sendStatus(400)
  }

  const parsed = PDFToJPEGQuerySchema.safeParse(req.query)
  if (!parsed.success) {
    await removeUpload()
    return res.sendStatus(400)
  }
  const { mode } = parsed.data

  logger.debug({ path, mode }, 'received pdf for conversion to jpeg')
  const conversionId = crypto.randomUUID()

  let jpegPath
  try {
    jpegPath = await ConversionManager.promises.convertPDFToJPEGWithLock(
      conversionId,
      path,
      mode
    )
  } finally {
    // The upload is no longer needed whether or not the conversion worked.
    await removeUpload()
  }

  try {
    const jpegStat = await fs.stat(jpegPath)
    res.setHeader('Content-Length', jpegStat.size)
    res.attachment('output.jpg')
    res.setHeader('X-Content-Type-Options', 'nosniff')
    const readStream = fsSync.createReadStream(jpegPath)
    await pipeline(readStream, res)
  } finally {
    // Remove the whole directory that contains the generated JPEG.
    await fs
      .rm(Path.dirname(jpegPath), { recursive: true, force: true })
      .catch(() => {})
  }
}
async function convertProjectToDocument(req, res) {
if (!Settings.enablePandocConversions) {
return res.sendStatus(404)
@@ -207,4 +253,5 @@ async function convertProjectToDocument(req, res) {
// Handlers exposed by this module; each async function is passed through
// expressify before being exported.
export default {
convertDocumentToLaTeX: expressify(convertDocumentToLaTeX),
convertProjectToDocument: expressify(convertProjectToDocument),
convertPDFToJPEG: expressify(convertPDFToJPEG),
}
+79
View File
@@ -18,6 +18,18 @@ const CONVERSION_CONFIGS = {
},
}
// Rendering presets keyed by conversion mode. `width` is handed to
// pdftocairo's `-scale-to-x` option and `quality` to `-jpegopt quality=`.
const PDF_TO_JPEG_CONFIGS = {
  preview: { width: 794, quality: 90 },
  thumbnail: { width: 190, quality: 50 },
}

// File names used inside the per-conversion directory.
const PDF_TO_JPEG_INPUT_FILENAME = 'input.pdf'
const PDF_TO_JPEG_OUTPUT_FILENAME = 'output.jpg'

// Output file name without its extension; this is the form that is passed
// to pdftocairo as the output argument.
const { name: PDF_TO_JPEG_OUTPUT_BASENAME } = Path.parse(
  PDF_TO_JPEG_OUTPUT_FILENAME
)
async function convertToLaTeXWithLock(conversionId, inputPath, conversionType) {
const conversionDir = Path.join(Settings.path.compilesDir, conversionId)
const lock = LockManager.acquire(conversionDir)
@@ -298,9 +310,76 @@ async function convertLaTeXToDocumentInDir(
return Path.join(compileDir, finalOutputName)
}
/**
 * Run a PDF-to-JPEG conversion while holding the lock for its directory.
 *
 * The working directory is `<Settings.path.compilesDir>/<conversionId>`. The
 * lock obtained from LockManager for that directory is released whether the
 * conversion succeeds or throws.
 *
 * @param {string} conversionId - identifier used as the directory name
 * @param {string} inputPath - path of the PDF to convert
 * @param {string} mode - conversion mode, forwarded to convertPDFToJPEG
 * @returns {Promise<string>} whatever convertPDFToJPEG resolves with
 */
async function convertPDFToJPEGWithLock(conversionId, inputPath, mode) {
  const workDir = Path.join(Settings.path.compilesDir, conversionId)
  const dirLock = LockManager.acquire(workDir)
  let jpegPath
  try {
    jpegPath = await convertPDFToJPEG(conversionId, workDir, inputPath, mode)
  } finally {
    dirLock.release()
  }
  return jpegPath
}
/**
 * Convert a PDF into `output.jpg` inside `conversionDir` by running
 * pdftocairo through CommandRunner.
 *
 * The input is copied into the conversion directory as `input.pdf`, the
 * command is run there, and the copied PDF is removed afterwards so only the
 * JPEG remains. If any step after mode validation fails, the whole
 * conversion directory is removed before the error is rethrown.
 *
 * @param {string} conversionId - identifier handed to CommandRunner
 * @param {string} conversionDir - directory to create and run the command in
 * @param {string} inputPath - path of the source PDF
 * @param {string} mode - key of PDF_TO_JPEG_CONFIGS
 * @returns {Promise<string>} path of the generated JPEG
 * @throws {OError} 'unsupported pdf-to-jpeg mode' for an unknown mode, or
 *   'pdf-to-jpeg conversion failed' (with the underlying error as cause)
 */
async function convertPDFToJPEG(conversionId, conversionDir, inputPath, mode) {
  // Reject unknown modes before touching the filesystem. Object.hasOwn also
  // keeps inherited keys such as 'constructor' from being treated as a mode.
  if (!Object.hasOwn(PDF_TO_JPEG_CONFIGS, mode)) {
    throw new OError('unsupported pdf-to-jpeg mode', { mode })
  }
  const config = PDF_TO_JPEG_CONFIGS[mode]
  const newSourcePath = Path.join(conversionDir, PDF_TO_JPEG_INPUT_FILENAME)
  const dstPath = Path.join(conversionDir, PDF_TO_JPEG_OUTPUT_FILENAME)

  try {
    // Directory creation and the copy sit inside the try so that a failed
    // copy does not leave an orphaned conversion directory behind.
    await fs.mkdir(conversionDir, { recursive: true })
    await fs.copyFile(inputPath, newSourcePath)

    const { stdout, stderr, exitCode } = await CommandRunner.promises.run(
      conversionId,
      [
        'pdftocairo',
        '-jpeg',
        '-jpegopt',
        `quality=${config.quality}`,
        '-singlefile',
        '-scale-to-x',
        config.width.toString(),
        '-scale-to-y',
        '-1', // maintain aspect ratio
        PDF_TO_JPEG_INPUT_FILENAME,
        PDF_TO_JPEG_OUTPUT_BASENAME,
      ],
      conversionDir,
      Settings.pdftocairoImage,
      Settings.conversionTimeoutSeconds * 1000,
      {},
      'conversions',
      null
    )

    if (exitCode !== 0) {
      throw new OError('Non-zero exit code from pdftocairo', {
        exitCode,
        stderr,
      })
    }
    logger.debug(
      { stdout, stderr, exitCode },
      'pdf-to-jpeg conversion completed'
    )

    // lstat (not stat) so a symlink at the output path is rejected rather
    // than followed.
    const stat = await fs.lstat(dstPath)
    if (!stat.isFile()) {
      throw new OError('output.jpg is not a regular file', { stat })
    }

    // Clean up the source PDF to leave only the conversion result
    await fs.unlink(newSourcePath).catch(() => {})
  } catch (error) {
    await fs.rm(conversionDir, { force: true, recursive: true }).catch(() => {})
    throw new OError('pdf-to-jpeg conversion failed').withCause(error)
  }

  return dstPath
}
// Module API: the lock-taking conversion entry points, grouped under
// `promises`.
export default {
promises: {
convertToLaTeXWithLock,
convertLaTeXToDocumentInDirWithLock,
convertPDFToJPEGWithLock,
},
}
+1 -1
View File
@@ -1,7 +1,7 @@
clsi
--data-dirs=cache,compiles,output
--dependencies=
--env-add=DOWNLOAD_HOST=http://clsi-nginx:8080,ALLOWED_COMPILE_GROUPS=clsi-perf simple-latex-file,ENABLE_PDF_CACHING=true,PDF_CACHING_ENABLE_WORKER_POOL=true,ALLOWED_IMAGES=quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9,TEXLIVE_IMAGE=quay.io/sharelatex/texlive-full:2025.1,TEX_LIVE_IMAGE_NAME_OVERRIDE=us-east1-docker.pkg.dev/overleaf-ops/ol-docker,TEXLIVE_IMAGE_USER=tex,SANDBOXED_COMPILES=true,SANDBOXED_COMPILES_HOST_DIR_COMPILES=$PWD/compiles,SANDBOXED_COMPILES_HOST_DIR_OUTPUT=$PWD/output,ENABLE_PANDOC_CONVERSIONS=true
--env-add=DOWNLOAD_HOST=http://clsi-nginx:8080,ALLOWED_COMPILE_GROUPS=clsi-perf simple-latex-file,ENABLE_PDF_CACHING=true,PDF_CACHING_ENABLE_WORKER_POOL=true,ALLOWED_IMAGES=quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9 quay.io/sharelatex/pdftocairo:24.02,TEXLIVE_IMAGE=quay.io/sharelatex/texlive-full:2025.1,TEX_LIVE_IMAGE_NAME_OVERRIDE=us-east1-docker.pkg.dev/overleaf-ops/ol-docker,TEXLIVE_IMAGE_USER=tex,SANDBOXED_COMPILES=true,SANDBOXED_COMPILES_HOST_DIR_COMPILES=$PWD/compiles,SANDBOXED_COMPILES_HOST_DIR_OUTPUT=$PWD/output,ENABLE_PANDOC_CONVERSIONS=true,ENABLE_PDF_CONVERSIONS=true
--env-pass-through=
--esmock-loader=False
--node-version=24.14.1
@@ -31,6 +31,9 @@ module.exports = {
parseInt(process.env.CLSI_CONVERSION_TIMEOUT_SECONDS, 10) || 60,
pandocImage: process.env.PANDOC_IMAGE || 'quay.io/sharelatex/pandoc:3.9',
enablePandocConversions: process.env.ENABLE_PANDOC_CONVERSIONS === 'true',
pdftocairoImage:
process.env.PDFTOCAIRO_IMAGE || 'quay.io/sharelatex/pdftocairo:24.02',
enablePdfConversions: process.env.ENABLE_PDF_CONVERSIONS === 'true',
maxUploadSize: 50 * 1024 * 1024,
internal: {
+2 -1
View File
@@ -30,7 +30,7 @@ services:
ALLOWED_COMPILE_GROUPS: clsi-perf simple-latex-file
ENABLE_PDF_CACHING: true
PDF_CACHING_ENABLE_WORKER_POOL: true
ALLOWED_IMAGES: quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9
ALLOWED_IMAGES: quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9 quay.io/sharelatex/pdftocairo:24.02
TEXLIVE_IMAGE: quay.io/sharelatex/texlive-full:2025.1
TEX_LIVE_IMAGE_NAME_OVERRIDE: us-east1-docker.pkg.dev/overleaf-ops/ol-docker
TEXLIVE_IMAGE_USER: tex
@@ -38,6 +38,7 @@ services:
SANDBOXED_COMPILES_HOST_DIR_COMPILES: $PWD/compiles
SANDBOXED_COMPILES_HOST_DIR_OUTPUT: $PWD/output
ENABLE_PANDOC_CONVERSIONS: true
ENABLE_PDF_CONVERSIONS: true
volumes:
- ./reports:/overleaf/services/clsi/reports
- ./compiles:/overleaf/services/clsi/compiles
+2 -1
View File
@@ -53,7 +53,7 @@ services:
ALLOWED_COMPILE_GROUPS: clsi-perf simple-latex-file
ENABLE_PDF_CACHING: true
PDF_CACHING_ENABLE_WORKER_POOL: true
ALLOWED_IMAGES: quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9
ALLOWED_IMAGES: quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9 quay.io/sharelatex/pdftocairo:24.02
TEXLIVE_IMAGE: quay.io/sharelatex/texlive-full:2025.1
TEX_LIVE_IMAGE_NAME_OVERRIDE: us-east1-docker.pkg.dev/overleaf-ops/ol-docker
TEXLIVE_IMAGE_USER: tex
@@ -61,6 +61,7 @@ services:
SANDBOXED_COMPILES_HOST_DIR_COMPILES: $PWD/compiles
SANDBOXED_COMPILES_HOST_DIR_OUTPUT: $PWD/output
ENABLE_PANDOC_CONVERSIONS: true
ENABLE_PDF_CONVERSIONS: true
depends_on:
clsi-nginx:
condition: service_started
+1
View File
@@ -23,6 +23,7 @@
"@overleaf/promise-utils": "workspace:*",
"@overleaf/settings": "workspace:*",
"@overleaf/stream-utils": "workspace:*",
"@overleaf/validation-tools": "workspace:*",
"archiver": "5.3.2",
"async": "^3.2.5",
"body-parser": "1.20.4",
@@ -0,0 +1,83 @@
import Client from './helpers/Client.js'
import ClsiApp from './helpers/ClsiApp.js'
import Path from 'node:path'
import fs from 'node:fs/promises'
import { promisify } from 'node:util'
import { execFile as execFileCb } from 'node:child_process'
import { expect } from 'chai'
// Promise-returning wrapper around child_process.execFile.
const execFile = promisify(execFileCb)
// PDF fixture uploaded by every test in this file.
const FIXTURE_PDF = Path.join(import.meta.dirname, '../fixtures/minimal.pdf')
// Expected width of the returned image for each conversion mode.
const MODE_EXPECTATIONS = {
preview: { width: 794 },
thumbnail: { width: 190 },
}
async function writeResponseToTempfile(response) {
const buffer = Buffer.from(await response.arrayBuffer())
const tmpPath = `/tmp/clsi-acceptance-pdf-to-jpeg-${crypto.randomUUID()}.jpg`
await fs.writeFile(tmpPath, buffer)
return { tmpPath, buffer }
}
// Acceptance tests for the pdf-to-jpeg conversion: each supported mode is
// requested once and the returned image is inspected with the external
// `file` and `identify` commands; an unsupported mode must yield a 400.
describe('pdf-to-jpeg conversion', function () {
before(async function () {
await ClsiApp.ensureRunning()
})
// One nested suite per entry in MODE_EXPECTATIONS.
for (const [mode, { width: expectedWidth }] of Object.entries(
MODE_EXPECTATIONS
)) {
describe(`with mode=${mode}`, function () {
let response
let tmpPath
let buffer
// Convert once per mode and keep the body on disk so the tests below
// can run command-line tools against it.
before(async function () {
response = await Client.convertPdfToJpeg(FIXTURE_PDF, mode)
expect(response.status).to.equal(200)
;({ tmpPath, buffer } = await writeResponseToTempfile(response))
})
// tmpPath stays unset if the before hook failed early, hence the guard.
after(async function () {
if (tmpPath) {
await fs.unlink(tmpPath).catch(() => {})
}
})
it('returns a JPEG (per `file`)', async function () {
const { stdout } = await execFile('file', ['--brief', tmpPath])
expect(stdout).to.match(/JPEG image data/)
})
it(`has the expected width of ${expectedWidth}px`, async function () {
// `identify` prints "<width> <height>" for the given format string.
const { stdout } = await execFile('identify', [
'-format',
'%w %h',
tmpPath,
])
const [width, height] = stdout.trim().split(' ').map(Number)
expect(width).to.equal(expectedWidth)
// A4 portrait is taller than wide; height must be positive and
// larger than the width (so the aspect ratio was preserved).
expect(height).to.be.greaterThan(width)
})
it('returns a non-empty body matching Content-Length', function () {
expect(buffer.length).to.be.greaterThan(0)
expect(buffer.length).to.equal(
Number(response.headers.get('content-length'))
)
})
})
}
describe('with an unsupported mode', function () {
it('returns 400', async function () {
const response = await Client.convertPdfToJpeg(FIXTURE_PDF, 'not-a-mode')
expect(response.status).to.equal(400)
})
})
})
@@ -53,6 +53,16 @@ async function convertDocument(path, type) {
}
}
/**
 * POST a PDF from disk to the pdf-to-jpeg conversion endpoint.
 *
 * The file is sent as the multipart field `qqfile` with the file name
 * `input.pdf`.
 *
 * @param {string} path - path of the PDF file to upload
 * @param {string} mode - conversion mode, sent as the `mode` query parameter
 * @returns {Promise<Response>} the raw fetch response (status is not checked)
 */
async function convertPdfToJpeg(path, mode) {
  const formData = new FormData()
  formData.append('qqfile', await fsPromises.readFile(path), 'input.pdf')
  // Encode the value so characters such as '&', '=' or spaces cannot alter
  // the query string; plain mode names are sent unchanged.
  const query = `mode=${encodeURIComponent(mode)}`
  return await fetch(`${host}/convert/pdf-to-jpeg?${query}`, {
    method: 'POST',
    headers: formData.getHeaders(),
    body: formData.getBuffer(),
  })
}
async function convertProjectToDocument(
projectId,
userId,
@@ -239,6 +249,7 @@ export default {
compile,
convertProjectToDocument,
convertDocument,
convertPdfToJpeg,
stopCompile,
clearCache,
getOutputFile,
@@ -18,6 +18,7 @@ describe('ConversionController', function () {
ctx.documentStat = { size: 5678 }
ctx.Settings = {
enablePandocConversions: true,
enablePdfConversions: true,
path: {
compilesDir: '/compiles',
outputDir: '/output',
+1
View File
@@ -6484,6 +6484,7 @@ __metadata:
"@overleaf/promise-utils": "workspace:*"
"@overleaf/settings": "workspace:*"
"@overleaf/stream-utils": "workspace:*"
"@overleaf/validation-tools": "workspace:*"
archiver: "npm:5.3.2"
async: "npm:^3.2.5"
body-parser: "npm:1.20.4"