From d7cd65d20cc8eaeff0a6cfb643490aeab0032e98 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Wed, 24 Sep 2025 08:51:11 +0100 Subject: [PATCH] Merge pull request #28628 from overleaf/bg-cache-history-size-on-project Implement project size checks on file uploads GitOrigin-RevId: 4dffe237e6992b859f07964cffa82ff1c13e91c9 --- .../Features/FileStore/FileStoreHandler.js | 58 +++++++--- .../web/app/src/infrastructure/mongodb.js | 1 + ...22141050_project_history_sizes_indexes.mjs | 34 ++++++ .../acceptance/src/mocks/MockV1HistoryApi.mjs | 104 +++++++++--------- .../src/FileStore/FileStoreHandlerTests.js | 55 +++++++++ 5 files changed, 183 insertions(+), 69 deletions(-) create mode 100644 services/web/migrations/20250922141050_project_history_sizes_indexes.mjs diff --git a/services/web/app/src/Features/FileStore/FileStoreHandler.js b/services/web/app/src/Features/FileStore/FileStoreHandler.js index 53c59de81b..e9e7938432 100644 --- a/services/web/app/src/Features/FileStore/FileStoreHandler.js +++ b/services/web/app/src/Features/FileStore/FileStoreHandler.js @@ -7,6 +7,7 @@ const ProjectDetailsHandler = require('../Project/ProjectDetailsHandler') const { File } = require('../../models/File') const OError = require('@overleaf/o-error') const { promisifyAll } = require('@overleaf/promise-utils') +const Modules = require('../../infrastructure/Modules') const FileStoreHandler = { RETRY_ATTEMPTS: 3, @@ -69,24 +70,47 @@ const FileStoreHandler = { ) return callback(new Error('can not upload symlink')) } - FileHashManager.computeHash(fsPath, function (err, hash) { - if (err) { - return callback(err) - } - FileStoreHandler._uploadToHistory( - historyId, - hash, - stat.size, - fsPath, - function (err) { + const size = stat.size + Modules.hooks.fire( + 'preUploadFile', + { projectId, historyId, fileArgs, fsPath, size }, + preUploadErr => { + if (preUploadErr) { + return callback(preUploadErr) + } + FileHashManager.computeHash(fsPath, function (err, hash) { if (err) { return callback(err) } - fileArgs = { ...fileArgs, hash } - callback(err, new File(fileArgs), true) - } - ) - }) + FileStoreHandler._uploadToHistory( + historyId, + hash, + stat.size, + fsPath, + function (err) { + if (err) { + return callback(err) + } + const fileRef = new File({ ...fileArgs, hash }) + Modules.hooks.fire( + 'postUploadFile', + { + projectId, + fileRef, + size, + }, + postUploadErr => { + if (postUploadErr) { + return callback(postUploadErr) + } + callback(err, fileRef, true, size) + } + ) + } + ) + }) + } + ) }) }, } @@ -94,7 +118,7 @@ const FileStoreHandler = { module.exports = FileStoreHandler module.exports.promises = promisifyAll(FileStoreHandler, { multiResult: { - uploadFileFromDisk: ['fileRef', 'createdBlob'], - uploadFileFromDiskWithHistoryId: ['fileRef', 'createdBlob'], + uploadFileFromDisk: ['fileRef', 'createdBlob', 'size'], + uploadFileFromDiskWithHistoryId: ['fileRef', 'createdBlob', 'size'], }, }) diff --git a/services/web/app/src/infrastructure/mongodb.js b/services/web/app/src/infrastructure/mongodb.js index b0bce8a846..ffa5e00989 100644 --- a/services/web/app/src/infrastructure/mongodb.js +++ b/services/web/app/src/infrastructure/mongodb.js @@ -62,6 +62,7 @@ const db = { projectHistoryFailures: internalDb.collection('projectHistoryFailures'), projectHistoryGlobalBlobs: internalDb.collection('projectHistoryGlobalBlobs'), projectHistoryLabels: internalDb.collection('projectHistoryLabels'), + projectHistorySizes: internalDb.collection('projectHistorySizes'), projectHistorySyncState: internalDb.collection('projectHistorySyncState'), projectInvites: internalDb.collection('projectInvites'), projects: internalDb.collection('projects'), diff --git a/services/web/migrations/20250922141050_project_history_sizes_indexes.mjs b/services/web/migrations/20250922141050_project_history_sizes_indexes.mjs new file mode 100644 index 0000000000..312609168f --- /dev/null +++ b/services/web/migrations/20250922141050_project_history_sizes_indexes.mjs @@ -0,0 +1,34 @@ +/* eslint-disable no-unused-vars */ + +import Helpers from './lib/helpers.mjs' + +const tags = ['saas'] + +const indexes = [ + { + key: { lastUpdatedAt: 1 }, + expireAfterSeconds: 60 * 60 * 24 * 30 /* 30 days */, + name: 'projectHistorySizes_lastUpdatedAt_ttl_1', + }, + { + key: { estimatedSize: 1 }, + name: 'projectHistorySizes_estimatedSize_1', + }, +] + +const migrate = async client => { + const { db } = client + + await Helpers.addIndexesToCollection(db.projectHistorySizes, indexes) +} + +const rollback = async client => { + const { db } = client + await Helpers.dropIndexesFromCollection(db.projectHistorySizes, indexes) +} + +export default { + tags, + migrate, + rollback, +} diff --git a/services/web/test/acceptance/src/mocks/MockV1HistoryApi.mjs b/services/web/test/acceptance/src/mocks/MockV1HistoryApi.mjs index 712618a171..620b073317 100644 --- a/services/web/test/acceptance/src/mocks/MockV1HistoryApi.mjs +++ b/services/web/test/acceptance/src/mocks/MockV1HistoryApi.mjs @@ -15,18 +15,60 @@ class MockV1HistoryApi extends AbstractMockApi { this.blobs = {} } + computeBlobStats(historyId, blobHashes) { + let textBlobBytes = 0 + let binaryBlobBytes = 0 + let nTextBlobs = 0 + let nBinaryBlobs = 0 + if (!blobHashes) { + blobHashes = this.blobs[historyId] + ? Object.keys(this.blobs[historyId]) + : [] + } + for (const hash of blobHashes) { + const buf = this.blobs[historyId][hash] + if (buf) { + const size = buf.byteLength + + // Check if the blob content is valid UTF-8 + let isText = false + try { + const decoder = new TextDecoder('utf-8', { fatal: true }) + decoder.decode(buf) + isText = true + } catch (e) { + // Not valid UTF-8, treat as binary + isText = false + } + + if (isText) { + textBlobBytes += size + nTextBlobs++ + } else { + binaryBlobBytes += size + nBinaryBlobs++ + } + } + } + + const totalBytes = textBlobBytes + binaryBlobBytes + + return { + projectId: historyId, + textBlobBytes, + binaryBlobBytes, + totalBytes, + nTextBlobs, + nBinaryBlobs, + } + } + applyRoutes() { this.app.post('/api/projects/blob-stats', (req, res, next) => { res.json( + // Calculate actual sizes from uploaded blobs req.body.projectIds.map(projectId => { - return { - projectId, - textBlobBytes: 7331, - binaryBlobBytes: 1337, - totalBytes: 7331 + 1337, - nTextBlobs: 13, - nBinaryBlobs: 42, - } + return this.computeBlobStats(projectId) }) ) }) @@ -34,51 +76,9 @@ class MockV1HistoryApi extends AbstractMockApi { this.app.post('/api/projects/:historyId/blob-stats', (req, res, next) => { const { historyId } = req.params const { blobHashes } = req.body - - let textBlobBytes = 0 - let binaryBlobBytes = 0 - let nTextBlobs = 0 - let nBinaryBlobs = 0 - // Calculate actual sizes from uploaded blobs - if (blobHashes && this.blobs[historyId]) { - for (const hash of blobHashes) { - const buf = this.blobs[historyId][hash] - if (buf) { - const size = buf.byteLength - - // Check if the blob content is valid UTF-8 - let isText = false - try { - const decoder = new TextDecoder('utf-8', { fatal: true }) - decoder.decode(buf) - isText = true - } catch (e) { - // Not valid UTF-8, treat as binary - isText = false - } - - if (isText) { - textBlobBytes += size - nTextBlobs++ - } else { - binaryBlobBytes += size - nBinaryBlobs++ - } - } - } - } - - const totalBytes = textBlobBytes + binaryBlobBytes - - res.json({ - projectId: historyId, - textBlobBytes, - binaryBlobBytes, - totalBytes, - nTextBlobs, - nBinaryBlobs, - }) + const result = this.computeBlobStats(historyId, blobHashes) + res.json(result) }) this.app.get( diff --git a/services/web/test/unit/src/FileStore/FileStoreHandlerTests.js b/services/web/test/unit/src/FileStore/FileStoreHandlerTests.js index 4dd4e3937f..ae0c1d5fa1 100644 --- a/services/web/test/unit/src/FileStore/FileStoreHandlerTests.js +++ b/services/web/test/unit/src/FileStore/FileStoreHandlerTests.js @@ -71,6 +71,12 @@ describe('FileStoreHandler', function () { hasFeature: sinon.stub(), } + this.Modules = { + hooks: { + fire: sinon.stub().callsArgWith(2, null), + }, + } + this.handler = SandboxedModule.require(MODULE_PATH, { requires: { '@overleaf/settings': this.settings, @@ -79,6 +85,7 @@ describe('FileStoreHandler', function () { '../Project/ProjectDetailsHandler': this.ProjectDetailsHandler, './FileHashManager': this.FileHashManager, '../../infrastructure/Features': this.Features, + '../../infrastructure/Modules': this.Modules, // FIXME: need to stub File object here '../../models/File': { File: this.FileModel, @@ -131,6 +138,31 @@ describe('FileStoreHandler', function () { .should.equal(true) }) + it('should call the preUploadFile hook', async function () { + this.fs.createReadStream.returns({ + pipe() {}, + on(type, cb) { + if (type === 'open') { + cb() + } + }, + }) + await this.handler.promises.uploadFileFromDisk( + this.projectId, + this.fileArgs, + this.fsPath + ) + this.Modules.hooks.fire + .calledWith('preUploadFile', { + projectId: this.projectId, + historyId: this.historyId, + fileArgs: this.fileArgs, + fsPath: this.fsPath, + size: this.fileSize, + }) + .should.equal(true) + }) + it('should upload the file to the history store as a blob', async function () { this.fs.createReadStream.returns({ pipe() {}, @@ -169,6 +201,29 @@ describe('FileStoreHandler', function () { expect(this.request).to.not.have.been.called }) + it('should call the postUploadFile hook', async function () { + this.fs.createReadStream.returns({ + pipe() {}, + on(type, cb) { + if (type === 'open') { + cb() + } + }, + }) + await this.handler.promises.uploadFileFromDisk( + this.projectId, + this.fileArgs, + this.fsPath + ) + this.Modules.hooks.fire + .calledWith('postUploadFile', { + projectId: this.projectId, + fileRef: sinon.match.instanceOf(this.FileModel), + size: this.fileSize, + }) + .should.equal(true) + }) + it('should resolve with the url and fileRef', async function () { const { fileRef } = await this.handler.promises.uploadFileFromDisk( this.projectId,