Merge pull request #28628 from overleaf/bg-cache-history-size-on-project

Implement project size checks on file uploads

GitOrigin-RevId: 4dffe237e6992b859f07964cffa82ff1c13e91c9
This commit is contained in:
Brian Gough
2025-09-24 08:51:11 +01:00
committed by Copybot
parent daa4bd9062
commit d7cd65d20c
5 changed files with 183 additions and 69 deletions
@@ -7,6 +7,7 @@ const ProjectDetailsHandler = require('../Project/ProjectDetailsHandler')
const { File } = require('../../models/File')
const OError = require('@overleaf/o-error')
const { promisifyAll } = require('@overleaf/promise-utils')
const Modules = require('../../infrastructure/Modules')
const FileStoreHandler = {
RETRY_ATTEMPTS: 3,
@@ -69,24 +70,47 @@ const FileStoreHandler = {
)
return callback(new Error('can not upload symlink'))
}
FileHashManager.computeHash(fsPath, function (err, hash) {
if (err) {
return callback(err)
}
FileStoreHandler._uploadToHistory(
historyId,
hash,
stat.size,
fsPath,
function (err) {
const size = stat.size
Modules.hooks.fire(
'preUploadFile',
{ projectId, historyId, fileArgs, fsPath, size },
preUploadErr => {
if (preUploadErr) {
return callback(preUploadErr)
}
FileHashManager.computeHash(fsPath, function (err, hash) {
if (err) {
return callback(err)
}
fileArgs = { ...fileArgs, hash }
callback(err, new File(fileArgs), true)
}
)
})
FileStoreHandler._uploadToHistory(
historyId,
hash,
stat.size,
fsPath,
function (err) {
if (err) {
return callback(err)
}
const fileRef = new File({ ...fileArgs, hash })
Modules.hooks.fire(
'postUploadFile',
{
projectId,
fileRef,
size,
},
postUploadErr => {
if (postUploadErr) {
return callback(postUploadErr)
}
callback(err, fileRef, true, size)
}
)
}
)
})
}
)
})
},
}
@@ -94,7 +118,7 @@ const FileStoreHandler = {
module.exports = FileStoreHandler
module.exports.promises = promisifyAll(FileStoreHandler, {
multiResult: {
uploadFileFromDisk: ['fileRef', 'createdBlob'],
uploadFileFromDiskWithHistoryId: ['fileRef', 'createdBlob'],
uploadFileFromDisk: ['fileRef', 'createdBlob', 'size'],
uploadFileFromDiskWithHistoryId: ['fileRef', 'createdBlob', 'size'],
},
})
@@ -62,6 +62,7 @@ const db = {
projectHistoryFailures: internalDb.collection('projectHistoryFailures'),
projectHistoryGlobalBlobs: internalDb.collection('projectHistoryGlobalBlobs'),
projectHistoryLabels: internalDb.collection('projectHistoryLabels'),
projectHistorySizes: internalDb.collection('projectHistorySizes'),
projectHistorySyncState: internalDb.collection('projectHistorySyncState'),
projectInvites: internalDb.collection('projectInvites'),
projects: internalDb.collection('projects'),
@@ -0,0 +1,34 @@
/* eslint-disable no-unused-vars */
import Helpers from './lib/helpers.mjs'
const tags = ['saas']
const indexes = [
{
key: { lastUpdatedAt: 1 },
expireAfterSeconds: 60 * 60 * 24 * 30 /* 30 days */,
name: 'projectHistorySizes_lastUpdatedAt_ttl_1',
},
{
key: { estimatedSize: 1 },
name: 'projectHistorySizes_estimatedSize_1',
},
]
const migrate = async client => {
const { db } = client
await Helpers.addIndexesToCollection(db.projectHistorySizes, indexes)
}
const rollback = async client => {
const { db } = client
await Helpers.dropIndexesFromCollection(db.projectHistorySizes, indexes)
}
export default {
tags,
migrate,
rollback,
}
@@ -15,18 +15,60 @@ class MockV1HistoryApi extends AbstractMockApi {
this.blobs = {}
}
computeBlobStats(historyId, blobHashes) {
let textBlobBytes = 0
let binaryBlobBytes = 0
let nTextBlobs = 0
let nBinaryBlobs = 0
if (!blobHashes) {
blobHashes = this.blobs[historyId]
? Object.keys(this.blobs[historyId])
: []
}
for (const hash of blobHashes) {
const buf = this.blobs[historyId][hash]
if (buf) {
const size = buf.byteLength
// Check if the blob content is valid UTF-8
let isText = false
try {
const decoder = new TextDecoder('utf-8', { fatal: true })
decoder.decode(buf)
isText = true
} catch (e) {
// Not valid UTF-8, treat as binary
isText = false
}
if (isText) {
textBlobBytes += size
nTextBlobs++
} else {
binaryBlobBytes += size
nBinaryBlobs++
}
}
}
const totalBytes = textBlobBytes + binaryBlobBytes
return {
projectId: historyId,
textBlobBytes,
binaryBlobBytes,
totalBytes,
nTextBlobs,
nBinaryBlobs,
}
}
applyRoutes() {
this.app.post('/api/projects/blob-stats', (req, res, next) => {
res.json(
// Calculate actual sizes from uploaded blobs
req.body.projectIds.map(projectId => {
return {
projectId,
textBlobBytes: 7331,
binaryBlobBytes: 1337,
totalBytes: 7331 + 1337,
nTextBlobs: 13,
nBinaryBlobs: 42,
}
return this.computeBlobStats(projectId)
})
)
})
@@ -34,51 +76,9 @@ class MockV1HistoryApi extends AbstractMockApi {
this.app.post('/api/projects/:historyId/blob-stats', (req, res, next) => {
const { historyId } = req.params
const { blobHashes } = req.body
let textBlobBytes = 0
let binaryBlobBytes = 0
let nTextBlobs = 0
let nBinaryBlobs = 0
// Calculate actual sizes from uploaded blobs
if (blobHashes && this.blobs[historyId]) {
for (const hash of blobHashes) {
const buf = this.blobs[historyId][hash]
if (buf) {
const size = buf.byteLength
// Check if the blob content is valid UTF-8
let isText = false
try {
const decoder = new TextDecoder('utf-8', { fatal: true })
decoder.decode(buf)
isText = true
} catch (e) {
// Not valid UTF-8, treat as binary
isText = false
}
if (isText) {
textBlobBytes += size
nTextBlobs++
} else {
binaryBlobBytes += size
nBinaryBlobs++
}
}
}
}
const totalBytes = textBlobBytes + binaryBlobBytes
res.json({
projectId: historyId,
textBlobBytes,
binaryBlobBytes,
totalBytes,
nTextBlobs,
nBinaryBlobs,
})
const result = this.computeBlobStats(historyId, blobHashes)
res.json(result)
})
this.app.get(
@@ -71,6 +71,12 @@ describe('FileStoreHandler', function () {
hasFeature: sinon.stub(),
}
this.Modules = {
hooks: {
fire: sinon.stub().callsArgWith(2, null),
},
}
this.handler = SandboxedModule.require(MODULE_PATH, {
requires: {
'@overleaf/settings': this.settings,
@@ -79,6 +85,7 @@ describe('FileStoreHandler', function () {
'../Project/ProjectDetailsHandler': this.ProjectDetailsHandler,
'./FileHashManager': this.FileHashManager,
'../../infrastructure/Features': this.Features,
'../../infrastructure/Modules': this.Modules,
// FIXME: need to stub File object here
'../../models/File': {
File: this.FileModel,
@@ -131,6 +138,31 @@ describe('FileStoreHandler', function () {
.should.equal(true)
})
it('should call the preUploadFile hook', async function () {
this.fs.createReadStream.returns({
pipe() {},
on(type, cb) {
if (type === 'open') {
cb()
}
},
})
await this.handler.promises.uploadFileFromDisk(
this.projectId,
this.fileArgs,
this.fsPath
)
this.Modules.hooks.fire
.calledWith('preUploadFile', {
projectId: this.projectId,
historyId: this.historyId,
fileArgs: this.fileArgs,
fsPath: this.fsPath,
size: this.fileSize,
})
.should.equal(true)
})
it('should upload the file to the history store as a blob', async function () {
this.fs.createReadStream.returns({
pipe() {},
@@ -169,6 +201,29 @@ describe('FileStoreHandler', function () {
expect(this.request).to.not.have.been.called
})
it('should call the postUploadFile hook', async function () {
this.fs.createReadStream.returns({
pipe() {},
on(type, cb) {
if (type === 'open') {
cb()
}
},
})
await this.handler.promises.uploadFileFromDisk(
this.projectId,
this.fileArgs,
this.fsPath
)
this.Modules.hooks.fire
.calledWith('postUploadFile', {
projectId: this.projectId,
fileRef: sinon.match.instanceOf(this.FileModel),
size: this.fileSize,
})
.should.equal(true)
})
it('should resolve with the url and fileRef', async function () {
const { fileRef } = await this.handler.promises.uploadFileFromDisk(
this.projectId,