Migrate history-v1 recover_zip scripts from archiver to zip-stream (#32813)
* migrate recover_zip_from_backup from archiver to zip-stream Replace the `archiver` package with `zip-stream` (the lower-level library that `archiver` wraps) in the `recover_zip_from_backup.mjs` script and `backupArchiver.mjs` library. The `archiver` package has known issues with hanging when creating large zip files and is no longer actively maintained. Changes: - Add `zip-stream@^7.0.2` as a direct dependency - Update `backupArchiver.mjs` to use promisified `ZipStream.entry()` instead of `Archiver.append()` - Rewrite `recover_zip_from_backup.mjs` to use `ZipStream` with `stream/promises.pipeline` for cleaner async flow - Keep `archiver` dependency for `project_archive.js` (separate code path) Agent-Logs-Url: https://github.com/overleaf/internal/sessions/0df27a8b-97f1-43cc-ac26-f5247a84313f Co-authored-by: briangough <7457354+briangough@users.noreply.github.com> * extract finalize timeout to named constant Agent-Logs-Url: https://github.com/overleaf/internal/sessions/0df27a8b-97f1-43cc-ac26-f5247a84313f Co-authored-by: briangough <7457354+briangough@users.noreply.github.com> * convert recover_zip.js to zip-stream, remove finalize timeout, add verbose logging Agent-Logs-Url: https://github.com/overleaf/internal/sessions/9380d08a-d813-4e9f-a2ac-4891122c163b Co-authored-by: briangough <7457354+briangough@users.noreply.github.com> * add acceptance tests for recover_zip_from_backup in raw and latest modes Agent-Logs-Url: https://github.com/overleaf/internal/sessions/9380d08a-d813-4e9f-a2ac-4891122c163b Co-authored-by: briangough <7457354+briangough@users.noreply.github.com> * fix comment formatting in recover_zip_from_backup.mjs Agent-Logs-Url: https://github.com/overleaf/internal/sessions/9380d08a-d813-4e9f-a2ac-4891122c163b Co-authored-by: briangough <7457354+briangough@users.noreply.github.com> * restore EventEmitter.defaultMaxListeners in recover_zip.js, add acceptance test Agent-Logs-Url: https://github.com/overleaf/internal/sessions/e7443126-22d5-4d0e-a176-a7a5dba49ffd Co-authored-by: briangough <7457354+briangough@users.noreply.github.com> * fix formatting * refactor: simplify stream handling by using named imports for pipeline * fix blob hash verification in backup acceptance tests * fix recover_zip script and tests * fix: exit with non-zero status on error in recover_zip.js Agent-Logs-Url: https://github.com/overleaf/internal/sessions/ef3f109b-488f-47c9-84a5-b5269387166a Co-authored-by: briangough <7457354+briangough@users.noreply.github.com> * migrate from npm to yarn --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: briangough <7457354+briangough@users.noreply.github.com> Co-authored-by: Brian Gough <briangough@users.noreply.github.com> GitOrigin-RevId: 6255f9610f3c846790e2ed8b1979ac08b7effece
This commit is contained in:
@@ -44,7 +44,8 @@
|
||||
"temp": "^0.8.3",
|
||||
"throng": "^4.0.0",
|
||||
"tsscmp": "^1.0.6",
|
||||
"utf-8-validate": "^5.0.4"
|
||||
"utf-8-validate": "^5.0.4",
|
||||
"zip-stream": "^7.0.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@overleaf/migrations": "workspace:*",
|
||||
|
||||
@@ -185,13 +185,30 @@ class BackupBlobStore {
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {(import('archiver').Archiver)} Archiver
|
||||
* @typedef {(import('zip-stream').default)} ZipStream
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {(import('overleaf-editor-core').FileMap)} FileMap
|
||||
*/
|
||||
|
||||
/**
|
||||
* Promisified wrapper for ZipStream's entry method.
|
||||
*
|
||||
* @param {ZipStream} archive
|
||||
* @param {Buffer|NodeJS.ReadableStream|string} source
|
||||
* @param {{ name: string }} data
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
function addEntry(archive, source, data) {
|
||||
return new Promise((resolve, reject) => {
|
||||
archive.entry(source, data, err => {
|
||||
if (err) reject(err)
|
||||
else resolve()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param historyId
|
||||
@@ -254,14 +271,15 @@ async function fetchBlob(historyId, hash, persistor) {
|
||||
|
||||
/**
|
||||
* @typedef {object} AddChunkOptions
|
||||
* @property {string} [prefix] Should include trailing slash (if length > 0)
|
||||
* @property {string} [prefix]
|
||||
* @property {boolean} [useBackupGlobalBlobs]
|
||||
* @property {boolean} [verbose]
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {History} history
|
||||
* @param {Archiver} archive
|
||||
* @param {ZipStream} archive
|
||||
* @param {CachedPerProjectEncryptedS3Persistor} projectCache
|
||||
* @param {string} historyId
|
||||
* @param {AddChunkOptions} [options]
|
||||
@@ -272,7 +290,7 @@ async function addChunkToArchive(
|
||||
archive,
|
||||
projectCache,
|
||||
historyId,
|
||||
{ prefix = '', useBackupGlobalBlobs = false } = {}
|
||||
{ prefix = '', useBackupGlobalBlobs = false, verbose = false } = {}
|
||||
) {
|
||||
const chunkBlobs = new Set()
|
||||
history.findBlobHashes(chunkBlobs)
|
||||
@@ -334,9 +352,16 @@ async function addChunkToArchive(
|
||||
}
|
||||
content = await blobStore.getStream(hash)
|
||||
}
|
||||
archive.append(content, {
|
||||
if (content == null) {
|
||||
logger.error({ filePath }, 'File content is empty')
|
||||
continue
|
||||
}
|
||||
await addEntry(archive, content, {
|
||||
name: `${prefix}${filePath}`,
|
||||
})
|
||||
if (verbose) {
|
||||
logger.info({ filePath: `${prefix}${filePath}` }, 'added to archive')
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -358,17 +383,20 @@ async function findStartVersionOfLatestChunk(historyId) {
|
||||
/**
|
||||
* Restore a project from the latest snapshot
|
||||
*
|
||||
* There is an assumption that the database backup has been restored.
|
||||
* There is an assumption that the database backup
|
||||
* has been restored.
|
||||
*
|
||||
* @param {Archiver} archive
|
||||
* @param {ZipStream} archive
|
||||
* @param {string} historyId
|
||||
* @param {boolean} [useBackupGlobalBlobs]
|
||||
* @param {boolean} [verbose]
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
export async function archiveLatestChunk(
|
||||
archive,
|
||||
historyId,
|
||||
useBackupGlobalBlobs = false
|
||||
useBackupGlobalBlobs = false,
|
||||
verbose = false
|
||||
) {
|
||||
logger.info({ historyId, useBackupGlobalBlobs }, 'Archiving latest chunk')
|
||||
|
||||
@@ -386,20 +414,28 @@ export async function archiveLatestChunk(
|
||||
|
||||
await addChunkToArchive(backedUpChunk, archive, projectCache, historyId, {
|
||||
useBackupGlobalBlobs,
|
||||
verbose,
|
||||
})
|
||||
|
||||
return archive
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches all raw blobs from the project and adds them to the archive.
|
||||
* Fetches all raw blobs from the project and adds
|
||||
* them to the archive.
|
||||
*
|
||||
* @param {string} historyId
|
||||
* @param {Archiver} archive
|
||||
* @param {ZipStream} archive
|
||||
* @param {CachedPerProjectEncryptedS3Persistor} projectCache
|
||||
* @param {boolean} [verbose]
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
async function addRawBlobsToArchive(historyId, archive, projectCache) {
|
||||
async function addRawBlobsToArchive(
|
||||
historyId,
|
||||
archive,
|
||||
projectCache,
|
||||
verbose = false
|
||||
) {
|
||||
const blobKeys = await projectCache.listDirectoryKeys(
|
||||
projectBlobsBucket,
|
||||
projectKey.format(historyId)
|
||||
@@ -411,9 +447,13 @@ async function addRawBlobsToArchive(historyId, archive, projectCache) {
|
||||
key,
|
||||
{ autoGunzip: true }
|
||||
)
|
||||
archive.append(stream, {
|
||||
name: path.join(historyId, 'blobs', key),
|
||||
const entryName = path.join(historyId, 'blobs', key)
|
||||
await addEntry(archive, stream, {
|
||||
name: entryName,
|
||||
})
|
||||
if (verbose) {
|
||||
logger.info({ entryName }, 'added to archive')
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn({ err, path: key }, 'Failed to append blob to archive')
|
||||
}
|
||||
@@ -425,17 +465,20 @@ async function addRawBlobsToArchive(historyId, archive, projectCache) {
|
||||
*
|
||||
* This can work without the database being backed up.
|
||||
*
|
||||
* It will split the project into chunks per directory and download the blobs alongside the chunk.
|
||||
* It will split the project into chunks per directory
|
||||
* and download the blobs alongside the chunk.
|
||||
*
|
||||
* @param {Archiver} archive
|
||||
* @param {ZipStream} archive
|
||||
* @param {string} historyId
|
||||
* @param {boolean} [useBackupGlobalBlobs]
|
||||
* @param {boolean} [verbose]
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
export async function archiveRawProject(
|
||||
archive,
|
||||
historyId,
|
||||
useBackupGlobalBlobs = false
|
||||
useBackupGlobalBlobs = false,
|
||||
verbose = false
|
||||
) {
|
||||
const projectCache = await getProjectPersistor(historyId)
|
||||
|
||||
@@ -454,11 +497,15 @@ export async function archiveRawProject(
|
||||
|
||||
const { buffer } = await loadChunkByKey(projectCache, key)
|
||||
|
||||
archive.append(buffer, {
|
||||
name: `${historyId}/chunks/${chunkId}/chunk.json`,
|
||||
const entryName = `${historyId}/chunks/${chunkId}/chunk.json`
|
||||
await addEntry(archive, buffer, {
|
||||
name: entryName,
|
||||
})
|
||||
if (verbose) {
|
||||
logger.info({ entryName }, 'added to archive')
|
||||
}
|
||||
}
|
||||
await addRawBlobsToArchive(historyId, archive, projectCache)
|
||||
await addRawBlobsToArchive(historyId, archive, projectCache, verbose)
|
||||
}
|
||||
|
||||
export class BackupPersistorError extends OError {}
|
||||
|
||||
@@ -6,19 +6,19 @@
|
||||
*/
|
||||
'use strict'
|
||||
|
||||
const Stream = require('node:stream')
|
||||
const { pipeline } = require('node:stream/promises')
|
||||
const zlib = require('node:zlib')
|
||||
const { WritableBuffer } = require('@overleaf/stream-utils')
|
||||
|
||||
/**
|
||||
* Create a promise for the result of reading a stream to a buffer.
|
||||
*
|
||||
* @param {Stream.Readable} readStream
|
||||
* @param {import('node:stream').Readable} readStream
|
||||
* @return {Promise<Buffer>}
|
||||
*/
|
||||
async function readStreamToBuffer(readStream) {
|
||||
const bufferStream = new WritableBuffer()
|
||||
await Stream.promises.pipeline(readStream, bufferStream)
|
||||
await pipeline(readStream, bufferStream)
|
||||
return bufferStream.contents()
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ exports.readStreamToBuffer = readStreamToBuffer
|
||||
async function gunzipStreamToBuffer(readStream) {
|
||||
const gunzip = zlib.createGunzip()
|
||||
const bufferStream = new WritableBuffer()
|
||||
await Stream.promises.pipeline(readStream, gunzip, bufferStream)
|
||||
await pipeline(readStream, gunzip, bufferStream)
|
||||
return bufferStream.contents()
|
||||
}
|
||||
|
||||
|
||||
@@ -17,9 +17,10 @@ const fs = require('node:fs')
|
||||
const os = require('node:os')
|
||||
const path = require('node:path')
|
||||
const util = require('node:util')
|
||||
const { pipeline } = require('node:stream/promises')
|
||||
|
||||
// Something is registering 11 listeners, over the limit of 10, which generates
|
||||
// a lot of warning noise.
|
||||
// Something is registering 11 listeners, over the limit
|
||||
// of 10, which generates a lot of warning noise.
|
||||
require('node:events').EventEmitter.defaultMaxListeners = 11
|
||||
|
||||
const config = require('config')
|
||||
@@ -27,11 +28,23 @@ const config = require('config')
|
||||
// eslint-disable-next-line import/no-extraneous-dependencies
|
||||
const { Storage } = require('@google-cloud/storage')
|
||||
const isValidUtf8 = require('utf-8-validate')
|
||||
// zip-stream@7 uses ESM default export
|
||||
const ZipStream = require('zip-stream').default
|
||||
|
||||
function createStorage() {
|
||||
const opts = {}
|
||||
if (config.has('persistor.gcs.endpoint.apiEndpoint')) {
|
||||
opts.apiEndpoint = config.get('persistor.gcs.endpoint.apiEndpoint')
|
||||
}
|
||||
if (config.has('persistor.gcs.endpoint.projectId')) {
|
||||
opts.projectId = config.get('persistor.gcs.endpoint.projectId')
|
||||
}
|
||||
return new Storage(opts)
|
||||
}
|
||||
|
||||
const core = require('overleaf-editor-core')
|
||||
const projectKey = require('@overleaf/object-persistor/src/ProjectKey.js')
|
||||
const streams = require('../lib/streams')
|
||||
const ProjectArchive = require('../lib/project_archive')
|
||||
|
||||
const {
|
||||
values: { verbose: VERBOSE },
|
||||
@@ -53,7 +66,7 @@ if (HISTORY_IDS.length === 0) {
|
||||
|
||||
async function listDeletedChunks(historyId) {
|
||||
const bucketName = config.get('chunkStore.bucket')
|
||||
const storage = new Storage()
|
||||
const storage = createStorage()
|
||||
const [files] = await storage.bucket(bucketName).getFiles({
|
||||
prefix: projectKey.format(historyId),
|
||||
versions: true,
|
||||
@@ -137,7 +150,7 @@ class RecoveryBlobStore {
|
||||
if (VERBOSE) console.log('fetching blob', hash)
|
||||
|
||||
const bucketName = config.get('blobStore.projectBucket')
|
||||
const storage = new Storage()
|
||||
const storage = createStorage()
|
||||
const [files] = await storage.bucket(bucketName).getFiles({
|
||||
prefix: this.makeProjectBlobKey(hash),
|
||||
versions: true,
|
||||
@@ -158,7 +171,7 @@ class RecoveryBlobStore {
|
||||
|
||||
async fetchGlobalBlob(hash, destination) {
|
||||
const bucketName = config.get('blobStore.globalBucket')
|
||||
const storage = new Storage()
|
||||
const storage = createStorage()
|
||||
const file = storage.bucket(bucketName).file(this.makeGlobalBlobKey(hash))
|
||||
await file.download({ destination })
|
||||
}
|
||||
@@ -203,9 +216,18 @@ class RecoveryBlobStore {
|
||||
async function uploadZip(historyId, zipPathname) {
|
||||
const bucketName = config.get('zipStore.bucket')
|
||||
const deadline = 24 * 3600 * 1000 // lifecycle limit on the zips bucket
|
||||
const storage = new Storage()
|
||||
const storage = createStorage()
|
||||
const destination = `${historyId}-recovered.zip`
|
||||
await storage.bucket(bucketName).upload(zipPathname, { destination })
|
||||
await storage.bucket(bucketName).upload(zipPathname, {
|
||||
destination,
|
||||
resumable: false,
|
||||
})
|
||||
|
||||
if (config.has('persistor.gcs.endpoint.apiEndpoint')) {
|
||||
// In emulator mode, signed URLs aren't available
|
||||
const apiEndpoint = config.get('persistor.gcs.endpoint.apiEndpoint')
|
||||
return `${apiEndpoint}/storage/v1/b/${bucketName}/o/${encodeURIComponent(destination)}?alt=media`
|
||||
}
|
||||
|
||||
const signedUrls = await storage
|
||||
.bucket(bucketName)
|
||||
@@ -219,6 +241,23 @@ async function uploadZip(historyId, zipPathname) {
|
||||
return signedUrls[0]
|
||||
}
|
||||
|
||||
/**
|
||||
* Promisified wrapper for ZipStream's entry method.
|
||||
*
|
||||
* @param {ZipStream} archive
|
||||
* @param {Buffer|NodeJS.ReadableStream|string} source
|
||||
* @param {{ name: string }} data
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
function addEntry(archive, source, data) {
|
||||
return new Promise((resolve, reject) => {
|
||||
archive.entry(source, data, err => {
|
||||
if (err) reject(err)
|
||||
else resolve()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
async function restoreProject(historyId) {
|
||||
const tmp = await fs.promises.mkdtemp(
|
||||
path.join(os.tmpdir(), historyId.toString())
|
||||
@@ -237,9 +276,40 @@ async function restoreProject(historyId) {
|
||||
if (VERBOSE) console.log('zipping', historyId)
|
||||
|
||||
const zipPathname = path.join(tmp, `${historyId}.zip`)
|
||||
const zipTimeoutMs = 60 * 1000
|
||||
const archive = new ProjectArchive(snapshot, zipTimeoutMs)
|
||||
await archive.writeZip(blobStore, zipPathname)
|
||||
const outputFile = fs.createWriteStream(zipPathname)
|
||||
const archive = new ZipStream()
|
||||
|
||||
const pipelinePromise = pipeline(archive, outputFile)
|
||||
|
||||
for (const pathname of snapshot.getFilePathnames()) {
|
||||
const file = snapshot.getFile(pathname)
|
||||
if (!file) continue
|
||||
|
||||
await file.load('eager', blobStore)
|
||||
let content = file.getContent({
|
||||
filterTrackedDeletes: true,
|
||||
})
|
||||
|
||||
if (content === null) {
|
||||
const hash = file.getHash()
|
||||
content = await blobStore.getStream(hash)
|
||||
}
|
||||
|
||||
if (content == null) continue
|
||||
|
||||
if (typeof content === 'string') {
|
||||
content = Buffer.from(content)
|
||||
}
|
||||
await addEntry(archive, content, { name: pathname })
|
||||
if (VERBOSE) console.log(`${pathname} added`)
|
||||
}
|
||||
|
||||
archive.finalize()
|
||||
await pipelinePromise
|
||||
|
||||
if (VERBOSE) {
|
||||
console.log(`Wrote ${archive.getBytesWritten()} bytes`)
|
||||
}
|
||||
|
||||
if (VERBOSE) console.log('uploading', historyId)
|
||||
|
||||
@@ -252,4 +322,7 @@ async function main() {
|
||||
console.log(signedUrl)
|
||||
}
|
||||
}
|
||||
main().catch(console.error)
|
||||
main().catch(err => {
|
||||
console.error(err)
|
||||
process.exit(1)
|
||||
})
|
||||
|
||||
@@ -4,6 +4,7 @@ import commandLineArgs from 'command-line-args'
|
||||
import assert from '../lib/assert.js'
|
||||
import fs from 'node:fs'
|
||||
import { setTimeout } from 'node:timers/promises'
|
||||
import { pipeline } from 'node:stream/promises'
|
||||
import {
|
||||
archiveLatestChunk,
|
||||
archiveRawProject,
|
||||
@@ -11,17 +12,13 @@ import {
|
||||
} from '../lib/backupArchiver.mjs'
|
||||
import knex from '../lib/knex.js'
|
||||
import { client } from '../lib/mongodb.js'
|
||||
import archiver from 'archiver'
|
||||
import Events from 'node:events'
|
||||
import ZipStream from 'zip-stream'
|
||||
import { Chunk } from 'overleaf-editor-core'
|
||||
import _ from 'lodash'
|
||||
|
||||
// Silence warning.
|
||||
Events.setMaxListeners(20)
|
||||
|
||||
const SUPPORTED_MODES = ['raw', 'latest']
|
||||
|
||||
// Pads the mode name to a fixed length for better alignment in output.
|
||||
// Pads the mode name to a fixed length for alignment.
|
||||
const padModeName = _.partialRight(
|
||||
_.padEnd,
|
||||
Math.max(...SUPPORTED_MODES.map(mode => mode.length))
|
||||
@@ -65,25 +62,6 @@ function usage() {
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* @typedef {import('archiver').ZipArchive} ZipArchive
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {import('archiver').ProgressData} ProgressData
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {import('archiver').EntryData} EntryData
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} ArchiverError
|
||||
* @property {string} message
|
||||
* @property {string} code
|
||||
* @property {Object} data
|
||||
*/
|
||||
|
||||
let historyId, help, mode, output, useBackupGlobalBlobs, verbose
|
||||
|
||||
try {
|
||||
@@ -136,80 +114,37 @@ await loadGlobalBlobs()
|
||||
|
||||
outputFile = fs.createWriteStream(output)
|
||||
|
||||
const archive = archiver.create('zip', {})
|
||||
const archive = new ZipStream()
|
||||
|
||||
archive.on('close', function () {
|
||||
console.log(archive.pointer() + ' total bytes')
|
||||
console.log(`Wrote ${output}`)
|
||||
shutdown().catch(e => console.error('Error shutting down', e))
|
||||
archive.on('error', function (e) {
|
||||
console.error(`Error writing archive: ${e.message}`)
|
||||
})
|
||||
|
||||
archive.on(
|
||||
'error',
|
||||
/**
|
||||
*
|
||||
* @param {ArchiverError} e
|
||||
*/
|
||||
function (e) {
|
||||
console.error(`Error writing archive: ${e.message}`)
|
||||
}
|
||||
)
|
||||
|
||||
archive.on('end', function () {
|
||||
console.log(`Wrote ${archive.pointer()} total bytes to ${output}`)
|
||||
shutdown().catch(e => console.error('Error shutting down', e))
|
||||
})
|
||||
|
||||
archive.on(
|
||||
'progress',
|
||||
/**
|
||||
*
|
||||
* @param {ProgressData} progress
|
||||
*/
|
||||
function (progress) {
|
||||
if (verbose) {
|
||||
console.log(
|
||||
`${progress.entries.processed} processed out of ${progress.entries.total}`
|
||||
)
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
archive.on(
|
||||
'entry',
|
||||
/**
|
||||
*
|
||||
* @param {EntryData} entry
|
||||
*/
|
||||
function (entry) {
|
||||
if (verbose) {
|
||||
console.log(`${entry.name} added`)
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
archive.on(
|
||||
'warning',
|
||||
/**
|
||||
*
|
||||
* @param {ArchiverError} warning
|
||||
*/
|
||||
function (warning) {
|
||||
console.warn(`Warning encountered when writing archive: ${warning.message}`)
|
||||
}
|
||||
)
|
||||
|
||||
try {
|
||||
// Pipe archive to the output file before adding entries.
|
||||
// pipeline handles backpressure and will resolve when
|
||||
// the archive stream ends.
|
||||
const pipelinePromise = pipeline(archive, outputFile)
|
||||
|
||||
switch (mode) {
|
||||
case 'latest':
|
||||
await archiveLatestChunk(archive, historyId, useBackupGlobalBlobs)
|
||||
await archiveLatestChunk(
|
||||
archive,
|
||||
historyId,
|
||||
useBackupGlobalBlobs,
|
||||
verbose
|
||||
)
|
||||
break
|
||||
case 'raw':
|
||||
default:
|
||||
await archiveRawProject(archive, historyId, useBackupGlobalBlobs)
|
||||
await archiveRawProject(archive, historyId, useBackupGlobalBlobs, verbose)
|
||||
break
|
||||
}
|
||||
archive.pipe(outputFile)
|
||||
|
||||
archive.finalize()
|
||||
await pipelinePromise
|
||||
|
||||
console.log(`Wrote ${archive.getBytesWritten()} total bytes to ${output}`)
|
||||
} catch (error) {
|
||||
if (error instanceof BackupPersistorError) {
|
||||
console.error(error.message)
|
||||
@@ -222,12 +157,7 @@ try {
|
||||
} else {
|
||||
console.error('Error encountered when writing archive')
|
||||
}
|
||||
} finally {
|
||||
await Promise.race([
|
||||
await archive.finalize(),
|
||||
setTimeout(10000).then(() => {
|
||||
console.error('Archive did not finalize in time')
|
||||
return shutdown(1)
|
||||
}),
|
||||
])
|
||||
await shutdown(1)
|
||||
}
|
||||
|
||||
await shutdown(0)
|
||||
|
||||
@@ -647,6 +647,68 @@ describe('backup script', function () {
|
||||
expect(newBackupStatus.backupStatus.lastBackedUpVersion).to.equal(50) // backup fails on final chunk
|
||||
expect(newBackupStatus.currentEndVersion).to.equal(54) // backup is incomplete due to missing blob
|
||||
})
|
||||
|
||||
it('can recover zip file from backup in raw mode', async function () {
|
||||
// First, run backup so data is available
|
||||
await runBackupScript(['--projectId', projectId])
|
||||
|
||||
const zipPath = `/tmp/test-recover-raw-${historyId}.zip`
|
||||
try {
|
||||
await runRecoverZipFromBackupScript([
|
||||
'--historyId',
|
||||
historyId,
|
||||
'--output',
|
||||
zipPath,
|
||||
'--mode=raw',
|
||||
])
|
||||
|
||||
// Verify the zip file is valid
|
||||
const { stdout } = await promisify(execFile)('unzip', ['-l', zipPath], {
|
||||
encoding: 'utf-8',
|
||||
})
|
||||
|
||||
// Raw mode includes chunk and blob keys
|
||||
// Verify chunks are present
|
||||
expect(stdout).to.include('chunk.json')
|
||||
|
||||
// Verify blob hashes are present (hashes are stored as {hash[0:2]}/{hash[2:]})
|
||||
expect(stdout).to.include(testFiles.GRAPH_PNG_HASH.slice(2))
|
||||
expect(stdout).to.include(testFiles.NON_BMP_TXT_HASH.slice(2))
|
||||
} finally {
|
||||
await fs.promises.unlink(zipPath).catch(() => {})
|
||||
}
|
||||
})
|
||||
|
||||
it('can recover zip file from backup in latest mode', async function () {
|
||||
// First, run backup so data is available
|
||||
await runBackupScript(['--projectId', projectId])
|
||||
|
||||
const zipPath = `/tmp/test-recover-latest-${historyId}.zip`
|
||||
try {
|
||||
await runRecoverZipFromBackupScript([
|
||||
'--historyId',
|
||||
historyId,
|
||||
'--output',
|
||||
zipPath,
|
||||
'--mode=latest',
|
||||
])
|
||||
|
||||
// Verify the zip file is valid
|
||||
const { stdout } = await promisify(execFile)('unzip', ['-l', zipPath], {
|
||||
encoding: 'utf-8',
|
||||
})
|
||||
|
||||
// Latest mode includes the project files
|
||||
expect(stdout).to.include('main.tex')
|
||||
expect(stdout).to.include('chapter1.tex')
|
||||
expect(stdout).to.include('chapter2.tex')
|
||||
expect(stdout).to.include('bibliography.bib')
|
||||
expect(stdout).to.include('graph.png')
|
||||
expect(stdout).to.include('unicodeFile.tex')
|
||||
} finally {
|
||||
await fs.promises.unlink(zipPath).catch(() => {})
|
||||
}
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -683,3 +745,37 @@ async function runBackupScript(args) {
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the recover_zip_from_backup script with given arguments
|
||||
* @param {string[]} args
|
||||
*/
|
||||
async function runRecoverZipFromBackupScript(args) {
|
||||
const TIMEOUT = 30 * 1000
|
||||
let result
|
||||
try {
|
||||
result = await promisify(execFile)(
|
||||
'node',
|
||||
['storage/scripts/recover_zip_from_backup.mjs', ...args],
|
||||
{
|
||||
encoding: 'utf-8',
|
||||
timeout: TIMEOUT,
|
||||
env: {
|
||||
...process.env,
|
||||
LOG_LEVEL: 'debug',
|
||||
},
|
||||
}
|
||||
)
|
||||
result.status = 0
|
||||
} catch (err) {
|
||||
const { stdout, stderr, code } = err
|
||||
if (typeof code !== 'number') {
|
||||
console.log(err)
|
||||
}
|
||||
result = { stdout, stderr, status: code }
|
||||
}
|
||||
if (result.status !== 0) {
|
||||
throw new Error(`recover_zip_from_backup failed: ${result.stderr}`)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -0,0 +1,169 @@
|
||||
import { expect } from 'chai'
|
||||
import config from 'config'
|
||||
import { execFile } from 'node:child_process'
|
||||
import fs from 'node:fs'
|
||||
import { promisify } from 'node:util'
|
||||
import { Change, Operation, File, TextOperation } from 'overleaf-editor-core'
|
||||
// We depend on this via object-persistor.
|
||||
// eslint-disable-next-line import/no-extraneous-dependencies
|
||||
import { Storage } from '@google-cloud/storage'
|
||||
import {
|
||||
loadGlobalBlobs,
|
||||
BlobStore,
|
||||
} from '../../../../storage/lib/blob_store/index.js'
|
||||
import ChunkStore from '../../../../storage/lib/chunk_store/index.js'
|
||||
import persistChanges from '../../../../storage/lib/persist_changes.js'
|
||||
import testFiles from '../storage/support/test_files.js'
|
||||
import cleanup from './support/cleanup.js'
|
||||
import { getZipEntries } from './support/unzip.js'
|
||||
|
||||
describe('recover_zip script', function () {
|
||||
let projectId
|
||||
let limitsToPersistImmediately
|
||||
|
||||
before(async function () {
|
||||
const farFuture = new Date()
|
||||
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
|
||||
limitsToPersistImmediately = {
|
||||
minChangeTimestamp: farFuture,
|
||||
maxChangeTimestamp: farFuture,
|
||||
maxChanges: 10,
|
||||
maxChunkChanges: 10,
|
||||
}
|
||||
|
||||
const gcsEndpoint = config.get('persistor.gcs.endpoint')
|
||||
const storage = new Storage({
|
||||
apiEndpoint: gcsEndpoint.apiEndpoint,
|
||||
projectId: gcsEndpoint.projectId,
|
||||
})
|
||||
const bucketName = config.get('zipStore.bucket')
|
||||
try {
|
||||
const [exists] = await storage.bucket(bucketName).exists()
|
||||
if (!exists) {
|
||||
await storage.createBucket(bucketName)
|
||||
}
|
||||
} catch (err) {
|
||||
if (err.code !== 409) throw err
|
||||
}
|
||||
})
|
||||
|
||||
beforeEach(cleanup.everything)
|
||||
|
||||
beforeEach(async function () {
|
||||
await loadGlobalBlobs()
|
||||
projectId = '123'
|
||||
|
||||
// Initialize the project in the chunk store
|
||||
await ChunkStore.initializeProject(projectId)
|
||||
|
||||
const blobStore = new BlobStore(projectId)
|
||||
|
||||
// Upload binary file blob
|
||||
await blobStore.putFile(testFiles.path('graph.png'))
|
||||
|
||||
// Create initial snapshot with text and binary files
|
||||
const addMainTex = Operation.addFile(
|
||||
'main.tex',
|
||||
File.fromString('hello world')
|
||||
)
|
||||
const addGraphPng = Operation.addFile(
|
||||
'graph.png',
|
||||
File.fromHash(testFiles.GRAPH_PNG_HASH)
|
||||
)
|
||||
const change1 = new Change([addMainTex, addGraphPng], new Date(), [])
|
||||
await persistChanges(projectId, [change1], limitsToPersistImmediately, 0)
|
||||
|
||||
// Add a text edit
|
||||
const textOp = TextOperation.fromJSON({
|
||||
textOperation: ['hello world'.length, ' more'],
|
||||
})
|
||||
const editOp = Operation.editFile('main.tex', textOp)
|
||||
const change2 = new Change([editOp], new Date(), [])
|
||||
await persistChanges(projectId, [change2], limitsToPersistImmediately, 1)
|
||||
})
|
||||
|
||||
it('creates a valid zip from GCS data', async function () {
|
||||
this.timeout(30 * 1000)
|
||||
|
||||
const zipPath = `/tmp/test-recover-zip-${projectId}.zip`
|
||||
try {
|
||||
const { stdout } = await runRecoverZipScript([projectId])
|
||||
|
||||
// The script logs the signed URL to stdout
|
||||
const urlMatch = stdout.match(/(https?:\/\/[^\s]+)/)
|
||||
expect(urlMatch).to.not.be.null
|
||||
const signedUrl = urlMatch[1]
|
||||
|
||||
// Download the zip via fetch
|
||||
const res = await fetch(signedUrl)
|
||||
expect(res.ok).to.be.true
|
||||
const buffer = await res.arrayBuffer()
|
||||
await fs.promises.writeFile(zipPath, Buffer.from(buffer))
|
||||
|
||||
const zipEntries = await getZipEntries(zipPath)
|
||||
const fileNames = zipEntries.map(e => e.fileName).sort()
|
||||
|
||||
expect(fileNames).to.deep.equal(['graph.png', 'main.tex'])
|
||||
|
||||
// Verify text content size (after edit)
|
||||
const mainTexEntry = zipEntries.find(e => e.fileName === 'main.tex')
|
||||
expect(mainTexEntry.uncompressedSize).to.equal('hello world more'.length)
|
||||
|
||||
// Verify binary content size
|
||||
const graphEntry = zipEntries.find(e => e.fileName === 'graph.png')
|
||||
expect(graphEntry.uncompressedSize).to.equal(
|
||||
testFiles.GRAPH_PNG_BYTE_LENGTH
|
||||
)
|
||||
} finally {
|
||||
await fs.promises.unlink(zipPath).catch(() => {})
|
||||
}
|
||||
})
|
||||
|
||||
it('supports the --verbose flag', async function () {
|
||||
this.timeout(30 * 1000)
|
||||
|
||||
const { stdout } = await runRecoverZipScript(['--verbose', projectId])
|
||||
|
||||
// Verbose mode logs each file as it's added
|
||||
expect(stdout).to.include('main.tex added')
|
||||
expect(stdout).to.include('graph.png added')
|
||||
})
|
||||
})
|
||||
|
||||
/**
|
||||
* Run the recover_zip.js script with given arguments
|
||||
* @param {string[]} args
|
||||
*/
|
||||
async function runRecoverZipScript(args) {
|
||||
const TIMEOUT = 30 * 1000
|
||||
let result
|
||||
try {
|
||||
result = await promisify(execFile)(
|
||||
'node',
|
||||
['storage/scripts/recover_zip.js', ...args],
|
||||
{
|
||||
encoding: 'utf-8',
|
||||
timeout: TIMEOUT,
|
||||
env: {
|
||||
...process.env,
|
||||
LOG_LEVEL: 'debug',
|
||||
},
|
||||
}
|
||||
)
|
||||
result.status = 0
|
||||
} catch (err) {
|
||||
const { stdout, stderr, code } = err
|
||||
if (typeof code !== 'number') {
|
||||
console.log(err)
|
||||
}
|
||||
result = { stdout, stderr, status: code }
|
||||
}
|
||||
if (result.status !== 0 || result.stderr) {
|
||||
throw new Error(
|
||||
`recover_zip failed (exit ${result.status}):\n` +
|
||||
`stdout: ${result.stdout}\n` +
|
||||
`stderr: ${result.stderr}`
|
||||
)
|
||||
}
|
||||
return result
|
||||
}
|
||||
@@ -15123,6 +15123,19 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"compress-commons@npm:^7.0.0":
|
||||
version: 7.0.1
|
||||
resolution: "compress-commons@npm:7.0.1"
|
||||
dependencies:
|
||||
crc-32: "npm:^1.2.0"
|
||||
crc32-stream: "npm:^7.0.1"
|
||||
is-stream: "npm:^4.0.0"
|
||||
normalize-path: "npm:^3.0.0"
|
||||
readable-stream: "npm:^4.0.0"
|
||||
checksum: 10c0/9837b9971c7e536f14b113178e944741a0cb76051db2c84ff23da2b6321f2f6da8ed922d713bb643987d4cd39eeaa537c73d9d7843f9eb74f8fe47afe93d0733
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"compressible@npm:~2.0.16":
|
||||
version: 2.0.18
|
||||
resolution: "compressible@npm:2.0.18"
|
||||
@@ -15550,6 +15563,16 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"crc32-stream@npm:^7.0.1":
|
||||
version: 7.0.1
|
||||
resolution: "crc32-stream@npm:7.0.1"
|
||||
dependencies:
|
||||
crc-32: "npm:^1.2.0"
|
||||
readable-stream: "npm:^4.0.0"
|
||||
checksum: 10c0/0d8d217ca4f328bba859a6bef8593028d24841bef2fe4dc44ae892c7cace4301ce6d5676b568642ddb0a709ee3f0f7af1d1ada4c9c399bc5a4ff9b93d5d1071d
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"create-storybook@npm:10.3.5":
|
||||
version: 10.3.5
|
||||
resolution: "create-storybook@npm:10.3.5"
|
||||
@@ -21520,6 +21543,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"is-stream@npm:^4.0.0":
|
||||
version: 4.0.1
|
||||
resolution: "is-stream@npm:4.0.1"
|
||||
checksum: 10c0/2706c7f19b851327ba374687bc4a3940805e14ca496dc672b9629e744d143b1ad9c6f1b162dece81c7bfbc0f83b32b61ccc19ad2e05aad2dd7af347408f60c7f
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"is-string@npm:^1.1.1":
|
||||
version: 1.1.1
|
||||
resolution: "is-string@npm:1.1.1"
|
||||
@@ -25938,6 +25968,7 @@ __metadata:
|
||||
typescript: "npm:^5.0.4"
|
||||
utf-8-validate: "npm:^5.0.4"
|
||||
yauzl: "npm:^2.9.1"
|
||||
zip-stream: "npm:^7.0.2"
|
||||
languageName: unknown
|
||||
linkType: soft
|
||||
|
||||
@@ -29037,6 +29068,19 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"readable-stream@npm:^4.0.0":
|
||||
version: 4.7.0
|
||||
resolution: "readable-stream@npm:4.7.0"
|
||||
dependencies:
|
||||
abort-controller: "npm:^3.0.0"
|
||||
buffer: "npm:^6.0.3"
|
||||
events: "npm:^3.3.0"
|
||||
process: "npm:^0.11.10"
|
||||
string_decoder: "npm:^1.3.0"
|
||||
checksum: 10c0/fd86d068da21cfdb10f7a4479f2e47d9c0a9b0c862fc0c840a7e5360201580a55ac399c764b12a4f6fa291f8cee74d9c4b7562e0d53b3c4b2769f2c98155d957
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"readdir-glob@npm:^1.1.2":
|
||||
version: 1.1.3
|
||||
resolution: "readdir-glob@npm:1.1.3"
|
||||
@@ -31434,7 +31478,7 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"string_decoder@npm:^1.1.1":
|
||||
"string_decoder@npm:^1.1.1, string_decoder@npm:^1.3.0":
|
||||
version: 1.3.0
|
||||
resolution: "string_decoder@npm:1.3.0"
|
||||
dependencies:
|
||||
@@ -34968,6 +35012,17 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"zip-stream@npm:^7.0.2":
|
||||
version: 7.0.5
|
||||
resolution: "zip-stream@npm:7.0.5"
|
||||
dependencies:
|
||||
compress-commons: "npm:^7.0.0"
|
||||
normalize-path: "npm:^3.0.0"
|
||||
readable-stream: "npm:^4.0.0"
|
||||
checksum: 10c0/e1669e17031c3c7243cb9014eacfaa66f4cd2e0d613a57dbee9caf7122ae869f8b2ea2e5891b5d9eee2897060c01db048b9b2a544ba83e227f6c162905282e48
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"zod-validation-error@npm:4.0.1":
|
||||
version: 4.0.1
|
||||
resolution: "zod-validation-error@npm:4.0.1"
|
||||
|
||||
Reference in New Issue
Block a user