Migrate history-v1 recover_zip scripts from archiver to zip-stream (#32813)

* migrate recover_zip_from_backup from archiver to zip-stream

Replace the `archiver` package with `zip-stream` (the lower-level library
that `archiver` wraps) in the `recover_zip_from_backup.mjs` script and
`backupArchiver.mjs` library. The `archiver` package has known issues with
hanging when creating large zip files and is no longer actively maintained.

Changes:
- Add `zip-stream@^7.0.2` as a direct dependency
- Update `backupArchiver.mjs` to use promisified `ZipStream.entry()`
  instead of `Archiver.append()`
- Rewrite `recover_zip_from_backup.mjs` to use `ZipStream` with
  `stream/promises.pipeline` for cleaner async flow
- Keep `archiver` dependency for `project_archive.js` (separate code path)

Agent-Logs-Url: https://github.com/overleaf/internal/sessions/0df27a8b-97f1-43cc-ac26-f5247a84313f

Co-authored-by: briangough <7457354+briangough@users.noreply.github.com>

* extract finalize timeout to named constant

Agent-Logs-Url: https://github.com/overleaf/internal/sessions/0df27a8b-97f1-43cc-ac26-f5247a84313f

Co-authored-by: briangough <7457354+briangough@users.noreply.github.com>

* convert recover_zip.js to zip-stream, remove finalize timeout, add verbose logging

Agent-Logs-Url: https://github.com/overleaf/internal/sessions/9380d08a-d813-4e9f-a2ac-4891122c163b

Co-authored-by: briangough <7457354+briangough@users.noreply.github.com>

* add acceptance tests for recover_zip_from_backup in raw and latest modes

Agent-Logs-Url: https://github.com/overleaf/internal/sessions/9380d08a-d813-4e9f-a2ac-4891122c163b

Co-authored-by: briangough <7457354+briangough@users.noreply.github.com>

* fix comment formatting in recover_zip_from_backup.mjs

Agent-Logs-Url: https://github.com/overleaf/internal/sessions/9380d08a-d813-4e9f-a2ac-4891122c163b

Co-authored-by: briangough <7457354+briangough@users.noreply.github.com>

* restore EventEmitter.defaultMaxListeners in recover_zip.js, add acceptance test

Agent-Logs-Url: https://github.com/overleaf/internal/sessions/e7443126-22d5-4d0e-a176-a7a5dba49ffd

Co-authored-by: briangough <7457354+briangough@users.noreply.github.com>

* fix formatting

* refactor: simplify stream handling by using named imports for pipeline

* fix blob hash verification in backup acceptance tests

* fix recover_zip script and tests

* fix: exit with non-zero status on error in recover_zip.js

Agent-Logs-Url: https://github.com/overleaf/internal/sessions/ef3f109b-488f-47c9-84a5-b5269387166a

Co-authored-by: briangough <7457354+briangough@users.noreply.github.com>

* migrate from npm to yarn

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: briangough <7457354+briangough@users.noreply.github.com>
Co-authored-by: Brian Gough <briangough@users.noreply.github.com>
GitOrigin-RevId: 6255f9610f3c846790e2ed8b1979ac08b7effece
This commit is contained in:
Copilot
2026-06-01 14:20:08 +01:00
committed by Copybot
parent 7053434da6
commit a9a9f6ee6b
8 changed files with 504 additions and 133 deletions
+2 -1
View File
@@ -44,7 +44,8 @@
"temp": "^0.8.3",
"throng": "^4.0.0",
"tsscmp": "^1.0.6",
"utf-8-validate": "^5.0.4"
"utf-8-validate": "^5.0.4",
"zip-stream": "^7.0.2"
},
"devDependencies": {
"@overleaf/migrations": "workspace:*",
@@ -185,13 +185,30 @@ class BackupBlobStore {
*/
/**
* @typedef {(import('archiver').Archiver)} Archiver
* @typedef {(import('zip-stream').default)} ZipStream
*/
/**
* @typedef {(import('overleaf-editor-core').FileMap)} FileMap
*/
/**
* Promisified wrapper for ZipStream's entry method.
*
* @param {ZipStream} archive
* @param {Buffer|NodeJS.ReadableStream|string} source
* @param {{ name: string }} data
* @return {Promise<void>}
*/
function addEntry(archive, source, data) {
return new Promise((resolve, reject) => {
archive.entry(source, data, err => {
if (err) reject(err)
else resolve()
})
})
}
/**
*
* @param historyId
@@ -254,14 +271,15 @@ async function fetchBlob(historyId, hash, persistor) {
/**
* @typedef {object} AddChunkOptions
* @property {string} [prefix] Should include trailing slash (if length > 0)
* @property {string} [prefix]
* @property {boolean} [useBackupGlobalBlobs]
* @property {boolean} [verbose]
*/
/**
*
* @param {History} history
* @param {Archiver} archive
* @param {ZipStream} archive
* @param {CachedPerProjectEncryptedS3Persistor} projectCache
* @param {string} historyId
* @param {AddChunkOptions} [options]
@@ -272,7 +290,7 @@ async function addChunkToArchive(
archive,
projectCache,
historyId,
{ prefix = '', useBackupGlobalBlobs = false } = {}
{ prefix = '', useBackupGlobalBlobs = false, verbose = false } = {}
) {
const chunkBlobs = new Set()
history.findBlobHashes(chunkBlobs)
@@ -334,9 +352,16 @@ async function addChunkToArchive(
}
content = await blobStore.getStream(hash)
}
archive.append(content, {
if (content == null) {
logger.error({ filePath }, 'File content is empty')
continue
}
await addEntry(archive, content, {
name: `${prefix}${filePath}`,
})
if (verbose) {
logger.info({ filePath: `${prefix}${filePath}` }, 'added to archive')
}
}
})
}
@@ -358,17 +383,20 @@ async function findStartVersionOfLatestChunk(historyId) {
/**
* Restore a project from the latest snapshot
*
* There is an assumption that the database backup has been restored.
* There is an assumption that the database backup
* has been restored.
*
* @param {Archiver} archive
* @param {ZipStream} archive
* @param {string} historyId
* @param {boolean} [useBackupGlobalBlobs]
* @param {boolean} [verbose]
* @return {Promise<void>}
*/
export async function archiveLatestChunk(
archive,
historyId,
useBackupGlobalBlobs = false
useBackupGlobalBlobs = false,
verbose = false
) {
logger.info({ historyId, useBackupGlobalBlobs }, 'Archiving latest chunk')
@@ -386,20 +414,28 @@ export async function archiveLatestChunk(
await addChunkToArchive(backedUpChunk, archive, projectCache, historyId, {
useBackupGlobalBlobs,
verbose,
})
return archive
}
/**
* Fetches all raw blobs from the project and adds them to the archive.
* Fetches all raw blobs from the project and adds
* them to the archive.
*
* @param {string} historyId
* @param {Archiver} archive
* @param {ZipStream} archive
* @param {CachedPerProjectEncryptedS3Persistor} projectCache
* @param {boolean} [verbose]
* @return {Promise<void>}
*/
async function addRawBlobsToArchive(historyId, archive, projectCache) {
async function addRawBlobsToArchive(
historyId,
archive,
projectCache,
verbose = false
) {
const blobKeys = await projectCache.listDirectoryKeys(
projectBlobsBucket,
projectKey.format(historyId)
@@ -411,9 +447,13 @@ async function addRawBlobsToArchive(historyId, archive, projectCache) {
key,
{ autoGunzip: true }
)
archive.append(stream, {
name: path.join(historyId, 'blobs', key),
const entryName = path.join(historyId, 'blobs', key)
await addEntry(archive, stream, {
name: entryName,
})
if (verbose) {
logger.info({ entryName }, 'added to archive')
}
} catch (err) {
logger.warn({ err, path: key }, 'Failed to append blob to archive')
}
@@ -425,17 +465,20 @@ async function addRawBlobsToArchive(historyId, archive, projectCache) {
*
* This can work without the database being backed up.
*
* It will split the project into chunks per directory and download the blobs alongside the chunk.
* It will split the project into chunks per directory
* and download the blobs alongside the chunk.
*
* @param {Archiver} archive
* @param {ZipStream} archive
* @param {string} historyId
* @param {boolean} [useBackupGlobalBlobs]
* @param {boolean} [verbose]
* @return {Promise<void>}
*/
export async function archiveRawProject(
archive,
historyId,
useBackupGlobalBlobs = false
useBackupGlobalBlobs = false,
verbose = false
) {
const projectCache = await getProjectPersistor(historyId)
@@ -454,11 +497,15 @@ export async function archiveRawProject(
const { buffer } = await loadChunkByKey(projectCache, key)
archive.append(buffer, {
name: `${historyId}/chunks/${chunkId}/chunk.json`,
const entryName = `${historyId}/chunks/${chunkId}/chunk.json`
await addEntry(archive, buffer, {
name: entryName,
})
if (verbose) {
logger.info({ entryName }, 'added to archive')
}
}
await addRawBlobsToArchive(historyId, archive, projectCache)
await addRawBlobsToArchive(historyId, archive, projectCache, verbose)
}
export class BackupPersistorError extends OError {}
+4 -4
View File
@@ -6,19 +6,19 @@
*/
'use strict'
const Stream = require('node:stream')
const { pipeline } = require('node:stream/promises')
const zlib = require('node:zlib')
const { WritableBuffer } = require('@overleaf/stream-utils')
/**
* Create a promise for the result of reading a stream to a buffer.
*
* @param {Stream.Readable} readStream
* @param {import('node:stream').Readable} readStream
* @return {Promise<Buffer>}
*/
async function readStreamToBuffer(readStream) {
const bufferStream = new WritableBuffer()
await Stream.promises.pipeline(readStream, bufferStream)
await pipeline(readStream, bufferStream)
return bufferStream.contents()
}
@@ -33,7 +33,7 @@ exports.readStreamToBuffer = readStreamToBuffer
async function gunzipStreamToBuffer(readStream) {
const gunzip = zlib.createGunzip()
const bufferStream = new WritableBuffer()
await Stream.promises.pipeline(readStream, gunzip, bufferStream)
await pipeline(readStream, gunzip, bufferStream)
return bufferStream.contents()
}
@@ -17,9 +17,10 @@ const fs = require('node:fs')
const os = require('node:os')
const path = require('node:path')
const util = require('node:util')
const { pipeline } = require('node:stream/promises')
// Something is registering 11 listeners, over the limit of 10, which generates
// a lot of warning noise.
// Something is registering 11 listeners, over the limit
// of 10, which generates a lot of warning noise.
require('node:events').EventEmitter.defaultMaxListeners = 11
const config = require('config')
@@ -27,11 +28,23 @@ const config = require('config')
// eslint-disable-next-line import/no-extraneous-dependencies
const { Storage } = require('@google-cloud/storage')
const isValidUtf8 = require('utf-8-validate')
// zip-stream@7 uses ESM default export
const ZipStream = require('zip-stream').default
function createStorage() {
const opts = {}
if (config.has('persistor.gcs.endpoint.apiEndpoint')) {
opts.apiEndpoint = config.get('persistor.gcs.endpoint.apiEndpoint')
}
if (config.has('persistor.gcs.endpoint.projectId')) {
opts.projectId = config.get('persistor.gcs.endpoint.projectId')
}
return new Storage(opts)
}
const core = require('overleaf-editor-core')
const projectKey = require('@overleaf/object-persistor/src/ProjectKey.js')
const streams = require('../lib/streams')
const ProjectArchive = require('../lib/project_archive')
const {
values: { verbose: VERBOSE },
@@ -53,7 +66,7 @@ if (HISTORY_IDS.length === 0) {
async function listDeletedChunks(historyId) {
const bucketName = config.get('chunkStore.bucket')
const storage = new Storage()
const storage = createStorage()
const [files] = await storage.bucket(bucketName).getFiles({
prefix: projectKey.format(historyId),
versions: true,
@@ -137,7 +150,7 @@ class RecoveryBlobStore {
if (VERBOSE) console.log('fetching blob', hash)
const bucketName = config.get('blobStore.projectBucket')
const storage = new Storage()
const storage = createStorage()
const [files] = await storage.bucket(bucketName).getFiles({
prefix: this.makeProjectBlobKey(hash),
versions: true,
@@ -158,7 +171,7 @@ class RecoveryBlobStore {
async fetchGlobalBlob(hash, destination) {
const bucketName = config.get('blobStore.globalBucket')
const storage = new Storage()
const storage = createStorage()
const file = storage.bucket(bucketName).file(this.makeGlobalBlobKey(hash))
await file.download({ destination })
}
@@ -203,9 +216,18 @@ class RecoveryBlobStore {
async function uploadZip(historyId, zipPathname) {
const bucketName = config.get('zipStore.bucket')
const deadline = 24 * 3600 * 1000 // lifecycle limit on the zips bucket
const storage = new Storage()
const storage = createStorage()
const destination = `${historyId}-recovered.zip`
await storage.bucket(bucketName).upload(zipPathname, { destination })
await storage.bucket(bucketName).upload(zipPathname, {
destination,
resumable: false,
})
if (config.has('persistor.gcs.endpoint.apiEndpoint')) {
// In emulator mode, signed URLs aren't available
const apiEndpoint = config.get('persistor.gcs.endpoint.apiEndpoint')
return `${apiEndpoint}/storage/v1/b/${bucketName}/o/${encodeURIComponent(destination)}?alt=media`
}
const signedUrls = await storage
.bucket(bucketName)
@@ -219,6 +241,23 @@ async function uploadZip(historyId, zipPathname) {
return signedUrls[0]
}
/**
* Promisified wrapper for ZipStream's entry method.
*
* @param {ZipStream} archive
* @param {Buffer|NodeJS.ReadableStream|string} source
* @param {{ name: string }} data
* @return {Promise<void>}
*/
function addEntry(archive, source, data) {
return new Promise((resolve, reject) => {
archive.entry(source, data, err => {
if (err) reject(err)
else resolve()
})
})
}
async function restoreProject(historyId) {
const tmp = await fs.promises.mkdtemp(
path.join(os.tmpdir(), historyId.toString())
@@ -237,9 +276,40 @@ async function restoreProject(historyId) {
if (VERBOSE) console.log('zipping', historyId)
const zipPathname = path.join(tmp, `${historyId}.zip`)
const zipTimeoutMs = 60 * 1000
const archive = new ProjectArchive(snapshot, zipTimeoutMs)
await archive.writeZip(blobStore, zipPathname)
const outputFile = fs.createWriteStream(zipPathname)
const archive = new ZipStream()
const pipelinePromise = pipeline(archive, outputFile)
for (const pathname of snapshot.getFilePathnames()) {
const file = snapshot.getFile(pathname)
if (!file) continue
await file.load('eager', blobStore)
let content = file.getContent({
filterTrackedDeletes: true,
})
if (content === null) {
const hash = file.getHash()
content = await blobStore.getStream(hash)
}
if (content == null) continue
if (typeof content === 'string') {
content = Buffer.from(content)
}
await addEntry(archive, content, { name: pathname })
if (VERBOSE) console.log(`${pathname} added`)
}
archive.finalize()
await pipelinePromise
if (VERBOSE) {
console.log(`Wrote ${archive.getBytesWritten()} bytes`)
}
if (VERBOSE) console.log('uploading', historyId)
@@ -252,4 +322,7 @@ async function main() {
console.log(signedUrl)
}
}
main().catch(console.error)
main().catch(err => {
console.error(err)
process.exit(1)
})
@@ -4,6 +4,7 @@ import commandLineArgs from 'command-line-args'
import assert from '../lib/assert.js'
import fs from 'node:fs'
import { setTimeout } from 'node:timers/promises'
import { pipeline } from 'node:stream/promises'
import {
archiveLatestChunk,
archiveRawProject,
@@ -11,17 +12,13 @@ import {
} from '../lib/backupArchiver.mjs'
import knex from '../lib/knex.js'
import { client } from '../lib/mongodb.js'
import archiver from 'archiver'
import Events from 'node:events'
import ZipStream from 'zip-stream'
import { Chunk } from 'overleaf-editor-core'
import _ from 'lodash'
// Silence warning.
Events.setMaxListeners(20)
const SUPPORTED_MODES = ['raw', 'latest']
// Pads the mode name to a fixed length for better alignment in output.
// Pads the mode name to a fixed length for alignment.
const padModeName = _.partialRight(
_.padEnd,
Math.max(...SUPPORTED_MODES.map(mode => mode.length))
@@ -65,25 +62,6 @@ function usage() {
})
}
/**
* @typedef {import('archiver').ZipArchive} ZipArchive
*/
/**
* @typedef {import('archiver').ProgressData} ProgressData
*/
/**
* @typedef {import('archiver').EntryData} EntryData
*/
/**
* @typedef {Object} ArchiverError
* @property {string} message
* @property {string} code
* @property {Object} data
*/
let historyId, help, mode, output, useBackupGlobalBlobs, verbose
try {
@@ -136,80 +114,37 @@ await loadGlobalBlobs()
outputFile = fs.createWriteStream(output)
const archive = archiver.create('zip', {})
const archive = new ZipStream()
archive.on('close', function () {
console.log(archive.pointer() + ' total bytes')
console.log(`Wrote ${output}`)
shutdown().catch(e => console.error('Error shutting down', e))
archive.on('error', function (e) {
console.error(`Error writing archive: ${e.message}`)
})
archive.on(
'error',
/**
*
* @param {ArchiverError} e
*/
function (e) {
console.error(`Error writing archive: ${e.message}`)
}
)
archive.on('end', function () {
console.log(`Wrote ${archive.pointer()} total bytes to ${output}`)
shutdown().catch(e => console.error('Error shutting down', e))
})
archive.on(
'progress',
/**
*
* @param {ProgressData} progress
*/
function (progress) {
if (verbose) {
console.log(
`${progress.entries.processed} processed out of ${progress.entries.total}`
)
}
}
)
archive.on(
'entry',
/**
*
* @param {EntryData} entry
*/
function (entry) {
if (verbose) {
console.log(`${entry.name} added`)
}
}
)
archive.on(
'warning',
/**
*
* @param {ArchiverError} warning
*/
function (warning) {
console.warn(`Warning encountered when writing archive: ${warning.message}`)
}
)
try {
// Pipe archive to the output file before adding entries.
// pipeline handles backpressure and will resolve when
// the archive stream ends.
const pipelinePromise = pipeline(archive, outputFile)
switch (mode) {
case 'latest':
await archiveLatestChunk(archive, historyId, useBackupGlobalBlobs)
await archiveLatestChunk(
archive,
historyId,
useBackupGlobalBlobs,
verbose
)
break
case 'raw':
default:
await archiveRawProject(archive, historyId, useBackupGlobalBlobs)
await archiveRawProject(archive, historyId, useBackupGlobalBlobs, verbose)
break
}
archive.pipe(outputFile)
archive.finalize()
await pipelinePromise
console.log(`Wrote ${archive.getBytesWritten()} total bytes to ${output}`)
} catch (error) {
if (error instanceof BackupPersistorError) {
console.error(error.message)
@@ -222,12 +157,7 @@ try {
} else {
console.error('Error encountered when writing archive')
}
} finally {
await Promise.race([
await archive.finalize(),
setTimeout(10000).then(() => {
console.error('Archive did not finalize in time')
return shutdown(1)
}),
])
await shutdown(1)
}
await shutdown(0)
@@ -647,6 +647,68 @@ describe('backup script', function () {
expect(newBackupStatus.backupStatus.lastBackedUpVersion).to.equal(50) // backup fails on final chunk
expect(newBackupStatus.currentEndVersion).to.equal(54) // backup is incomplete due to missing blob
})
it('can recover zip file from backup in raw mode', async function () {
// First, run backup so data is available
await runBackupScript(['--projectId', projectId])
const zipPath = `/tmp/test-recover-raw-${historyId}.zip`
try {
await runRecoverZipFromBackupScript([
'--historyId',
historyId,
'--output',
zipPath,
'--mode=raw',
])
// Verify the zip file is valid
const { stdout } = await promisify(execFile)('unzip', ['-l', zipPath], {
encoding: 'utf-8',
})
// Raw mode includes chunk and blob keys
// Verify chunks are present
expect(stdout).to.include('chunk.json')
// Verify blob hashes are present (hashes are stored as {hash[0:2]}/{hash[2:]})
expect(stdout).to.include(testFiles.GRAPH_PNG_HASH.slice(2))
expect(stdout).to.include(testFiles.NON_BMP_TXT_HASH.slice(2))
} finally {
await fs.promises.unlink(zipPath).catch(() => {})
}
})
it('can recover zip file from backup in latest mode', async function () {
// First, run backup so data is available
await runBackupScript(['--projectId', projectId])
const zipPath = `/tmp/test-recover-latest-${historyId}.zip`
try {
await runRecoverZipFromBackupScript([
'--historyId',
historyId,
'--output',
zipPath,
'--mode=latest',
])
// Verify the zip file is valid
const { stdout } = await promisify(execFile)('unzip', ['-l', zipPath], {
encoding: 'utf-8',
})
// Latest mode includes the project files
expect(stdout).to.include('main.tex')
expect(stdout).to.include('chapter1.tex')
expect(stdout).to.include('chapter2.tex')
expect(stdout).to.include('bibliography.bib')
expect(stdout).to.include('graph.png')
expect(stdout).to.include('unicodeFile.tex')
} finally {
await fs.promises.unlink(zipPath).catch(() => {})
}
})
})
})
@@ -683,3 +745,37 @@ async function runBackupScript(args) {
}
return result
}
/**
* Run the recover_zip_from_backup script with given arguments
* @param {string[]} args
*/
async function runRecoverZipFromBackupScript(args) {
const TIMEOUT = 30 * 1000
let result
try {
result = await promisify(execFile)(
'node',
['storage/scripts/recover_zip_from_backup.mjs', ...args],
{
encoding: 'utf-8',
timeout: TIMEOUT,
env: {
...process.env,
LOG_LEVEL: 'debug',
},
}
)
result.status = 0
} catch (err) {
const { stdout, stderr, code } = err
if (typeof code !== 'number') {
console.log(err)
}
result = { stdout, stderr, status: code }
}
if (result.status !== 0) {
throw new Error(`recover_zip_from_backup failed: ${result.stderr}`)
}
return result
}
@@ -0,0 +1,169 @@
import { expect } from 'chai'
import config from 'config'
import { execFile } from 'node:child_process'
import fs from 'node:fs'
import { promisify } from 'node:util'
import { Change, Operation, File, TextOperation } from 'overleaf-editor-core'
// We depend on this via object-persistor.
// eslint-disable-next-line import/no-extraneous-dependencies
import { Storage } from '@google-cloud/storage'
import {
loadGlobalBlobs,
BlobStore,
} from '../../../../storage/lib/blob_store/index.js'
import ChunkStore from '../../../../storage/lib/chunk_store/index.js'
import persistChanges from '../../../../storage/lib/persist_changes.js'
import testFiles from '../storage/support/test_files.js'
import cleanup from './support/cleanup.js'
import { getZipEntries } from './support/unzip.js'
describe('recover_zip script', function () {
let projectId
let limitsToPersistImmediately
before(async function () {
const farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
limitsToPersistImmediately = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
maxChanges: 10,
maxChunkChanges: 10,
}
const gcsEndpoint = config.get('persistor.gcs.endpoint')
const storage = new Storage({
apiEndpoint: gcsEndpoint.apiEndpoint,
projectId: gcsEndpoint.projectId,
})
const bucketName = config.get('zipStore.bucket')
try {
const [exists] = await storage.bucket(bucketName).exists()
if (!exists) {
await storage.createBucket(bucketName)
}
} catch (err) {
if (err.code !== 409) throw err
}
})
beforeEach(cleanup.everything)
beforeEach(async function () {
await loadGlobalBlobs()
projectId = '123'
// Initialize the project in the chunk store
await ChunkStore.initializeProject(projectId)
const blobStore = new BlobStore(projectId)
// Upload binary file blob
await blobStore.putFile(testFiles.path('graph.png'))
// Create initial snapshot with text and binary files
const addMainTex = Operation.addFile(
'main.tex',
File.fromString('hello world')
)
const addGraphPng = Operation.addFile(
'graph.png',
File.fromHash(testFiles.GRAPH_PNG_HASH)
)
const change1 = new Change([addMainTex, addGraphPng], new Date(), [])
await persistChanges(projectId, [change1], limitsToPersistImmediately, 0)
// Add a text edit
const textOp = TextOperation.fromJSON({
textOperation: ['hello world'.length, ' more'],
})
const editOp = Operation.editFile('main.tex', textOp)
const change2 = new Change([editOp], new Date(), [])
await persistChanges(projectId, [change2], limitsToPersistImmediately, 1)
})
it('creates a valid zip from GCS data', async function () {
this.timeout(30 * 1000)
const zipPath = `/tmp/test-recover-zip-${projectId}.zip`
try {
const { stdout } = await runRecoverZipScript([projectId])
// The script logs the signed URL to stdout
const urlMatch = stdout.match(/(https?:\/\/[^\s]+)/)
expect(urlMatch).to.not.be.null
const signedUrl = urlMatch[1]
// Download the zip via fetch
const res = await fetch(signedUrl)
expect(res.ok).to.be.true
const buffer = await res.arrayBuffer()
await fs.promises.writeFile(zipPath, Buffer.from(buffer))
const zipEntries = await getZipEntries(zipPath)
const fileNames = zipEntries.map(e => e.fileName).sort()
expect(fileNames).to.deep.equal(['graph.png', 'main.tex'])
// Verify text content size (after edit)
const mainTexEntry = zipEntries.find(e => e.fileName === 'main.tex')
expect(mainTexEntry.uncompressedSize).to.equal('hello world more'.length)
// Verify binary content size
const graphEntry = zipEntries.find(e => e.fileName === 'graph.png')
expect(graphEntry.uncompressedSize).to.equal(
testFiles.GRAPH_PNG_BYTE_LENGTH
)
} finally {
await fs.promises.unlink(zipPath).catch(() => {})
}
})
it('supports the --verbose flag', async function () {
this.timeout(30 * 1000)
const { stdout } = await runRecoverZipScript(['--verbose', projectId])
// Verbose mode logs each file as it's added
expect(stdout).to.include('main.tex added')
expect(stdout).to.include('graph.png added')
})
})
/**
* Run the recover_zip.js script with given arguments
* @param {string[]} args
*/
async function runRecoverZipScript(args) {
const TIMEOUT = 30 * 1000
let result
try {
result = await promisify(execFile)(
'node',
['storage/scripts/recover_zip.js', ...args],
{
encoding: 'utf-8',
timeout: TIMEOUT,
env: {
...process.env,
LOG_LEVEL: 'debug',
},
}
)
result.status = 0
} catch (err) {
const { stdout, stderr, code } = err
if (typeof code !== 'number') {
console.log(err)
}
result = { stdout, stderr, status: code }
}
if (result.status !== 0 || result.stderr) {
throw new Error(
`recover_zip failed (exit ${result.status}):\n` +
`stdout: ${result.stdout}\n` +
`stderr: ${result.stderr}`
)
}
return result
}
+56 -1
View File
@@ -15123,6 +15123,19 @@ __metadata:
languageName: node
linkType: hard
"compress-commons@npm:^7.0.0":
version: 7.0.1
resolution: "compress-commons@npm:7.0.1"
dependencies:
crc-32: "npm:^1.2.0"
crc32-stream: "npm:^7.0.1"
is-stream: "npm:^4.0.0"
normalize-path: "npm:^3.0.0"
readable-stream: "npm:^4.0.0"
checksum: 10c0/9837b9971c7e536f14b113178e944741a0cb76051db2c84ff23da2b6321f2f6da8ed922d713bb643987d4cd39eeaa537c73d9d7843f9eb74f8fe47afe93d0733
languageName: node
linkType: hard
"compressible@npm:~2.0.16":
version: 2.0.18
resolution: "compressible@npm:2.0.18"
@@ -15550,6 +15563,16 @@ __metadata:
languageName: node
linkType: hard
"crc32-stream@npm:^7.0.1":
version: 7.0.1
resolution: "crc32-stream@npm:7.0.1"
dependencies:
crc-32: "npm:^1.2.0"
readable-stream: "npm:^4.0.0"
checksum: 10c0/0d8d217ca4f328bba859a6bef8593028d24841bef2fe4dc44ae892c7cace4301ce6d5676b568642ddb0a709ee3f0f7af1d1ada4c9c399bc5a4ff9b93d5d1071d
languageName: node
linkType: hard
"create-storybook@npm:10.3.5":
version: 10.3.5
resolution: "create-storybook@npm:10.3.5"
@@ -21520,6 +21543,13 @@ __metadata:
languageName: node
linkType: hard
"is-stream@npm:^4.0.0":
version: 4.0.1
resolution: "is-stream@npm:4.0.1"
checksum: 10c0/2706c7f19b851327ba374687bc4a3940805e14ca496dc672b9629e744d143b1ad9c6f1b162dece81c7bfbc0f83b32b61ccc19ad2e05aad2dd7af347408f60c7f
languageName: node
linkType: hard
"is-string@npm:^1.1.1":
version: 1.1.1
resolution: "is-string@npm:1.1.1"
@@ -25938,6 +25968,7 @@ __metadata:
typescript: "npm:^5.0.4"
utf-8-validate: "npm:^5.0.4"
yauzl: "npm:^2.9.1"
zip-stream: "npm:^7.0.2"
languageName: unknown
linkType: soft
@@ -29037,6 +29068,19 @@ __metadata:
languageName: node
linkType: hard
"readable-stream@npm:^4.0.0":
version: 4.7.0
resolution: "readable-stream@npm:4.7.0"
dependencies:
abort-controller: "npm:^3.0.0"
buffer: "npm:^6.0.3"
events: "npm:^3.3.0"
process: "npm:^0.11.10"
string_decoder: "npm:^1.3.0"
checksum: 10c0/fd86d068da21cfdb10f7a4479f2e47d9c0a9b0c862fc0c840a7e5360201580a55ac399c764b12a4f6fa291f8cee74d9c4b7562e0d53b3c4b2769f2c98155d957
languageName: node
linkType: hard
"readdir-glob@npm:^1.1.2":
version: 1.1.3
resolution: "readdir-glob@npm:1.1.3"
@@ -31434,7 +31478,7 @@ __metadata:
languageName: node
linkType: hard
"string_decoder@npm:^1.1.1":
"string_decoder@npm:^1.1.1, string_decoder@npm:^1.3.0":
version: 1.3.0
resolution: "string_decoder@npm:1.3.0"
dependencies:
@@ -34968,6 +35012,17 @@ __metadata:
languageName: node
linkType: hard
"zip-stream@npm:^7.0.2":
version: 7.0.5
resolution: "zip-stream@npm:7.0.5"
dependencies:
compress-commons: "npm:^7.0.0"
normalize-path: "npm:^3.0.0"
readable-stream: "npm:^4.0.0"
checksum: 10c0/e1669e17031c3c7243cb9014eacfaa66f4cd2e0d613a57dbee9caf7122ae869f8b2ea2e5891b5d9eee2897060c01db048b9b2a544ba83e227f6c162905282e48
languageName: node
linkType: hard
"zod-validation-error@npm:4.0.1":
version: 4.0.1
resolution: "zod-validation-error@npm:4.0.1"