Merge pull request #30045 from overleaf/bg-object-persistor-make-list-directory-safer
Improve safety of object persistor GitOrigin-RevId: bced9814de6613b388ca288a5f72cd42cff6c1d3
This commit is contained in:
@@ -177,4 +177,21 @@ module.exports = class AbstractPersistor {
|
||||
prefix,
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* List objects in a directory, returning key and size information.
|
||||
*
|
||||
* Suitable only for directories where the number of keys is known to be small.
|
||||
*
|
||||
* @param {string} location
|
||||
* @param {string} prefix
|
||||
* @returns {Promise<Array<{key: string, size: number}>>}
|
||||
*/
|
||||
async listDirectoryStats(location, prefix) {
|
||||
throw new NotImplementedError('method not implemented in persistor', {
|
||||
method: 'listDirectoryStats',
|
||||
location,
|
||||
prefix,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -196,7 +196,46 @@ module.exports = class FSPersistor extends AbstractPersistor {
|
||||
|
||||
async listDirectoryKeys(location, name) {
|
||||
const fsPath = this._getFsPath(location, name)
|
||||
return await this._listDirectory(fsPath)
|
||||
const paths = await this._listDirectory(fsPath)
|
||||
|
||||
// Filter to only return files, not directories
|
||||
const files = []
|
||||
for (const path of paths) {
|
||||
try {
|
||||
const stat = await fsPromises.stat(path)
|
||||
if (stat.isFile()) {
|
||||
files.push(path)
|
||||
}
|
||||
} catch (err) {
|
||||
// ignore files that may have just been deleted
|
||||
if (err.code !== 'ENOENT') {
|
||||
throw err
|
||||
}
|
||||
}
|
||||
}
|
||||
return files
|
||||
}
|
||||
|
||||
async listDirectoryStats(location, name) {
|
||||
const fsPath = this._getFsPath(location, name)
|
||||
const paths = await this._listDirectory(fsPath)
|
||||
|
||||
// Filter to only return files, not directories, with their sizes
|
||||
const stats = []
|
||||
for (const path of paths) {
|
||||
try {
|
||||
const stat = await fsPromises.stat(path)
|
||||
if (stat.isFile()) {
|
||||
stats.push({ key: path, size: stat.size })
|
||||
}
|
||||
} catch (err) {
|
||||
// ignore files that may have just been deleted
|
||||
if (err.code !== 'ENOENT') {
|
||||
throw err
|
||||
}
|
||||
}
|
||||
}
|
||||
return stats
|
||||
}
|
||||
|
||||
async checkIfObjectExists(location, name) {
|
||||
|
||||
@@ -322,6 +322,17 @@ module.exports = class GcsPersistor extends AbstractPersistor {
|
||||
return files.map(file => file.name)
|
||||
}
|
||||
|
||||
async listDirectoryStats(bucketName, prefix) {
|
||||
const files = await this.#listDirectory(
|
||||
bucketName,
|
||||
ensurePrefixIsDirectory(prefix)
|
||||
)
|
||||
return files.map(file => ({
|
||||
key: file.name,
|
||||
size: parseInt(file.metadata.size, 10),
|
||||
}))
|
||||
}
|
||||
|
||||
async checkIfObjectExists(bucketName, key) {
|
||||
try {
|
||||
const [response] = await this.storage
|
||||
|
||||
@@ -417,10 +417,20 @@ class CachedPerProjectEncryptedS3Persistor {
|
||||
*
|
||||
* @param {string} bucketName
|
||||
* @param {string} path
|
||||
* @return {Promise<ListDirectoryResult>}
|
||||
* @return {Promise<string[]>}
|
||||
*/
|
||||
async listDirectory(bucketName, path) {
|
||||
return await this.#parent.listDirectory(bucketName, path)
|
||||
async listDirectoryKeys(bucketName, path) {
|
||||
return await this.#parent.listDirectoryKeys(bucketName, path)
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {string} bucketName
|
||||
* @param {string} path
|
||||
* @return {Promise<Array<{key: string, size: number}>>}
|
||||
*/
|
||||
async listDirectoryStats(bucketName, path) {
|
||||
return await this.#parent.listDirectoryStats(bucketName, path)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -266,7 +266,7 @@ class S3Persistor extends AbstractPersistor {
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
async deleteDirectory(bucketName, key, continuationToken) {
|
||||
const { contents, response } = await this.listDirectory(
|
||||
const { contents, response } = await this.#listDirectory(
|
||||
bucketName,
|
||||
key,
|
||||
continuationToken
|
||||
@@ -309,7 +309,7 @@ class S3Persistor extends AbstractPersistor {
|
||||
* @param {string} [continuationToken]
|
||||
* @return {Promise<ListDirectoryResult>}
|
||||
*/
|
||||
async listDirectory(bucketName, key, continuationToken) {
|
||||
async #listDirectory(bucketName, key, continuationToken) {
|
||||
let response
|
||||
const options = { Bucket: bucketName, Prefix: key }
|
||||
if (continuationToken) {
|
||||
@@ -535,6 +535,59 @@ class S3Persistor extends AbstractPersistor {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} bucketName
|
||||
* @param {string} prefix
|
||||
* @return {Promise<Array<string>>}
|
||||
*/
|
||||
async listDirectoryKeys(bucketName, prefix) {
|
||||
const keys = []
|
||||
let continuationToken
|
||||
|
||||
do {
|
||||
const { contents, response } = await this.#listDirectory(
|
||||
bucketName,
|
||||
prefix,
|
||||
continuationToken
|
||||
)
|
||||
keys.push(...contents.map(item => item.Key || ''))
|
||||
continuationToken = response.IsTruncated
|
||||
? response.NextContinuationToken
|
||||
: undefined
|
||||
} while (continuationToken)
|
||||
|
||||
return keys
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} bucketName
|
||||
* @param {string} prefix
|
||||
* @return {Promise<Array<{key: string, size: number}>>}
|
||||
*/
|
||||
async listDirectoryStats(bucketName, prefix) {
|
||||
const stats = []
|
||||
let continuationToken
|
||||
|
||||
do {
|
||||
const { contents, response } = await this.#listDirectory(
|
||||
bucketName,
|
||||
prefix,
|
||||
continuationToken
|
||||
)
|
||||
stats.push(
|
||||
...contents.map(item => ({
|
||||
key: item.Key || '',
|
||||
size: item.Size ?? -1,
|
||||
}))
|
||||
)
|
||||
continuationToken = response.IsTruncated
|
||||
? response.NextContinuationToken
|
||||
: undefined
|
||||
} while (continuationToken)
|
||||
|
||||
return stats
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} bucket
|
||||
* @return {S3Client}
|
||||
|
||||
@@ -400,6 +400,56 @@ describe('FSPersistorTests', function () {
|
||||
).to.equal(0)
|
||||
})
|
||||
})
|
||||
|
||||
describe('listDirectoryKeys', function () {
|
||||
beforeEach(async function () {
|
||||
for (const file of Object.values(files)) {
|
||||
await persistor.sendFile(location, file, '/uploads/info.txt')
|
||||
}
|
||||
})
|
||||
|
||||
it('should list directory keys', async function () {
|
||||
const keys = await persistor.listDirectoryKeys(location, 'animals')
|
||||
expect(keys).to.have.lengthOf(2)
|
||||
expect(keys).to.include(scenario.fsPath(files.wombat))
|
||||
expect(keys).to.include(scenario.fsPath(files.giraffe))
|
||||
})
|
||||
|
||||
it('should return empty array for non-existing directories', async function () {
|
||||
const keys = await persistor.listDirectoryKeys(
|
||||
location,
|
||||
'does-not-exist'
|
||||
)
|
||||
expect(keys).to.deep.equal([])
|
||||
})
|
||||
})
|
||||
|
||||
describe('listDirectoryStats', function () {
|
||||
beforeEach(async function () {
|
||||
for (const file of Object.values(files)) {
|
||||
await persistor.sendFile(location, file, '/uploads/info.txt')
|
||||
}
|
||||
})
|
||||
|
||||
it('should list directory stats', async function () {
|
||||
const stats = await persistor.listDirectoryStats(location, 'animals')
|
||||
expect(stats).to.have.lengthOf(2)
|
||||
const keys = stats.map(s => s.key)
|
||||
expect(keys).to.include(scenario.fsPath(files.wombat))
|
||||
expect(keys).to.include(scenario.fsPath(files.giraffe))
|
||||
for (const stat of stats) {
|
||||
expect(stat.size).to.equal(localFiles['/uploads/info.txt'].length)
|
||||
}
|
||||
})
|
||||
|
||||
it('should return empty array for non-existing directories', async function () {
|
||||
const stats = await persistor.listDirectoryStats(
|
||||
location,
|
||||
'does-not-exist'
|
||||
)
|
||||
expect(stats).to.deep.equal([])
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
@@ -723,4 +723,127 @@ describe('GcsPersistorTests', function () {
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('listDirectoryKeys', function () {
|
||||
describe('with valid parameters', function () {
|
||||
let keys
|
||||
|
||||
beforeEach(async function () {
|
||||
const filesWithNames = files.map((file, i) => ({
|
||||
...file,
|
||||
name: i === 0 ? 'llama' : 'hippo',
|
||||
}))
|
||||
GcsBucket.getFiles.resolves([filesWithNames])
|
||||
keys = await GcsPersistor.listDirectoryKeys(bucket, key)
|
||||
})
|
||||
|
||||
it('should list the objects in the directory', function () {
|
||||
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
|
||||
expect(GcsBucket.getFiles).to.have.been.calledWith({
|
||||
prefix: `${key}/`,
|
||||
})
|
||||
})
|
||||
|
||||
it('should return the keys', function () {
|
||||
expect(keys).to.deep.equal(['llama', 'hippo'])
|
||||
})
|
||||
})
|
||||
|
||||
describe('when there are no files', function () {
|
||||
let keys
|
||||
|
||||
beforeEach(async function () {
|
||||
GcsBucket.getFiles.resolves([[]])
|
||||
keys = await GcsPersistor.listDirectoryKeys(bucket, key)
|
||||
})
|
||||
|
||||
it('should return an empty array', function () {
|
||||
expect(keys).to.deep.equal([])
|
||||
})
|
||||
})
|
||||
|
||||
describe('when there is an error listing the objects', function () {
|
||||
let error
|
||||
|
||||
beforeEach(async function () {
|
||||
GcsBucket.getFiles.rejects(genericError)
|
||||
try {
|
||||
await GcsPersistor.listDirectoryKeys(bucket, key)
|
||||
} catch (err) {
|
||||
error = err
|
||||
}
|
||||
})
|
||||
|
||||
it('should generate a ReadError', function () {
|
||||
expect(error).to.be.an.instanceOf(Errors.ReadError)
|
||||
})
|
||||
|
||||
it('should wrap the error', function () {
|
||||
expect(error.cause).to.equal(genericError)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('listDirectoryStats', function () {
|
||||
describe('with valid parameters', function () {
|
||||
let stats
|
||||
|
||||
beforeEach(async function () {
|
||||
const filesWithNames = files.map((file, i) => ({
|
||||
...file,
|
||||
name: i === 0 ? 'llama' : 'hippo',
|
||||
}))
|
||||
GcsBucket.getFiles.resolves([filesWithNames])
|
||||
stats = await GcsPersistor.listDirectoryStats(bucket, key)
|
||||
})
|
||||
|
||||
it('should list the objects in the directory', function () {
|
||||
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
|
||||
expect(GcsBucket.getFiles).to.have.been.calledWith({
|
||||
prefix: `${key}/`,
|
||||
})
|
||||
})
|
||||
|
||||
it('should return the stats', function () {
|
||||
expect(stats).to.deep.equal([
|
||||
{ key: 'llama', size: 11 },
|
||||
{ key: 'hippo', size: 22 },
|
||||
])
|
||||
})
|
||||
})
|
||||
|
||||
describe('when there are no files', function () {
|
||||
let stats
|
||||
|
||||
beforeEach(async function () {
|
||||
GcsBucket.getFiles.resolves([[]])
|
||||
stats = await GcsPersistor.listDirectoryStats(bucket, key)
|
||||
})
|
||||
|
||||
it('should return an empty array', function () {
|
||||
expect(stats).to.deep.equal([])
|
||||
})
|
||||
})
|
||||
|
||||
describe('when there is an error listing the objects', function () {
|
||||
let error
|
||||
|
||||
beforeEach(async function () {
|
||||
GcsBucket.getFiles.rejects(genericError)
|
||||
try {
|
||||
await GcsPersistor.listDirectoryStats(bucket, key)
|
||||
} catch (err) {
|
||||
error = err
|
||||
}
|
||||
})
|
||||
|
||||
it('should generate a ReadError', function () {
|
||||
expect(error).to.be.an.instanceOf(Errors.ReadError)
|
||||
})
|
||||
|
||||
it('should wrap the error', function () {
|
||||
expect(error.cause).to.equal(genericError)
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1046,4 +1046,215 @@ describe('S3PersistorTests', function () {
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('listDirectoryKeys', function () {
|
||||
describe('with valid parameters', function () {
|
||||
let keys
|
||||
|
||||
beforeEach(async function () {
|
||||
S3.mockSend(S3.ListObjectsV2Command, {
|
||||
Contents: files.map(file => ({ Key: file.Key })),
|
||||
IsTruncated: false,
|
||||
})
|
||||
keys = await S3Persistor.listDirectoryKeys(bucket, key)
|
||||
})
|
||||
|
||||
it('should list the objects in the directory', function () {
|
||||
S3.assertSendCalledWith(S3.ListObjectsV2Command, {
|
||||
Bucket: bucket,
|
||||
Prefix: key,
|
||||
})
|
||||
})
|
||||
|
||||
it('should return the keys', function () {
|
||||
expect(keys).to.deep.equal(['llama', 'hippo'])
|
||||
})
|
||||
})
|
||||
|
||||
describe('when there are no files', function () {
|
||||
let keys
|
||||
|
||||
beforeEach(async function () {
|
||||
S3.mockSend(S3.ListObjectsV2Command, {
|
||||
Contents: [],
|
||||
IsTruncated: false,
|
||||
})
|
||||
keys = await S3Persistor.listDirectoryKeys(bucket, key)
|
||||
})
|
||||
|
||||
it('should return an empty array', function () {
|
||||
expect(keys).to.deep.equal([])
|
||||
})
|
||||
})
|
||||
|
||||
describe('when there are more files available', function () {
|
||||
const continuationToken = 'wombat'
|
||||
let keys
|
||||
|
||||
beforeEach(async function () {
|
||||
S3.mockSend(
|
||||
S3.ListObjectsV2Command,
|
||||
{
|
||||
Contents: [files[0]].map(file => ({ Key: file.Key })),
|
||||
IsTruncated: true,
|
||||
NextContinuationToken: continuationToken,
|
||||
},
|
||||
{
|
||||
nextResponses: [
|
||||
{
|
||||
Contents: [files[1]].map(file => ({ Key: file.Key })),
|
||||
IsTruncated: false,
|
||||
},
|
||||
],
|
||||
}
|
||||
)
|
||||
keys = await S3Persistor.listDirectoryKeys(bucket, key)
|
||||
})
|
||||
|
||||
it('should list the objects in multiple calls', function () {
|
||||
S3.assertSendCallCount(S3.ListObjectsV2Command, 2)
|
||||
expect(S3.s3ClientStub.send.firstCall.args[0].payload).to.deep.equal({
|
||||
Bucket: bucket,
|
||||
Prefix: key,
|
||||
})
|
||||
expect(S3.s3ClientStub.send.secondCall.args[0].payload).to.deep.equal({
|
||||
Bucket: bucket,
|
||||
Prefix: key,
|
||||
ContinuationToken: continuationToken,
|
||||
})
|
||||
})
|
||||
|
||||
it('should return all keys', function () {
|
||||
expect(keys).to.deep.equal(['llama', 'hippo'])
|
||||
})
|
||||
})
|
||||
|
||||
describe('when there is an error listing the objects', function () {
|
||||
let error
|
||||
|
||||
beforeEach(async function () {
|
||||
S3.mockSend(S3.ListObjectsV2Command, genericError, { rejects: true })
|
||||
try {
|
||||
await S3Persistor.listDirectoryKeys(bucket, key)
|
||||
} catch (err) {
|
||||
error = err
|
||||
}
|
||||
})
|
||||
|
||||
it('should generate a ReadError', function () {
|
||||
expect(error).to.be.an.instanceOf(Errors.ReadError)
|
||||
})
|
||||
|
||||
it('should wrap the error', function () {
|
||||
expect(error.cause).to.exist
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('listDirectoryStats', function () {
|
||||
describe('with valid parameters', function () {
|
||||
let stats
|
||||
|
||||
beforeEach(async function () {
|
||||
S3.mockSend(S3.ListObjectsV2Command, {
|
||||
Contents: files.map(file => ({ Key: file.Key, Size: file.Size })),
|
||||
IsTruncated: false,
|
||||
})
|
||||
stats = await S3Persistor.listDirectoryStats(bucket, key)
|
||||
})
|
||||
|
||||
it('should list the objects in the directory', function () {
|
||||
S3.assertSendCalledWith(S3.ListObjectsV2Command, {
|
||||
Bucket: bucket,
|
||||
Prefix: key,
|
||||
})
|
||||
})
|
||||
|
||||
it('should return the stats', function () {
|
||||
expect(stats).to.deep.equal([
|
||||
{ key: 'llama', size: 11 },
|
||||
{ key: 'hippo', size: 22 },
|
||||
])
|
||||
})
|
||||
})
|
||||
|
||||
describe('when there are no files', function () {
|
||||
let stats
|
||||
|
||||
beforeEach(async function () {
|
||||
S3.mockSend(S3.ListObjectsV2Command, {
|
||||
Contents: [],
|
||||
IsTruncated: false,
|
||||
})
|
||||
stats = await S3Persistor.listDirectoryStats(bucket, key)
|
||||
})
|
||||
|
||||
it('should return an empty array', function () {
|
||||
expect(stats).to.deep.equal([])
|
||||
})
|
||||
})
|
||||
|
||||
describe('when there are more files available', function () {
|
||||
const continuationToken = 'wombat'
|
||||
let stats
|
||||
|
||||
beforeEach(async function () {
|
||||
S3.mockSend(
|
||||
S3.ListObjectsV2Command,
|
||||
{
|
||||
Contents: [files[0]].map(file => ({
|
||||
Key: file.Key,
|
||||
Size: file.Size,
|
||||
})),
|
||||
IsTruncated: true,
|
||||
NextContinuationToken: continuationToken,
|
||||
},
|
||||
{
|
||||
nextResponses: [
|
||||
{
|
||||
Contents: [files[1]].map(file => ({
|
||||
Key: file.Key,
|
||||
Size: file.Size,
|
||||
})),
|
||||
IsTruncated: false,
|
||||
},
|
||||
],
|
||||
}
|
||||
)
|
||||
stats = await S3Persistor.listDirectoryStats(bucket, key)
|
||||
})
|
||||
|
||||
it('should list the objects in multiple calls', function () {
|
||||
S3.assertSendCallCount(S3.ListObjectsV2Command, 2)
|
||||
})
|
||||
|
||||
it('should return all stats', function () {
|
||||
expect(stats).to.deep.equal([
|
||||
{ key: 'llama', size: 11 },
|
||||
{ key: 'hippo', size: 22 },
|
||||
])
|
||||
})
|
||||
})
|
||||
|
||||
describe('when there is an error listing the objects', function () {
|
||||
let error
|
||||
|
||||
beforeEach(async function () {
|
||||
S3.mockSend(S3.ListObjectsV2Command, genericError, { rejects: true })
|
||||
try {
|
||||
await S3Persistor.listDirectoryStats(bucket, key)
|
||||
} catch (err) {
|
||||
error = err
|
||||
}
|
||||
})
|
||||
|
||||
it('should generate a ReadError', function () {
|
||||
expect(error).to.be.an.instanceOf(Errors.ReadError)
|
||||
})
|
||||
|
||||
it('should wrap the error', function () {
|
||||
expect(error.cause).to.exist
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -400,28 +400,22 @@ export async function archiveLatestChunk(
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
async function addRawBlobsToArchive(historyId, archive, projectCache) {
|
||||
const key = projectKey.format(historyId)
|
||||
const { contents } = await projectCache.listDirectory(projectBlobsBucket, key)
|
||||
for (const blobRecord of contents) {
|
||||
if (!blobRecord.Key) {
|
||||
logger.debug({ blobRecord }, 'no key')
|
||||
continue
|
||||
}
|
||||
const blobKey = blobRecord.Key
|
||||
const blobKeys = await projectCache.listDirectoryKeys(
|
||||
projectBlobsBucket,
|
||||
projectKey.format(historyId)
|
||||
)
|
||||
for (const key of blobKeys) {
|
||||
try {
|
||||
const stream = await projectCache.getObjectStream(
|
||||
projectBlobsBucket,
|
||||
blobKey,
|
||||
key,
|
||||
{ autoGunzip: true }
|
||||
)
|
||||
archive.append(stream, {
|
||||
name: path.join(historyId, 'blobs', blobKey),
|
||||
name: path.join(historyId, 'blobs', key),
|
||||
})
|
||||
} catch (err) {
|
||||
logger.warn(
|
||||
{ err, path: blobRecord.Key },
|
||||
'Failed to append blob to archive'
|
||||
)
|
||||
logger.warn({ err, path: key }, 'Failed to append blob to archive')
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -445,24 +439,20 @@ export async function archiveRawProject(
|
||||
) {
|
||||
const projectCache = await getProjectPersistor(historyId)
|
||||
|
||||
const { contents: chunks } = await projectCache.listDirectory(
|
||||
const chunkKeys = await projectCache.listDirectoryKeys(
|
||||
chunksBucket,
|
||||
projectKey.format(historyId)
|
||||
)
|
||||
|
||||
if (chunks.length === 0) {
|
||||
if (chunkKeys.length === 0) {
|
||||
throw new Error('No chunks found')
|
||||
}
|
||||
|
||||
for (const chunkRecord of chunks) {
|
||||
if (!chunkRecord.Key) {
|
||||
logger.debug({ chunkRecord }, 'no key')
|
||||
continue
|
||||
}
|
||||
const chunkId = chunkRecord.Key.split('/').pop()
|
||||
logger.debug({ chunkId, key: chunkRecord.Key }, 'Processing chunk')
|
||||
for (const key of chunkKeys) {
|
||||
const chunkId = key.split('/').pop()
|
||||
logger.debug({ chunkId, key }, 'Processing chunk')
|
||||
|
||||
const { buffer } = await loadChunkByKey(projectCache, chunkRecord.Key)
|
||||
const { buffer } = await loadChunkByKey(projectCache, key)
|
||||
|
||||
archive.append(buffer, {
|
||||
name: `${historyId}/chunks/${chunkId}/chunk.json`,
|
||||
|
||||
@@ -900,6 +900,11 @@ class BlobComparator {
|
||||
|
||||
const SHA1_HEX_REGEX = /^[a-f0-9]{40}$/
|
||||
|
||||
/**
|
||||
* Get a listing of all blobs for a project
|
||||
* @param {string} historyId - The history ID
|
||||
* @returns {Promise<Map<string, {key: string, size: number}>>} Map of blob hash to blob metadata
|
||||
*/
|
||||
async function getBlobListing(historyId) {
|
||||
const backupPersistorForProject = await backupPersistor.forProject(
|
||||
projectBlobsBucket,
|
||||
@@ -909,7 +914,7 @@ async function getBlobListing(historyId) {
|
||||
// get the blob listing
|
||||
const projectBlobsPath = projectKey.format(historyId)
|
||||
|
||||
const { contents: blobList } = await backupPersistorForProject.listDirectory(
|
||||
const blobList = await backupPersistorForProject.listDirectoryStats(
|
||||
projectBlobsBucket,
|
||||
projectBlobsPath
|
||||
)
|
||||
@@ -918,21 +923,22 @@ async function getBlobListing(historyId) {
|
||||
return new Map()
|
||||
}
|
||||
|
||||
/** @type {Map<string, {key: string, size: number}>} */
|
||||
const remoteBlobs = new Map()
|
||||
|
||||
for (const blobRecord of blobList) {
|
||||
if (!blobRecord.Key) {
|
||||
logger.debug({ blobRecord }, 'no key')
|
||||
if (!blobRecord.key || typeof blobRecord.size !== 'number') {
|
||||
logger.debug({ blobRecord }, 'invalid blob record')
|
||||
continue
|
||||
}
|
||||
const parts = blobRecord.Key.split('/')
|
||||
const parts = blobRecord.key.split('/')
|
||||
const hash = parts[3] + parts[4]
|
||||
|
||||
if (!SHA1_HEX_REGEX.test(hash)) {
|
||||
console.warn(`Invalid SHA1 hash for project ${historyId}: ${hash}`)
|
||||
continue
|
||||
}
|
||||
remoteBlobs.set(hash, blobRecord)
|
||||
remoteBlobs.set(hash, { key: blobRecord.key, size: blobRecord.size })
|
||||
}
|
||||
return remoteBlobs
|
||||
}
|
||||
@@ -1075,26 +1081,26 @@ async function compareBackups(projectId, options, log = console.log) {
|
||||
const blobListEntry = blobsFromListing.get(blob.hash)
|
||||
if (options.fast) {
|
||||
if (blobListEntry) {
|
||||
if (blob.byteLength === blobListEntry.Size) {
|
||||
if (blob.byteLength === blobListEntry.size) {
|
||||
// Size matches exactly
|
||||
log(
|
||||
` ✓ Blob ${blob.hash} exists on remote with expected size (${blob.byteLength} bytes)`
|
||||
)
|
||||
totalBlobMatches++
|
||||
continue
|
||||
} else if (blob.stringLength > 0 && blobListEntry.Size > 0) {
|
||||
} else if (blob.stringLength > 0 && blobListEntry.size > 0) {
|
||||
// Text file present with compressed size, assume valid as we are in --fast comparison mode
|
||||
const compressionRatio = (
|
||||
blobListEntry.Size / blob.byteLength
|
||||
blobListEntry.size / blob.byteLength
|
||||
).toFixed(2)
|
||||
log(
|
||||
` ✓ Blob ${blob.hash} consistent with compressed data on remote (${blob.byteLength} bytes => ${blobListEntry.Size} bytes, ratio=${compressionRatio})`
|
||||
` ✓ Blob ${blob.hash} consistent with compressed data on remote (${blob.byteLength} bytes => ${blobListEntry.size} bytes, ratio=${compressionRatio})`
|
||||
)
|
||||
totalBlobMatches++
|
||||
continue
|
||||
} else {
|
||||
log(
|
||||
` ✗ Blob ${blob.hash} size mismatch (original: ${blob.byteLength} bytes, stringLength: ${blob.stringLength}, backup: ${blobListEntry.Size} bytes)`
|
||||
` ✗ Blob ${blob.hash} size mismatch (original: ${blob.byteLength} bytes, stringLength: ${blob.stringLength}, backup: ${blobListEntry.size} bytes)`
|
||||
)
|
||||
totalBlobMismatches++
|
||||
errors.push({
|
||||
|
||||
Reference in New Issue
Block a user