diff --git a/services/filestore/app/js/FSPersistor.js b/services/filestore/app/js/FSPersistor.js index 862acb9bcb..2ba65f06d2 100644 --- a/services/filestore/app/js/FSPersistor.js +++ b/services/filestore/app/js/FSPersistor.js @@ -103,12 +103,17 @@ async function deleteFile(location, name) { try { await fsUnlink(`${location}/${filteredName}`) } catch (err) { - throw _wrapError( + const wrappedError = _wrapError( err, 'failed to delete file', { location, filteredName }, WriteError ) + if (!(wrappedError instanceof NotFoundError)) { + // S3 doesn't give us a 404 when a file wasn't there to be deleted, so we + // should be consistent here as well + throw wrappedError + } } } diff --git a/services/filestore/app/js/MigrationPersistor.js b/services/filestore/app/js/MigrationPersistor.js new file mode 100644 index 0000000000..9f7a834f31 --- /dev/null +++ b/services/filestore/app/js/MigrationPersistor.js @@ -0,0 +1,113 @@ +const metrics = require('metrics-sharelatex') +const Settings = require('settings-sharelatex') +const logger = require('logger-sharelatex') +const { callbackify } = require('util') +const { NotFoundError } = require('./Errors') + +// Persistor that wraps two other persistors. Talks to the 'primary' by default, +// but will fall back to an older persistor in the case of a not-found error. +// If `Settings.filestore.fallback.copyOnMiss` is set, this will copy files from the fallback +// to the primary, in the event that they are missing. +// +// It is unlikely that the bucket/location name will be the same on the fallback +// as the primary. The bucket names should be overridden in `Settings.filestore.fallback.buckets` +// e.g. +// Settings.filestore.fallback.buckets = { +// myBucketOnS3: 'myBucketOnGCS' +// }s + +module.exports = function(primary, fallback) { + function _wrapMethodOnBothPersistors(method) { + return async function(bucket, key, ...moreArgs) { + const fallbackBucket = _getFallbackBucket(bucket) + + await Promise.all([ + primary.promises[method](bucket, key, ...moreArgs), + fallback.promises[method](fallbackBucket, key, ...moreArgs) + ]) + } + } + + async function copyFileWithFallback(bucket, sourceKey, destKey) { + try { + return await primary.promises.copyFile(bucket, sourceKey, destKey) + } catch (err) { + if (err instanceof NotFoundError) { + const fallbackBucket = _getFallbackBucket(bucket) + return _copyFileFromFallback(fallbackBucket, bucket, sourceKey, destKey) + } + } + } + + function _getFallbackBucket(bucket) { + return ( + Settings.filestore.fallback.buckets && + Settings.filestore.fallback.buckets[bucket] + ) + } + + function _wrapFallbackMethod(method, enableCopy = true) { + return async function(bucket, key, ...moreArgs) { + try { + return await primary.promises[method](bucket, key, ...moreArgs) + } catch (err) { + if (err instanceof NotFoundError) { + const fallbackBucket = _getFallbackBucket(bucket) + if (Settings.filestore.fallback.copyOnMiss && enableCopy) { + // run in background + _copyFileFromFallback(fallbackBucket, bucket, key, key).catch( + err => { + logger.warn({ err }, 'failed to copy file from fallback') + } + ) + } + return fallback.promises[method](fallbackBucket, key, ...moreArgs) + } + throw err + } + } + } + + async function _copyFileFromFallback( + sourceBucket, + destBucket, + sourceKey, + destKey + ) { + const sourceStream = await fallback.promises.getFileStream( + sourceBucket, + sourceKey, + {} + ) + + await primary.promises.sendStream(destBucket, destKey, sourceStream) + metrics.inc('fallback.copy') + } + + return { + primaryPersistor: primary, + fallbackPersistor: fallback, + sendFile: primary.sendFile, + sendStream: primary.sendStream, + getFileStream: callbackify(_wrapFallbackMethod('getFileStream')), + deleteDirectory: callbackify( + _wrapMethodOnBothPersistors('deleteDirectory') + ), + getFileSize: callbackify(_wrapFallbackMethod('getFileSize')), + deleteFile: callbackify(_wrapMethodOnBothPersistors('deleteFile')), + copyFile: callbackify(copyFileWithFallback), + checkIfFileExists: callbackify(_wrapFallbackMethod('checkIfFileExists')), + directorySize: callbackify(_wrapFallbackMethod('directorySize', false)), + promises: { + sendFile: primary.promises.sendFile, + sendStream: primary.promises.sendStream, + getFileStream: _wrapFallbackMethod('getFileStream'), + deleteDirectory: _wrapMethodOnBothPersistors('deleteDirectory'), + getFileSize: _wrapFallbackMethod('getFileSize'), + deleteFile: _wrapMethodOnBothPersistors('deleteFile'), + copyFile: copyFileWithFallback, + checkIfFileExists: _wrapFallbackMethod('checkIfFileExists'), + directorySize: _wrapFallbackMethod('directorySize', false) + } + } +} diff --git a/services/filestore/app/js/PersistorManager.js b/services/filestore/app/js/PersistorManager.js index bd944a03f7..32f6cd41f8 100644 --- a/services/filestore/app/js/PersistorManager.js +++ b/services/filestore/app/js/PersistorManager.js @@ -3,7 +3,8 @@ const logger = require('logger-sharelatex') logger.log( { - backend: settings.filestore.backend + backend: settings.filestore.backend, + fallback: settings.filestore.fallback && settings.filestore.fallback.backend }, 'Loading backend' ) @@ -11,14 +12,26 @@ if (!settings.filestore.backend) { throw new Error('no backend specified - config incomplete') } -switch (settings.filestore.backend) { - case 'aws-sdk': - case 's3': - module.exports = require('./S3Persistor') - break - case 'fs': - module.exports = require('./FSPersistor') - break - default: - throw new Error(`unknown filestore backend: ${settings.filestore.backend}`) +function getPersistor(backend) { + switch (backend) { + case 'aws-sdk': + case 's3': + return require('./S3Persistor') + case 'fs': + return require('./FSPersistor') + default: + throw new Error(`unknown filestore backend: ${backend}`) + } } + +let persistor = getPersistor(settings.filestore.backend) + +if (settings.filestore.fallback && settings.filestore.fallback.backend) { + const migrationPersistor = require('./MigrationPersistor') + persistor = migrationPersistor( + persistor, + getPersistor(settings.filestore.fallback.backend) + ) +} + +module.exports = persistor diff --git a/services/filestore/app/js/S3Persistor.js b/services/filestore/app/js/S3Persistor.js index 52cadfbfbd..6d22823401 100644 --- a/services/filestore/app/js/S3Persistor.js +++ b/services/filestore/app/js/S3Persistor.js @@ -173,6 +173,7 @@ async function deleteFile(bucketName, key) { .deleteObject({ Bucket: bucketName, Key: key }) .promise() } catch (err) { + // s3 does not give us a NotFoundError here throw _wrapError( err, 'failed to delete file in S3', @@ -232,8 +233,12 @@ async function directorySize(bucketName, key) { } function _wrapError(error, message, params, ErrorType) { + // the AWS client can return one of 'NoSuchKey', 'NotFound' or 404 (integer) + // when something is not found, depending on the endpoint if ( - ['NoSuchKey', 'NotFound', 'AccessDenied', 'ENOENT'].includes(error.code) + ['NoSuchKey', 'NotFound', 404, 'AccessDenied', 'ENOENT'].includes( + error.code + ) ) { return new NotFoundError({ message: 'no such file', diff --git a/services/filestore/config/settings.defaults.coffee b/services/filestore/config/settings.defaults.coffee index 206f932a76..a4a2df2d24 100644 --- a/services/filestore/config/settings.defaults.coffee +++ b/services/filestore/config/settings.defaults.coffee @@ -7,6 +7,19 @@ if process.env['AWS_KEY'] && !process.env['AWS_ACCESS_KEY_ID'] if process.env['AWS_SECRET'] && !process.env['AWS_SECRET_ACCESS_KEY'] process.env['AWS_SECRET_ACCESS_KEY'] = process.env['AWS_SECRET'] +# pre-backend setting, fall back to old behaviour +unless process.env['BACKEND']? + if process.env['AWS_ACCESS_KEY_ID']? or process.env['S3_BUCKET_CREDENTIALS']? + process.env['BACKEND'] = "s3" + process.env['USER_FILES_BUCKET_NAME'] = process.env['AWS_S3_USER_FILES_BUCKET_NAME'] + process.env['TEMPLATE_FILES_BUCKET_NAME'] = process.env['AWS_S3_TEMPLATE_FILES_BUCKET_NAME'] + process.env['PUBLIC_FILES_BUCKET_NAME'] = process.env['AWS_S3_PUBLIC_FILES_BUCKET_NAME'] + else + process.env['BACKEND'] = "fs" + process.env['USER_FILES_BUCKET_NAME'] = Path.resolve(__dirname + "/../user_files") + process.env['TEMPLATE_FILES_BUCKET_NAME'] = Path.resolve(__dirname + "/../public_files") + process.env['PUBLIC_FILES_BUCKET_NAME'] = Path.resolve(__dirname + "/../template_files") + settings = internal: filestore: @@ -18,38 +31,28 @@ settings = # Choices are # s3 - Amazon S3 # fs - local filesystem - if process.env['AWS_ACCESS_KEY_ID']? or process.env['S3_BUCKET_CREDENTIALS']? - backend: "s3" - s3: + backend: process.env['BACKEND'] + + s3: + if process.env['AWS_ACCESS_KEY_ID']? or process.env['S3_BUCKET_CREDENTIALS']? key: process.env['AWS_ACCESS_KEY_ID'] secret: process.env['AWS_SECRET_ACCESS_KEY'] endpoint: process.env['AWS_S3_ENDPOINT'] - stores: - user_files: process.env['AWS_S3_USER_FILES_BUCKET_NAME'] - template_files: process.env['AWS_S3_TEMPLATE_FILES_BUCKET_NAME'] - public_files: process.env['AWS_S3_PUBLIC_FILES_BUCKET_NAME'] - # if you are using S3, then fill in your S3 details below, - # or use env var with the same structure. - # s3: - # key: "" # default - # secret: "" # default - # - # s3BucketCreds: - # bucketname1: # secrets for bucketname1 - # auth_key: "" - # auth_secret: "" - # bucketname2: # secrets for bucketname2... - s3BucketCreds: JSON.parse process.env['S3_BUCKET_CREDENTIALS'] if process.env['S3_BUCKET_CREDENTIALS']? - else - backend: "fs" - stores: - # - # For local filesystem this is the directory to store the files in. - # Must contain full path, e.g. "/var/lib/sharelatex/data". - # This path must exist, not be tmpfs and be writable to by the user sharelatex is run as. - user_files: Path.resolve(__dirname + "/../user_files") - public_files: Path.resolve(__dirname + "/../public_files") - template_files: Path.resolve(__dirname + "/../template_files") + + stores: + user_files: process.env['USER_FILES_BUCKET_NAME'] + template_files: process.env['TEMPLATE_FILES_BUCKET_NAME'] + public_files: process.env['PUBLIC_FILES_BUCKET_NAME'] + + s3BucketCreds: JSON.parse process.env['S3_BUCKET_CREDENTIALS'] if process.env['S3_BUCKET_CREDENTIALS']? + + fallback: + if process.env['FALLBACK_BACKEND']? + backend: process.env['FALLBACK_BACKEND'] + # mapping of bucket names on the fallback, to bucket names on the primary. + # e.g. { myS3UserFilesBucketName: 'myGoogleUserFilesBucketName' } + buckets: JSON.parse process.env['FALLBACK_BUCKET_MAPPING'] if process.env['FALLBACK_BUCKET_MAPPING']? + copyOnMiss: if process.env['COPY_ON_MISS'] == 'true' then true else false path: uploadFolder: Path.resolve(__dirname + "/../uploads") diff --git a/services/filestore/npm-shrinkwrap.json b/services/filestore/npm-shrinkwrap.json index 8d78271caa..b343d6ad2c 100644 --- a/services/filestore/npm-shrinkwrap.json +++ b/services/filestore/npm-shrinkwrap.json @@ -5055,6 +5055,12 @@ "resolved": "https://registry.npmjs.org/stream-shift/-/stream-shift-1.0.0.tgz", "integrity": "sha1-1cdSgl5TZ+eG944Y5EXqIjoVWVI=" }, + "streamifier": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/streamifier/-/streamifier-0.1.1.tgz", + "integrity": "sha1-l+mNj6TRBdYqJpHR3AfoINuN/E8=", + "dev": true + }, "string-width": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", diff --git a/services/filestore/package.json b/services/filestore/package.json index 14e35cd8a2..303393bd56 100644 --- a/services/filestore/package.json +++ b/services/filestore/package.json @@ -68,6 +68,7 @@ "prettier-eslint-cli": "^5.0.0", "sandboxed-module": "2.0.3", "sinon": "7.1.1", - "sinon-chai": "^3.3.0" + "sinon-chai": "^3.3.0", + "streamifier": "^0.1.1" } } diff --git a/services/filestore/test/acceptance/js/FilestoreApp.js b/services/filestore/test/acceptance/js/FilestoreApp.js index 718d53bcf8..20564e2d40 100644 --- a/services/filestore/test/acceptance/js/FilestoreApp.js +++ b/services/filestore/test/acceptance/js/FilestoreApp.js @@ -56,6 +56,7 @@ class FilestoreApp { } this.initing = false + this.persistor = require('../../../app/js/PersistorManager') } async waitForInit() { diff --git a/services/filestore/test/acceptance/js/FilestoreTests.js b/services/filestore/test/acceptance/js/FilestoreTests.js index d7dfbce57c..5a0de3abd8 100644 --- a/services/filestore/test/acceptance/js/FilestoreTests.js +++ b/services/filestore/test/acceptance/js/FilestoreTests.js @@ -11,6 +11,7 @@ const S3 = require('aws-sdk/clients/s3') const Stream = require('stream') const request = require('request') const { promisify } = require('util') +const streamifier = require('streamifier') chai.use(require('chai-as-promised')) const fsWriteFile = promisify(fs.writeFile) @@ -25,6 +26,19 @@ async function getMetric(filestoreUrl, metric) { return parseInt(found ? found[1] : 0) || 0 } +if (!process.env.AWS_ACCESS_KEY_ID) { + throw new Error('please provide credentials for the AWS S3 test server') +} + +function streamToString(stream) { + const chunks = [] + return new Promise((resolve, reject) => { + stream.on('data', chunk => chunks.push(chunk)) + stream.on('error', reject) + stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8'))) + }) +} + // store settings for multiple backends, so that we can test each one. // fs will always be available - add others if they are configured const BackendSettings = { @@ -35,11 +49,8 @@ const BackendSettings = { public_files: Path.resolve(__dirname, '../../../public_files'), template_files: Path.resolve(__dirname, '../../../template_files') } - } -} - -if (process.env.AWS_ACCESS_KEY_ID) { - BackendSettings.S3Persistor = { + }, + S3Persistor: { backend: 's3', s3: { key: process.env.AWS_ACCESS_KEY_ID, @@ -52,6 +63,62 @@ if (process.env.AWS_ACCESS_KEY_ID) { template_files: process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME, public_files: process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME } + }, + FallbackS3ToFSPersistor: { + backend: 's3', + s3: { + key: process.env.AWS_ACCESS_KEY_ID, + secret: process.env.AWS_SECRET_ACCESS_KEY, + endpoint: process.env.AWS_S3_ENDPOINT, + pathStyle: true + }, + stores: { + user_files: process.env.AWS_S3_USER_FILES_BUCKET_NAME, + template_files: process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME, + public_files: process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME + }, + fallback: { + backend: 'fs', + buckets: { + [process.env.AWS_S3_USER_FILES_BUCKET_NAME]: Path.resolve( + __dirname, + '../../../user_files' + ), + [process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME]: Path.resolve( + __dirname, + '../../../public_files' + ), + [process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME]: Path.resolve( + __dirname, + '../../../template_files' + ) + } + } + }, + FallbackFSToS3Persistor: { + backend: 'fs', + s3: { + key: process.env.AWS_ACCESS_KEY_ID, + secret: process.env.AWS_SECRET_ACCESS_KEY, + endpoint: process.env.AWS_S3_ENDPOINT, + pathStyle: true + }, + stores: { + user_files: Path.resolve(__dirname, '../../../user_files'), + public_files: Path.resolve(__dirname, '../../../public_files'), + template_files: Path.resolve(__dirname, '../../../template_files') + }, + fallback: { + backend: 's3', + buckets: { + [Path.resolve(__dirname, '../../../user_files')]: process.env + .AWS_S3_USER_FILES_BUCKET_NAME, + [Path.resolve(__dirname, '../../../public_files')]: process.env + .AWS_S3_TEMPLATE_FILES_BUCKET_NAME, + [Path.resolve(__dirname, '../../../template_files')]: process.env + .AWS_S3_PUBLIC_FILES_BUCKET_NAME + } + } } } @@ -100,23 +167,21 @@ describe('Filestore', function() { }) describe('with a file on the server', function() { - let fileId, fileUrl + let fileId, fileUrl, constantFileContent const localFileReadPath = '/tmp/filestore_acceptance_tests_file_read.txt' - const constantFileContent = [ - 'hello world', - `line 2 goes here ${Math.random()}`, - 'there are 3 lines in all' - ].join('\n') - - before(async function() { - await fsWriteFile(localFileReadPath, constantFileContent) - }) beforeEach(async function() { fileId = Math.random() fileUrl = `${filestoreUrl}/project/acceptance_tests/file/${directoryName}%2F${fileId}` + constantFileContent = [ + 'hello world', + `line 2 goes here ${Math.random()}`, + 'there are 3 lines in all' + ].join('\n') + + await fsWriteFile(localFileReadPath, constantFileContent) const writeStream = request.post(fileUrl) const readStream = fs.createReadStream(localFileReadPath) @@ -177,7 +242,7 @@ describe('Filestore', function() { }) it('should be able to copy files', async function() { - const newProjectID = 'acceptance_tests_copyied_project' + const newProjectID = 'acceptance_tests_copied_project' const newFileId = Math.random() const newFileUrl = `${filestoreUrl}/project/${newProjectID}/file/${directoryName}%2F${newFileId}` const opts = { @@ -198,6 +263,18 @@ describe('Filestore', function() { expect(response.body).to.equal(constantFileContent) }) + it('should be able to overwrite the file', async function() { + const newContent = `here is some different content, ${Math.random()}` + const writeStream = request.post(fileUrl) + const readStream = streamifier.createReadStream(newContent) + // hack to consume the result to ensure the http request has been fully processed + const resultStream = fs.createWriteStream('/dev/null') + await pipeline(readStream, writeStream, resultStream) + + const response = await rp.get(fileUrl) + expect(response.body).to.equal(newContent) + }) + if (backend === 'S3Persistor') { it('should record an egress metric for the upload', async function() { const metric = await getMetric(filestoreUrl, 's3_egress') @@ -292,10 +369,10 @@ describe('Filestore', function() { if (backend === 'S3Persistor') { describe('with a file in a specific bucket', function() { - let constantFileContents, fileId, fileUrl, bucketName + let constantFileContent, fileId, fileUrl, bucketName beforeEach(async function() { - constantFileContents = `This is a file in a different S3 bucket ${Math.random()}` + constantFileContent = `This is a file in a different S3 bucket ${Math.random()}` fileId = Math.random().toString() bucketName = Math.random().toString() fileUrl = `${filestoreUrl}/bucket/${bucketName}/key/${fileId}` @@ -320,14 +397,302 @@ describe('Filestore', function() { .upload({ Bucket: bucketName, Key: fileId, - Body: constantFileContents + Body: constantFileContent }) .promise() }) it('should get the file from the specified bucket', async function() { const response = await rp.get(fileUrl) - expect(response.body).to.equal(constantFileContents) + expect(response.body).to.equal(constantFileContent) + }) + }) + } + + if (BackendSettings[backend].fallback) { + describe('with a fallback', function() { + async function uploadStringToPersistor( + persistor, + bucket, + key, + content + ) { + const fileStream = streamifier.createReadStream(content) + await persistor.promises.sendStream(bucket, key, fileStream) + } + + async function getStringFromPersistor(persistor, bucket, key) { + const stream = await persistor.promises.getFileStream( + bucket, + key, + {} + ) + return streamToString(stream) + } + + async function expectPersistorToHaveFile( + persistor, + bucket, + key, + content + ) { + const foundContent = await getStringFromPersistor( + persistor, + bucket, + key + ) + expect(foundContent).to.equal(content) + } + + async function expectPersistorNotToHaveFile(persistor, bucket, key) { + await expect( + getStringFromPersistor(persistor, bucket, key) + ).to.eventually.have.been.rejected.with.property( + 'name', + 'NotFoundError' + ) + } + + let constantFileContent, + fileId, + fileKey, + fileUrl, + bucket, + fallbackBucket + const projectId = 'acceptance_tests' + + beforeEach(function() { + constantFileContent = `This is yet more file content ${Math.random()}` + fileId = Math.random().toString() + fileKey = `${projectId}/${directoryName}/${fileId}` + fileUrl = `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileId}` + + bucket = Settings.filestore.stores.user_files + fallbackBucket = Settings.filestore.fallback.buckets[bucket] + }) + + describe('with a file in the fallback bucket', function() { + beforeEach(async function() { + await uploadStringToPersistor( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + }) + + it('should not find file in the primary', async function() { + await expectPersistorNotToHaveFile( + app.persistor.primaryPersistor, + bucket, + fileKey + ) + }) + + it('should find the file in the fallback', async function() { + await expectPersistorToHaveFile( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + }) + + it('should fetch the file', async function() { + const res = await rp.get(fileUrl) + expect(res.body).to.equal(constantFileContent) + }) + + it('should not copy the file to the primary', async function() { + await rp.get(fileUrl) + + await expectPersistorNotToHaveFile( + app.persistor.primaryPersistor, + bucket, + fileKey + ) + }) + + describe('when copyOnMiss is enabled', function() { + beforeEach(function() { + Settings.filestore.fallback.copyOnMiss = true + }) + + it('copies the file to the primary', async function() { + await rp.get(fileUrl) + // wait for the file to copy in the background + await promisify(setTimeout)(1000) + + await expectPersistorToHaveFile( + app.persistor.primaryPersistor, + bucket, + fileKey, + constantFileContent + ) + }) + }) + + describe('when copying a file', function() { + let newFileId, newFileUrl, newFileKey + const newProjectID = 'acceptance_tests_copied_project' + + beforeEach(async function() { + newFileId = Math.random() + newFileUrl = `${filestoreUrl}/project/${newProjectID}/file/${directoryName}%2F${newFileId}` + newFileKey = `${newProjectID}/${directoryName}/${newFileId}` + + const opts = { + method: 'put', + uri: newFileUrl, + json: { + source: { + project_id: 'acceptance_tests', + file_id: `${directoryName}/${fileId}` + } + } + } + + const response = await rp(opts) + expect(response.statusCode).to.equal(200) + }) + + it('should leave the old file in the old bucket', async function() { + await expectPersistorToHaveFile( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + }) + + it('should not create a new file in the old bucket', async function() { + await expectPersistorNotToHaveFile( + app.persistor.fallbackPersistor, + fallbackBucket, + newFileKey + ) + }) + + it('should not copy the old file to the new bucket', async function() { + await expectPersistorNotToHaveFile( + app.persistor.primaryPersistor, + bucket, + fileKey + ) + }) + + it('should create a new file in the new bucket', async function() { + await expectPersistorToHaveFile( + app.persistor.primaryPersistor, + bucket, + newFileKey, + constantFileContent + ) + }) + }) + }) + + describe('when sending a file', function() { + beforeEach(async function() { + const writeStream = request.post(fileUrl) + const readStream = streamifier.createReadStream( + constantFileContent + ) + // hack to consume the result to ensure the http request has been fully processed + const resultStream = fs.createWriteStream('/dev/null') + await pipeline(readStream, writeStream, resultStream) + }) + + it('should store the file on the primary', async function() { + await expectPersistorToHaveFile( + app.persistor.primaryPersistor, + bucket, + fileKey, + constantFileContent + ) + }) + + it('should not store the file on the fallback', async function() { + await expectPersistorNotToHaveFile( + app.persistor.fallbackPersistor, + fallbackBucket, + `acceptance_tests/${directoryName}/${fileId}` + ) + }) + }) + + describe('when deleting a file', function() { + describe('when the file exists on the primary', function() { + beforeEach(async function() { + await uploadStringToPersistor( + app.persistor.primaryPersistor, + bucket, + fileKey, + constantFileContent + ) + }) + + it('should delete the file', async function() { + const response = await rp.del(fileUrl) + expect(response.statusCode).to.equal(204) + await expect( + rp.get(fileUrl) + ).to.eventually.be.rejected.and.have.property('statusCode', 404) + }) + }) + + describe('when the file exists on the fallback', function() { + beforeEach(async function() { + await uploadStringToPersistor( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + }) + + it('should delete the file', async function() { + const response = await rp.del(fileUrl) + expect(response.statusCode).to.equal(204) + await expect( + rp.get(fileUrl) + ).to.eventually.be.rejected.and.have.property('statusCode', 404) + }) + }) + + describe('when the file exists on both the primary and the fallback', function() { + beforeEach(async function() { + await uploadStringToPersistor( + app.persistor.primaryPersistor, + bucket, + fileKey, + constantFileContent + ) + await uploadStringToPersistor( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + }) + + it('should delete the files', async function() { + const response = await rp.del(fileUrl) + expect(response.statusCode).to.equal(204) + await expect( + rp.get(fileUrl) + ).to.eventually.be.rejected.and.have.property('statusCode', 404) + }) + }) + + describe('when the file does not exist', function() { + it('should return return 204', async function() { + // S3 doesn't give us a 404 when the object doesn't exist, so to stay + // consistent we merrily return 204 ourselves here as well + const response = await rp.del(fileUrl) + expect(response.statusCode).to.equal(204) + }) + }) }) }) } diff --git a/services/filestore/test/unit/js/MigrationPersistorTests.js b/services/filestore/test/unit/js/MigrationPersistorTests.js new file mode 100644 index 0000000000..1cc8324d46 --- /dev/null +++ b/services/filestore/test/unit/js/MigrationPersistorTests.js @@ -0,0 +1,463 @@ +const sinon = require('sinon') +const chai = require('chai') +const { expect } = chai +const modulePath = '../../../app/js/MigrationPersistor.js' +const SandboxedModule = require('sandboxed-module') + +const Errors = require('../../../app/js/Errors') + +// Not all methods are tested here, but a method with each type of wrapping has +// tests. Specifically, the following wrapping methods are tested here: +// getFileStream: _wrapFallbackMethod +// sendStream: forward-to-primary +// deleteFile: _wrapMethodOnBothPersistors +// copyFile: copyFileWithFallback + +describe('MigrationPersistorTests', function() { + const bucket = 'womBucket' + const fallbackBucket = 'bucKangaroo' + const key = 'monKey' + const destKey = 'donKey' + const genericError = new Error('guru meditation error') + const notFoundError = new Errors.NotFoundError('not found') + const size = 33 + const fileStream = 'fileStream' + + function newPersistor(hasFile) { + return { + promises: { + sendFile: sinon.stub().resolves(), + sendStream: sinon.stub().resolves(), + getFileStream: hasFile + ? sinon.stub().resolves(fileStream) + : sinon.stub().rejects(notFoundError), + deleteDirectory: sinon.stub().resolves(), + getFileSize: hasFile + ? sinon.stub().resolves(size) + : sinon.stub().rejects(notFoundError), + deleteFile: sinon.stub().resolves(), + copyFile: hasFile + ? sinon.stub().resolves() + : sinon.stub().rejects(notFoundError), + checkIfFileExists: sinon.stub().resolves(hasFile), + directorySize: hasFile + ? sinon.stub().resolves(size) + : sinon.stub().rejects(notFoundError) + } + } + } + + let Metrics, Settings, Logger, MigrationPersistor + + beforeEach(function() { + Settings = { + filestore: { + fallback: { + buckets: { + [bucket]: fallbackBucket + } + } + } + } + + Metrics = { + inc: sinon.stub() + } + + Logger = { + warn: sinon.stub() + } + + MigrationPersistor = SandboxedModule.require(modulePath, { + requires: { + 'settings-sharelatex': Settings, + './Errors': Errors, + 'metrics-sharelatex': Metrics, + 'logger-sharelatex': Logger + }, + globals: { console } + }) + }) + + describe('getFileStream', function() { + const options = { wombat: 'potato' } + describe('when the primary persistor has the file', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor, response + beforeEach(async function() { + primaryPersistor = newPersistor(true) + fallbackPersistor = newPersistor(false) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + response = await migrationPersistor.promises.getFileStream( + bucket, + key, + options + ) + }) + + it('should return the file stream', function() { + expect(response).to.equal(fileStream) + }) + + it('should fetch the file from the primary persistor, with the correct options', function() { + expect( + primaryPersistor.promises.getFileStream + ).to.have.been.calledWithExactly(bucket, key, options) + }) + + it('should not query the fallback persistor', function() { + expect(fallbackPersistor.promises.getFileStream).not.to.have.been.called + }) + }) + + describe('when the fallback persistor has the file', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor, response + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(true) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + response = await migrationPersistor.promises.getFileStream( + bucket, + key, + options + ) + }) + + it('should return the file stream', function() { + expect(response).to.equal(fileStream) + }) + + it('should fetch the file from the primary persistor with the correct options', function() { + expect( + primaryPersistor.promises.getFileStream + ).to.have.been.calledWithExactly(bucket, key, options) + }) + + it('should fetch the file from the fallback persistor with the fallback bucket with the correct options', function() { + expect( + fallbackPersistor.promises.getFileStream + ).to.have.been.calledWithExactly(fallbackBucket, key, options) + }) + + it('should only create one stream', function() { + expect(fallbackPersistor.promises.getFileStream).to.have.been.calledOnce + }) + + it('should not send the file to the primary', function() { + expect(primaryPersistor.promises.sendStream).not.to.have.been.called + }) + }) + + describe('when the file should be copied to the primary', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(true) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + Settings.filestore.fallback.copyOnMiss = true + return migrationPersistor.promises.getFileStream(bucket, key, options) + }) + + it('should create two streams', function() { + expect(fallbackPersistor.promises.getFileStream).to.have.been + .calledTwice + }) + + it('should send one of the streams to the primary', function() { + expect( + primaryPersistor.promises.sendStream + ).to.have.been.calledWithExactly(bucket, key, fileStream) + }) + }) + + describe('when neither persistor has the file', function() { + it('rejects with a NotFoundError', async function() { + const migrationPersistor = MigrationPersistor( + newPersistor(false), + newPersistor(false) + ) + return expect( + migrationPersistor.promises.getFileStream(bucket, key) + ).to.eventually.be.rejected.and.be.an.instanceOf(Errors.NotFoundError) + }) + }) + + describe('when the primary persistor throws an unexpected error', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor, error + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(true) + primaryPersistor.promises.getFileStream = sinon + .stub() + .rejects(genericError) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + try { + await migrationPersistor.promises.getFileStream(bucket, key, options) + } catch (err) { + error = err + } + }) + + it('rejects with the error', function() { + expect(error).to.equal(genericError) + }) + + it('does not call the fallback', function() { + expect(fallbackPersistor.promises.getFileStream).not.to.have.been.called + }) + }) + + describe('when the fallback persistor throws an unexpected error', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor, error + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(false) + fallbackPersistor.promises.getFileStream = sinon + .stub() + .rejects(genericError) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + try { + await migrationPersistor.promises.getFileStream(bucket, key, options) + } catch (err) { + error = err + } + }) + + it('rejects with the error', function() { + expect(error).to.equal(genericError) + }) + + it('should have called the fallback', function() { + expect( + fallbackPersistor.promises.getFileStream + ).to.have.been.calledWith(fallbackBucket, key) + }) + }) + }) + + describe('sendStream', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor + beforeEach(function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(false) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + }) + + describe('when it works', function() { + beforeEach(async function() { + return migrationPersistor.promises.sendStream(bucket, key, fileStream) + }) + + it('should send the file to the primary persistor', function() { + expect( + primaryPersistor.promises.sendStream + ).to.have.been.calledWithExactly(bucket, key, fileStream) + }) + + it('should not send the file to the fallback persistor', function() { + expect(fallbackPersistor.promises.sendStream).not.to.have.been.called + }) + }) + + describe('when the primary persistor throws an error', function() { + it('returns the error', async function() { + primaryPersistor.promises.sendStream.rejects(notFoundError) + return expect( + migrationPersistor.promises.sendStream(bucket, key, fileStream) + ).to.eventually.be.rejected.and.be.an.instanceOf(Errors.NotFoundError) + }) + }) + }) + + describe('deleteFile', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor + beforeEach(function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(false) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + }) + + describe('when it works', function() { + beforeEach(async function() { + return migrationPersistor.promises.deleteFile(bucket, key) + }) + + it('should delete the file from the primary', function() { + expect( + primaryPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(bucket, key) + }) + + it('should delete the file from the fallback', function() { + expect( + fallbackPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(fallbackBucket, key) + }) + }) + + describe('when the primary persistor throws an error', function() { + let error + beforeEach(async function() { + primaryPersistor.promises.deleteFile.rejects(genericError) + try { + await migrationPersistor.promises.deleteFile(bucket, key) + } catch (err) { + error = err + } + }) + + it('should return the error', function() { + expect(error).to.equal(genericError) + }) + + it('should delete the file from the primary', function() { + expect( + primaryPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(bucket, key) + }) + + it('should delete the file from the fallback', function() { + expect( + fallbackPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(fallbackBucket, key) + }) + }) + + describe('when the fallback persistor throws an error', function() { + let error + beforeEach(async function() { + fallbackPersistor.promises.deleteFile.rejects(genericError) + try { + await migrationPersistor.promises.deleteFile(bucket, key) + } catch (err) { + error = err + } + }) + + it('should return the error', function() { + expect(error).to.equal(genericError) + }) + + it('should delete the file from the primary', function() { + expect( + primaryPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(bucket, key) + }) + + it('should delete the file from the fallback', function() { + expect( + fallbackPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(fallbackBucket, key) + }) + }) + }) + + describe('copyFile', function() { + describe('when the file exists on the primary', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor + beforeEach(async function() { + primaryPersistor = newPersistor(true) + fallbackPersistor = newPersistor(false) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + return migrationPersistor.promises.copyFile(bucket, key, destKey) + }) + + it('should call copyFile to copy the file', function() { + expect( + primaryPersistor.promises.copyFile + ).to.have.been.calledWithExactly(bucket, key, destKey) + }) + + it('should not try to read from the fallback', function() { + expect(fallbackPersistor.promises.getFileStream).not.to.have.been.called + }) + }) + + describe('when the file does not exist on the primary', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(true) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + return migrationPersistor.promises.copyFile(bucket, key, destKey) + }) + + it('should call copyFile to copy the file', function() { + expect( + primaryPersistor.promises.copyFile + ).to.have.been.calledWithExactly(bucket, key, destKey) + }) + + it('should fetch the file from the fallback', function() { + expect( + fallbackPersistor.promises.getFileStream + ).not.to.have.been.calledWithExactly(fallbackBucket, key) + }) + + it('should send the file to the primary', function() { + expect( + primaryPersistor.promises.sendStream + ).to.have.been.calledWithExactly(bucket, destKey, fileStream) + }) + }) + + describe('when the file does not exist on the fallback', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor, error + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(false) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + try { + await migrationPersistor.promises.copyFile(bucket, key, destKey) + } catch (err) { + error = err + } + }) + + it('should call copyFile to copy the file', function() { + expect( + primaryPersistor.promises.copyFile + ).to.have.been.calledWithExactly(bucket, key, destKey) + }) + + it('should fetch the file from the fallback', function() { + expect( + fallbackPersistor.promises.getFileStream + ).not.to.have.been.calledWithExactly(fallbackBucket, key) + }) + + it('should return a not-found error', function() { + expect(error).to.be.an.instanceOf(Errors.NotFoundError) + }) + }) + }) +})