diff --git a/services/docstore/app.js b/services/docstore/app.js index 7cb774b9f8..75154471be 100644 --- a/services/docstore/app.js +++ b/services/docstore/app.js @@ -54,6 +54,7 @@ app.get('/project/:project_id/ranges', HttpController.getAllRanges) app.get('/project/:project_id/doc/:doc_id', HttpController.getDoc) app.get('/project/:project_id/doc/:doc_id/deleted', HttpController.isDocDeleted) app.get('/project/:project_id/doc/:doc_id/raw', HttpController.getRawDoc) +app.get('/project/:project_id/doc/:doc_id/peek', HttpController.peekDoc) // Add 64kb overhead for the JSON encoding, and double the size to allow for ranges in the json payload app.post( '/project/:project_id/doc/:doc_id', @@ -90,6 +91,8 @@ app.use(function (error, req, res, next) { logger.error({ err: error, req }, 'request errored') if (error instanceof Errors.NotFoundError) { return res.sendStatus(404) + } else if (error instanceof Errors.DocModifiedError) { + return res.sendStatus(409) } else { return res.status(500).send('Oops, something went wrong') } diff --git a/services/docstore/app/js/DocArchiveManager.js b/services/docstore/app/js/DocArchiveManager.js index dcd713eb62..edd920833d 100644 --- a/services/docstore/app/js/DocArchiveManager.js +++ b/services/docstore/app/js/DocArchiveManager.js @@ -23,6 +23,7 @@ module.exports = { unarchiveDoc: callbackify(unarchiveDoc), destroyAllDocs: callbackify(destroyAllDocs), destroyDoc: callbackify(destroyDoc), + getDoc: callbackify(getDoc), promises: { archiveAllDocs, archiveDocById, @@ -31,6 +32,7 @@ module.exports = { unarchiveDoc, destroyAllDocs, destroyDoc, + getDoc, }, } @@ -125,38 +127,17 @@ async function unArchiveAllDocs(projectId) { } } -async function unarchiveDoc(projectId, docId) { - logger.log( - { project_id: projectId, doc_id: docId }, - 'getting doc from persistor' - ) - const originalDoc = await MongoManager.findDoc(projectId, docId, { inS3: 1 }) - if (!originalDoc.inS3) { - // return if it's not actually in S3 as there's nothing to do - return - } +// get the doc from the PersistorManager without storing it in mongo +async function getDoc(projectId, docId) { const key = `${projectId}/${docId}` - let stream, sourceMd5 - try { - sourceMd5 = await PersistorManager.getObjectMd5Hash( - settings.docstore.bucket, - key - ) - stream = await PersistorManager.getObjectStream( - settings.docstore.bucket, - key - ) - } catch (err) { - // if we get a 404, we could be in a race and something else has unarchived the doc already - if (err instanceof Errors.NotFoundError) { - const doc = await MongoManager.findDoc(projectId, docId, { inS3: 1 }) - if (!doc.inS3) { - // the doc has been archived while we were looking for it, so no error - return - } - } - throw err - } + const sourceMd5 = await PersistorManager.getObjectMd5Hash( + settings.docstore.bucket, + key + ) + const stream = await PersistorManager.getObjectStream( + settings.docstore.bucket, + key + ) stream.resume() const json = await _streamToString(stream) const md5 = crypto.createHash('md5').update(json).digest('hex') @@ -181,6 +162,36 @@ async function unarchiveDoc(projectId, docId) { } else { throw new Error("I don't understand the doc format in s3") } + + return mongoDoc +} + +// get the doc and unarchive it to mongo +async function unarchiveDoc(projectId, docId) { + logger.log( + { project_id: projectId, doc_id: docId }, + 'getting doc from persistor' + ) + const key = `${projectId}/${docId}` + const originalDoc = await MongoManager.findDoc(projectId, docId, { inS3: 1 }) + if (!originalDoc.inS3) { + // return if it's not actually in S3 as there's nothing to do + return + } + let mongoDoc + try { + mongoDoc = await getDoc(projectId, docId) + } catch (err) { + // if we get a 404, we could be in a race and something else has unarchived the doc already + if (err instanceof Errors.NotFoundError) { + const doc = await MongoManager.findDoc(projectId, docId, { inS3: 1 }) + if (!doc.inS3) { + // the doc has been archived while we were looking for it, so no error + return + } + } + throw err + } await MongoManager.upsertIntoDocCollection(projectId, docId, mongoDoc) await PersistorManager.deleteObject(settings.docstore.bucket, key) } diff --git a/services/docstore/app/js/DocManager.js b/services/docstore/app/js/DocManager.js index 51a974d5a7..51e3beaf27 100644 --- a/services/docstore/app/js/DocManager.js +++ b/services/docstore/app/js/DocManager.js @@ -124,6 +124,70 @@ module.exports = DocManager = { ) }, + // returns the doc without any version information + _peekRawDoc(project_id, doc_id, callback) { + MongoManager.findDoc( + project_id, + doc_id, + { + lines: true, + rev: true, + deleted: true, + version: true, + ranges: true, + inS3: true, + }, + (err, doc) => { + if (err) return callback(err) + if (doc == null) { + return callback( + new Errors.NotFoundError( + `No such doc: ${doc_id} in project ${project_id}` + ) + ) + } + if (doc && !doc.inS3) { + return callback(null, doc) + } + // skip the unarchiving to mongo when getting a doc + DocArchive.getDoc(project_id, doc_id, function (err, archivedDoc) { + if (err != null) { + logger.err( + { err, project_id, doc_id }, + 'error getting doc from archive' + ) + return callback(err) + } + doc = _.extend(doc, archivedDoc) + callback(null, doc) + }) + } + ) + }, + + // get the doc from mongo if possible, or from the persistent store otherwise, + // without unarchiving it (avoids unnecessary writes to mongo) + peekDoc(project_id, doc_id, callback) { + DocManager._peekRawDoc(project_id, doc_id, (err, doc) => { + if (err) { + return callback(err) + } + MongoManager.withRevCheck( + doc, + MongoManager.getDocVersion, + function (error, version) { + // If the doc has been modified while we were retrieving it, we + // will get a DocModified error + if (error != null) { + return callback(error) + } + doc.version = version + return callback(err, doc) + } + ) + }) + }, + getDocLines(project_id, doc_id, callback) { if (callback == null) { callback = function (err, doc) {} diff --git a/services/docstore/app/js/Errors.js b/services/docstore/app/js/Errors.js index 4eaa5481d3..3cf5ad74a4 100644 --- a/services/docstore/app/js/Errors.js +++ b/services/docstore/app/js/Errors.js @@ -4,7 +4,10 @@ const { Errors } = require('@overleaf/object-persistor') class Md5MismatchError extends OError {} +class DocModifiedError extends OError {} + module.exports = { Md5MismatchError, + DocModifiedError, ...Errors, } diff --git a/services/docstore/app/js/HttpController.js b/services/docstore/app/js/HttpController.js index f53c9068bc..7e901c4357 100644 --- a/services/docstore/app/js/HttpController.js +++ b/services/docstore/app/js/HttpController.js @@ -44,6 +44,23 @@ module.exports = HttpController = { }) }, + peekDoc(req, res, next) { + const { project_id } = req.params + const { doc_id } = req.params + logger.log({ project_id, doc_id }, 'peeking doc') + DocManager.peekDoc(project_id, doc_id, function (error, doc) { + if (error) { + return next(error) + } + if (doc == null) { + return res.sendStatus(404) + } else { + res.setHeader('x-doc-status', doc.inS3 ? 'archived' : 'active') + return res.json(HttpController._buildDocView(doc)) + } + }) + }, + isDocDeleted(req, res, next) { const { doc_id: docId, project_id: projectId } = req.params DocManager.isDocDeleted(projectId, docId, function (error, deleted) { diff --git a/services/docstore/app/js/MongoManager.js b/services/docstore/app/js/MongoManager.js index 5434db581d..263a162516 100644 --- a/services/docstore/app/js/MongoManager.js +++ b/services/docstore/app/js/MongoManager.js @@ -15,6 +15,7 @@ const { db, ObjectId } = require('./mongodb') const logger = require('logger-sharelatex') const metrics = require('@overleaf/metrics') const Settings = require('@overleaf/settings') +const Errors = require('./Errors') const { promisify } = require('util') module.exports = MongoManager = { @@ -178,6 +179,45 @@ module.exports = MongoManager = { ) }, + getDocRev(doc_id, callback) { + db.docs.findOne( + { + _id: ObjectId(doc_id.toString()), + }, + { + projection: { rev: 1 }, + }, + function (err, doc) { + if (err != null) { + return callback(err) + } + callback(null, doc && doc.rev) + } + ) + }, + + // Helper method to support optimistic locking. Call the provided method for + // an existing doc and return the result if the rev in mongo is unchanged when + // checked afterwards. If the rev has changed, return a DocModifiedError. + withRevCheck(doc, method, callback) { + method(doc._id, function (err, result) { + if (err) return callback(err) + MongoManager.getDocRev(doc._id, function (err, currentRev) { + if (err) return callback(err) + if (doc.rev !== currentRev) { + return callback( + new Errors.DocModifiedError('doc rev has changed', { + doc_id: doc._id, + rev: doc.rev, + currentRev, + }) + ) + } + return callback(null, result) + }) + }) + }, + destroyDoc(doc_id, callback) { db.docs.deleteOne( { diff --git a/services/docstore/test/acceptance/js/ArchiveDocsTests.js b/services/docstore/test/acceptance/js/ArchiveDocsTests.js index 2dc02152d9..381e90fabd 100644 --- a/services/docstore/test/acceptance/js/ArchiveDocsTests.js +++ b/services/docstore/test/acceptance/js/ArchiveDocsTests.js @@ -12,7 +12,7 @@ * DS207: Consider shorter variations of null checks * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md */ -process.env.BACKEND = 'gcs' + const Settings = require('@overleaf/settings') const { expect } = require('chai') const { db, ObjectId } = require('../../../app/js/mongodb') diff --git a/services/docstore/test/acceptance/js/GettingDocsFromArchiveTest.js b/services/docstore/test/acceptance/js/GettingDocsFromArchiveTest.js new file mode 100644 index 0000000000..4dddbb862d --- /dev/null +++ b/services/docstore/test/acceptance/js/GettingDocsFromArchiveTest.js @@ -0,0 +1,127 @@ +const Settings = require('@overleaf/settings') +const { ObjectId } = require('../../../app/js/mongodb') +const DocstoreApp = require('./helpers/DocstoreApp') +const DocstoreClient = require('./helpers/DocstoreClient') +const { Storage } = require('@google-cloud/storage') + +describe('Getting A Doc from Archive', function () { + before(function (done) { + return DocstoreApp.ensureRunning(done) + }) + + before(async function () { + const storage = new Storage(Settings.docstore.gcs.endpoint) + await storage.createBucket(Settings.docstore.bucket) + await storage.createBucket(`${Settings.docstore.bucket}-deleted`) + }) + + describe('for an archived doc', function () { + before(function (done) { + this.project_id = ObjectId() + this.timeout(1000 * 30) + this.doc = { + _id: ObjectId(), + lines: ['foo', 'bar'], + ranges: {}, + version: 2, + } + DocstoreClient.createDoc( + this.project_id, + this.doc._id, + this.doc.lines, + this.doc.version, + this.doc.ranges, + error => { + if (error) { + return done(error) + } + DocstoreClient.archiveDocById( + this.project_id, + this.doc._id, + (error, res) => { + this.res = res + if (error) { + return done(error) + } + done() + } + ) + } + ) + }) + + it('should successully archive the doc', function (done) { + this.res.statusCode.should.equal(204) + done() + }) + + it('should return the doc lines and version from persistent storage', function (done) { + return DocstoreClient.peekDoc( + this.project_id, + this.doc._id, + {}, + (error, res, doc) => { + res.statusCode.should.equal(200) + res.headers['x-doc-status'].should.equal('archived') + doc.lines.should.deep.equal(this.doc.lines) + doc.version.should.equal(this.doc.version) + doc.ranges.should.deep.equal(this.doc.ranges) + return done() + } + ) + }) + + it('should return the doc lines and version from persistent storage on subsequent requests', function (done) { + return DocstoreClient.peekDoc( + this.project_id, + this.doc._id, + {}, + (error, res, doc) => { + res.statusCode.should.equal(200) + res.headers['x-doc-status'].should.equal('archived') + doc.lines.should.deep.equal(this.doc.lines) + doc.version.should.equal(this.doc.version) + doc.ranges.should.deep.equal(this.doc.ranges) + return done() + } + ) + }) + + describe('for an non-archived doc', function () { + before(function (done) { + this.project_id = ObjectId() + this.timeout(1000 * 30) + this.doc = { + _id: ObjectId(), + lines: ['foo', 'bar'], + ranges: {}, + version: 2, + } + DocstoreClient.createDoc( + this.project_id, + this.doc._id, + this.doc.lines, + this.doc.version, + this.doc.ranges, + done + ) + }) + + it('should return the doc lines and version from mongo', function (done) { + return DocstoreClient.peekDoc( + this.project_id, + this.doc._id, + {}, + (error, res, doc) => { + res.statusCode.should.equal(200) + res.headers['x-doc-status'].should.equal('active') + doc.lines.should.deep.equal(this.doc.lines) + doc.version.should.equal(this.doc.version) + doc.ranges.should.deep.equal(this.doc.ranges) + return done() + } + ) + }) + }) + }) +}) diff --git a/services/docstore/test/acceptance/js/helpers/DocstoreClient.js b/services/docstore/test/acceptance/js/helpers/DocstoreClient.js index 1164540bd3..1af9f10ce7 100644 --- a/services/docstore/test/acceptance/js/helpers/DocstoreClient.js +++ b/services/docstore/test/acceptance/js/helpers/DocstoreClient.js @@ -60,6 +60,17 @@ module.exports = DocstoreClient = { ) }, + peekDoc(project_id, doc_id, qs, callback) { + request.get( + { + url: `http://localhost:${settings.internal.docstore.port}/project/${project_id}/doc/${doc_id}/peek`, + json: true, + qs, + }, + callback + ) + }, + isDocDeleted(project_id, doc_id, callback) { request.get( { diff --git a/services/docstore/test/setup.js b/services/docstore/test/setup.js index fbf29a495b..e45df5834f 100644 --- a/services/docstore/test/setup.js +++ b/services/docstore/test/setup.js @@ -4,6 +4,8 @@ const sinonChai = require('sinon-chai') const chaiAsPromised = require('chai-as-promised') const SandboxedModule = require('sandboxed-module') +process.env.BACKEND = 'gcs' + // Chai configuration chai.should() chai.use(sinonChai) diff --git a/services/docstore/test/unit/js/MongoManagerTests.js b/services/docstore/test/unit/js/MongoManagerTests.js index 50cc8268c3..5bfcb3c179 100644 --- a/services/docstore/test/unit/js/MongoManagerTests.js +++ b/services/docstore/test/unit/js/MongoManagerTests.js @@ -17,6 +17,7 @@ const modulePath = require('path').join( ) const { ObjectId } = require('mongodb') const { assert } = require('chai') +const Errors = require('../../../app/js/Errors') describe('MongoManager', function () { beforeEach(function () { @@ -28,6 +29,7 @@ describe('MongoManager', function () { }, '@overleaf/metrics': { timeAsyncMethod: sinon.stub() }, '@overleaf/settings': { max_deleted_docs: 42 }, + './Errors': Errors, }, }) this.project_id = ObjectId().toString() @@ -305,7 +307,7 @@ describe('MongoManager', function () { }) }) - return describe('setDocVersion', function () { + describe('setDocVersion', function () { beforeEach(function () { this.version = 42 this.db.docOps.updateOne = sinon.stub().callsArg(3) @@ -338,4 +340,36 @@ describe('MongoManager', function () { return this.callback.called.should.equal(true) }) }) + + describe('withRevCheck', function () { + this.beforeEach(function () { + this.doc = { _id: ObjectId(), name: 'mock-doc', rev: 1 } + this.testFunction = sinon.stub().yields(null, 'foo') + }) + + it('should call the callback when the rev has not changed', function (done) { + this.db.docs.findOne = sinon.stub().callsArgWith(2, null, { rev: 1 }) + this.MongoManager.withRevCheck( + this.doc, + this.testFunction, + (err, result) => { + result.should.equal('foo') + assert.isNull(err) + done() + } + ) + }) + + it('should return an error when the rev has changed', function (done) { + this.db.docs.findOne = sinon.stub().callsArgWith(2, null, { rev: 2 }) + this.MongoManager.withRevCheck( + this.doc, + this.testFunction, + (err, result) => { + err.should.be.instanceof(Errors.DocModifiedError) + done() + } + ) + }) + }) })